From 863f82dc8094063357894f38ed03148d837ae911 Mon Sep 17 00:00:00 2001 From: Liang Juhao Date: Tue, 19 May 2026 16:32:35 +0800 Subject: [PATCH 1/5] fix: add proper warmup phase to online + burst scenarios MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The online and burst scenarios read `online_warmup_runs` from suite.json but the value was never applied — every reported TTFT p99 was contaminated by cold-engine spikes (JIT compile, CUDA-graph allocation, KV cache priming) at the start of the timed phase. This made the first QPS level's p99 unreliable and any submission's relative-burst ratio noisy. Changes - loadgen/loadgen.py: new `_warmup_requests` helper used by `_run_online_async` and `_run_burst_async`. Fires N dummy requests sequentially before the timed phase; results are discarded and warmup-time exceptions are swallowed (logged via tqdm.write) so a flaky engine cannot block a submission. - suites/suite_{A,B,D,E,F,G}/suite.json: replace dead `online_warmup_runs: 0` with `online_warmup_requests: 10`. Add `burst_warmup_requests: 10` to suite_A and suite_B (the two suites that include the burst scenario). - schema/suite.schema.json: declare the new properties with descriptions. `online_warmup_runs` kept as a deprecated, ignored property (loadgen no longer reads it) to avoid breaking any third-party suite still carrying it. - DEVELOPMENT.md: mirror the new field names in the suite template. - loadgen/tests/test_warmup.py: regression coverage. Asserts (a) warmup fires the configured count of dummy calls, (b) fast warmup latencies do NOT leak into recorded p50/p99 distributions, (c) zero warmup is a no-op, (d) a failing warmup request does not abort the timed phase. Tested locally with `pytest loadgen/tests -q` (8 passed). 
Co-authored-by: Cursor --- DEVELOPMENT.md | 3 +- loadgen/loadgen.py | 64 ++++++++- loadgen/tests/__init__.py | 0 loadgen/tests/test_warmup.py | 254 +++++++++++++++++++++++++++++++++++ schema/suite.schema.json | 14 +- suites/suite_A/suite.json | 5 +- suites/suite_B/suite.json | 3 +- suites/suite_D/suite.json | 2 +- suites/suite_E/suite.json | 2 +- suites/suite_F/suite.json | 2 +- suites/suite_G/suite.json | 2 +- 11 files changed, 340 insertions(+), 11 deletions(-) create mode 100644 loadgen/tests/__init__.py create mode 100644 loadgen/tests/test_warmup.py diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 990f73bc..13bd8d92 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -520,7 +520,8 @@ Copy the closest existing suite and modify. Required fields: "online_sla_ttft_ms": 500, "num_runs": 3, "warmup_runs": 1, - "online_warmup_runs": 0, + "online_warmup_requests": 10, + "burst_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 200, diff --git a/loadgen/loadgen.py b/loadgen/loadgen.py index 758abc41..198bb378 100644 --- a/loadgen/loadgen.py +++ b/loadgen/loadgen.py @@ -95,19 +95,35 @@ def __init__( # Use different request counts per scenario # offline: use request_count (default 200, fast) # online/interactive: use online_request_count if set, else all requests + # + # Warmup semantics differ per scenario: + # offline / interactive : `warmup_runs` = number of full passes to discard + # (interactive_warmup_runs may override for interactive) + # sustained : `warmup_minutes` = time window discarded + # online / burst : `online_warmup_requests` / `burst_warmup_requests` + # = number of dummy requests fired sequentially before + # the timed phase, used to JIT-compile kernels, allocate + # CUDA graphs, prime the KV cache, etc. Results are + # never recorded. Without this warmup, the first few + # requests of the first QPS level inflate p99 by + # hundreds of ms on cold engines. 
+ self.online_warmup_requests = 0 + self.burst_warmup_requests = 0 if scenario == "offline": count = suite.get("request_count") self.warmup_runs = suite.get("warmup_runs", 1) elif scenario == "online": # online and interactive need more requests for reliable p99 count = suite.get("online_request_count", suite.get("request_count")) - self.warmup_runs = suite.get("online_warmup_runs", 0) + self.warmup_runs = 0 # online doesn't use full-pass warmup + self.online_warmup_requests = suite.get("online_warmup_requests", 10) elif scenario == "interactive": count = suite.get("interactive_request_count", suite.get("request_count")) self.warmup_runs = suite.get("interactive_warmup_runs", 0) elif scenario == "burst": count = suite.get("online_request_count", suite.get("request_count")) - self.warmup_runs = suite.get("online_warmup_runs", 0) + self.warmup_runs = 0 # burst doesn't use full-pass warmup + self.burst_warmup_requests = suite.get("burst_warmup_requests", 10) elif scenario == "speculative": count = suite.get("request_count") self.warmup_runs = suite.get("warmup_runs", 1) @@ -535,11 +551,42 @@ def _run_online(self, inference_fn: Callable) -> dict: loop = asyncio.get_event_loop() return loop.run_until_complete(self._run_online_async(inference_fn)) + async def _warmup_requests(self, async_inference_fn, count: int, label: str) -> None: + """ + Fire `count` dummy requests sequentially before timed measurement. + + Cycles through self.requests if count > len(requests). All results are + discarded — purpose is to JIT-compile kernels, allocate CUDA graphs, + prime the KV cache, and let the engine reach steady-state schedules + before the timed phase. Without this, the first few timed requests on + cold engines inflate p99 by hundreds of milliseconds. + + Exceptions during warmup are logged and swallowed; warmup failures + must never abort the timed run. 
+ """ + if count <= 0 or not self.requests: + return + tqdm.write( + f"[{label} warmup] firing {count} dummy requests " + "(results discarded — engine JIT/cache warm-up)" + ) + for i in range(count): + req = self.requests[i % len(self.requests)] + try: + await async_inference_fn(req) + except Exception as e: + tqdm.write(f"[{label} warmup] request {i} failed (ignored): {e}") + async def _run_online_async(self, async_inference_fn) -> dict: """ Async implementation of the online scenario. Generates Poisson arrival times upfront, then fires all requests concurrently via asyncio.gather so the engine sees real concurrent load. + + A warmup phase fires `online_warmup_requests` dummy requests + sequentially before the QPS sweep. Their latencies are not recorded + in `results_by_qps`. This prevents cold-engine TTFT spikes from + inflating p99 at the first QPS level. """ loop = asyncio.get_event_loop() sla_ms = self.suite["online_sla_ttft_ms"] @@ -547,6 +594,10 @@ async def _run_online_async(self, async_inference_fn) -> dict: all_samples: list[SampleRecord] = [] max_valid_qps = 0.0 + await self._warmup_requests( + async_inference_fn, self.online_warmup_requests, "online" + ) + for target_qps in self.suite["online_qps_levels"]: print(f"[online] target_qps={target_qps}") run_ttfts: list[list[float]] = [] @@ -665,6 +716,11 @@ async def _run_burst_async(self, async_inference_fn) -> dict: sla_met_during_burst — bool: p99 TTFT during burst < online_sla_ttft_ms burst_degradation_ratio — burst_ttft_p99 / steady_ttft_p99 (higher = worse) results_by_cycle — per-cycle breakdown + + A warmup phase fires `burst_warmup_requests` dummy requests + sequentially before the first cycle. Their latencies are excluded + from steady/burst windows so the first cycle's steady-state + measurement is not contaminated by cold-engine TTFT spikes. 
""" loop = asyncio.get_event_loop() sla_ms = self.suite["online_sla_ttft_ms"] @@ -674,6 +730,10 @@ async def _run_burst_async(self, async_inference_fn) -> dict: steady_dur = self.suite["burst_interval_seconds"] num_runs = self.suite.get("num_runs", 3) + await self._warmup_requests( + async_inference_fn, self.burst_warmup_requests, "burst" + ) + all_steady_ttfts: list[float] = [] all_burst_ttfts: list[float] = [] results_by_cycle = [] diff --git a/loadgen/tests/__init__.py b/loadgen/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/loadgen/tests/test_warmup.py b/loadgen/tests/test_warmup.py new file mode 100644 index 00000000..ea625986 --- /dev/null +++ b/loadgen/tests/test_warmup.py @@ -0,0 +1,254 @@ +""" +Tests for the warmup phase in online and burst scenarios. + +These scenarios used to read `online_warmup_runs` from suite.json but +silently ignored the value — every reported p99 was contaminated by +cold-engine TTFT spikes. This test suite locks down the fix so future +refactors can't reintroduce the bug. 
+ +What is verified: +- Warmup requests are fired in `online` and `burst` BEFORE the timed phase +- Warmup latencies are NOT counted in the returned distribution +- A counter on the mock inference_fn confirms the exact request budget +- Warmup is a no-op when the parameter is 0 (back-compat) +- An exception during warmup does not abort the timed phase +""" + +from __future__ import annotations + +import asyncio +from typing import Optional + +import pytest + +from loadgen.loadgen import AccelMarkLoadGen +from loadgen.types import InferenceResult + + +# ── Fixtures ────────────────────────────────────────────────────────────────── + +def _make_requests(n: int): + """Build n minimal InferenceRequest-like objects using the same shim + loadgen.py falls back to when benchmark_runner is not importable.""" + from loadgen.loadgen import InferenceRequest + return [ + InferenceRequest( + prompt=f"prompt {i}", + request_id=i, + input_tokens=10, + max_tokens=20, + ) + for i in range(n) + ] + + +def _online_suite(qps_levels=(2.0,), warmup_requests: int = 5, num_runs: int = 1): + return { + "num_runs": num_runs, + "online_qps_levels": list(qps_levels), + "online_sla_ttft_ms": 1000, + "online_request_count": 8, + "online_warmup_requests": warmup_requests, + "input_tokens": 10, + } + + +def _burst_suite(warmup_requests: int = 5): + return { + "num_runs": 1, + "online_sla_ttft_ms": 1000, + "online_request_count": 6, + "burst_steady_qps": 2.0, + "burst_peak_qps": 4.0, + "burst_duration_seconds": 0.3, + "burst_interval_seconds": 0.3, + "burst_warmup_requests": warmup_requests, + "input_tokens": 10, + } + + +class MockInferenceFn: + """Counts every call and exposes an async callable as `.fn` for loadgen. + + The fast warmup latency vs slow timed latency makes it trivial to + assert that warmup requests are excluded from the distribution: if + warmup latencies leaked into results, p50/p99 would collapse to + the fast value. 
+ + Note: loadgen uses `asyncio.iscoroutinefunction()` to detect async + inference_fn, which returns False for a class with `async __call__`. + So we expose `self.fn` as a real `async def` closure bound to this + instance's state. + """ + + def __init__(self, *, warmup_ttft_ms: float = 1.0, timed_ttft_ms: float = 100.0, + fail_first_n: int = 0): + self.call_count = 0 + self.warmup_ttft_ms = warmup_ttft_ms + self.timed_ttft_ms = timed_ttft_ms + self.fail_first_n = fail_first_n + self.warmup_budget: Optional[int] = None + + state = self # closure capture + + async def _fn(request) -> InferenceResult: + idx = state.call_count + state.call_count += 1 + if idx < state.fail_first_n: + raise RuntimeError(f"simulated failure on warmup request {idx}") + ttft = ( + state.warmup_ttft_ms + if state.warmup_budget is not None and idx < state.warmup_budget + else state.timed_ttft_ms + ) + await asyncio.sleep(0) # yield control + return InferenceResult( + first_token_time_ms=ttft, + total_time_ms=ttft * 2, + output_tokens=20, + input_tokens=10, + success=True, + ) + + self.fn = _fn + + def set_warmup_budget(self, n: int) -> None: + """Tell the mock how many of the next calls count as warmup.""" + self.warmup_budget = n + + +# ── online warmup ───────────────────────────────────────────────────────────── + +def test_online_warmup_fires_configured_count(tmp_path): + """Online scenario must fire exactly `online_warmup_requests` warmup calls.""" + suite = _online_suite(qps_levels=(2.0,), warmup_requests=5) + requests = _make_requests(8) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + + fn = MockInferenceFn() + fn.set_warmup_budget(5) + gen.run(fn.fn) + + # warmup (5) + 8 requests × 1 QPS × 1 run = 13 calls minimum + assert fn.call_count >= 13, ( + f"expected at least 13 inference_fn calls (5 warmup + 8 timed), " + f"got {fn.call_count}" + ) + + +def test_online_warmup_latencies_excluded_from_p99(tmp_path): + """If warmup latencies leaked into the recorded 
distribution, p99 would + collapse to the fast warmup value. Verify it stays at the timed value.""" + suite = _online_suite(qps_levels=(2.0,), warmup_requests=5) + requests = _make_requests(8) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + + fn = MockInferenceFn(warmup_ttft_ms=1.0, timed_ttft_ms=100.0) + fn.set_warmup_budget(5) + result = gen.run(fn.fn) + + qps_results = result["online"]["results_by_qps"] + assert qps_results, "expected at least one QPS level result" + p50 = qps_results[0]["ttft_ms_p50"] + p99 = qps_results[0]["ttft_ms_p99"] + + # If warmup leaked in, p50 would be near 1.0 ms. With warmup excluded, + # every recorded request returns 100 ms, so all percentiles snap there. + assert abs(p50 - 100.0) < 0.5, f"p50 contaminated by warmup: {p50}" + assert abs(p99 - 100.0) < 0.5, f"p99 contaminated by warmup: {p99}" + + +def test_online_warmup_zero_is_noop(tmp_path): + """Backward compat: setting online_warmup_requests=0 must skip warmup.""" + suite = _online_suite(qps_levels=(2.0,), warmup_requests=0) + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + + fn = MockInferenceFn() + gen.run(fn.fn) + + # No warmup means exactly 6 calls (1 QPS × 6 requests × 1 run). + assert fn.call_count == 6, ( + f"warmup=0 should fire only timed requests; got {fn.call_count} calls" + ) + + +def test_online_warmup_failure_does_not_abort_run(tmp_path): + """A failing warmup request must be logged and ignored — the timed phase + must still execute. Otherwise a flaky engine could prevent any submission.""" + suite = _online_suite(qps_levels=(2.0,), warmup_requests=3) + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + + # Fail the first 2 warmup requests; the 3rd warmup + all timed must run. 
+ fn = MockInferenceFn(fail_first_n=2) + fn.set_warmup_budget(3) + result = gen.run(fn.fn) + + qps_results = result["online"]["results_by_qps"] + assert qps_results, "timed phase did not run despite warmup failures" + assert fn.call_count >= 3 + 6 # 3 warmup attempts + 6 timed + + +# ── burst warmup ────────────────────────────────────────────────────────────── + +def test_burst_warmup_fires_configured_count(tmp_path): + suite = _burst_suite(warmup_requests=4) + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "burst", str(tmp_path)) + + fn = MockInferenceFn() + fn.set_warmup_budget(4) + gen.run(fn.fn) + + # At least 4 warmup calls must have fired before the timed cycles. + assert fn.call_count >= 4, ( + f"burst warmup did not fire enough requests: {fn.call_count}" + ) + + +def test_burst_warmup_zero_is_noop(tmp_path): + """Suites that omit burst_warmup_requests entirely default to 10; setting + it to 0 must skip warmup.""" + suite = _burst_suite(warmup_requests=0) + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "burst", str(tmp_path)) + + fn = MockInferenceFn() + n_before = fn.call_count + gen.run(fn.fn) + # No assertion on exact count (timed cycles depend on Poisson timing), + # but we can assert the mock saw at least 1 timed call. 
+ assert fn.call_count > n_before + + +# ── default values ──────────────────────────────────────────────────────────── + +def test_online_warmup_default_is_ten(tmp_path): + """Suite without online_warmup_requests should get a sensible default.""" + suite = { + "num_runs": 1, + "online_qps_levels": [2.0], + "online_sla_ttft_ms": 1000, + "online_request_count": 4, + "input_tokens": 10, + } + requests = _make_requests(4) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + assert gen.online_warmup_requests == 10 + + +def test_burst_warmup_default_is_ten(tmp_path): + suite = { + "num_runs": 1, + "online_sla_ttft_ms": 1000, + "online_request_count": 4, + "burst_steady_qps": 2.0, + "burst_peak_qps": 4.0, + "burst_duration_seconds": 0.3, + "burst_interval_seconds": 0.3, + "input_tokens": 10, + } + requests = _make_requests(4) + gen = AccelMarkLoadGen(suite, requests, "burst", str(tmp_path)) + assert gen.burst_warmup_requests == 10 diff --git a/schema/suite.schema.json b/schema/suite.schema.json index 1367fe0d..7309b58a 100644 --- a/schema/suite.schema.json +++ b/schema/suite.schema.json @@ -117,11 +117,23 @@ "online_sla_ttft_ms": { "type": ["integer", "null"], "minimum": 1 }, "online_sla_ttft_ms_relaxed": { "type": ["integer", "null"], "minimum": 1 }, "online_request_count": { "type": ["integer", "null"], "minimum": 1 }, - "online_warmup_runs": { "type": "integer", "minimum": 0 }, + "online_warmup_runs": { + "type": "integer", "minimum": 0, + "description": "DEPRECATED. Previously unused — kept only to silence schema warnings on older suites. Use online_warmup_requests instead." + }, + "online_warmup_requests": { + "type": "integer", "minimum": 0, + "description": "Number of dummy requests fired sequentially before the online QPS sweep. Results are discarded. Used to JIT-compile kernels and prime the engine on cold start. Defaults to 10 if not set." 
+ }, "interactive_request_count": { "type": ["integer", "null"], "minimum": 1 }, "interactive_warmup_runs": { "type": "integer", "minimum": 0 }, + "burst_warmup_requests": { + "type": "integer", "minimum": 0, + "description": "Number of dummy requests fired sequentially before the first burst cycle. Results are discarded. Defaults to 10 if not set." + }, + "sustained_concurrency": { "type": "integer", "minimum": 1 }, "duration_minutes": { "type": "number", "minimum": 0 }, "sample_interval_seconds": { "type": "number", "minimum": 0 }, diff --git a/suites/suite_A/suite.json b/suites/suite_A/suite.json index d8f6914a..e902022a 100644 --- a/suites/suite_A/suite.json +++ b/suites/suite_A/suite.json @@ -29,8 +29,9 @@ "num_runs": 3, "warmup_runs": 1, "warmup_minutes": 2, - "online_warmup_runs": 0, - "interactive_warmup_runs": 0 , + "online_warmup_requests": 10, + "burst_warmup_requests": 10, + "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 100, "_request_count_note": "offline uses request_count (100), online uses online_request_count (300, minimum for robust p99), interactive uses interactive_request_count (150, minimum for robust p95)", diff --git a/suites/suite_B/suite.json b/suites/suite_B/suite.json index b331dbd8..87ba98f0 100644 --- a/suites/suite_B/suite.json +++ b/suites/suite_B/suite.json @@ -31,7 +31,8 @@ "num_runs": 3, "warmup_runs": 1, "warmup_minutes": 2, - "online_warmup_runs": 0, + "online_warmup_requests": 10, + "burst_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 100, diff --git a/suites/suite_D/suite.json b/suites/suite_D/suite.json index 0f93adc9..2f439d0a 100644 --- a/suites/suite_D/suite.json +++ b/suites/suite_D/suite.json @@ -29,7 +29,7 @@ "num_runs": 2, "warmup_runs": 1, "warmup_minutes": 2, - "online_warmup_runs": 0, + "online_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 50, diff --git 
a/suites/suite_E/suite.json b/suites/suite_E/suite.json index dfaab1bb..6a3de48d 100644 --- a/suites/suite_E/suite.json +++ b/suites/suite_E/suite.json @@ -32,7 +32,7 @@ "online_sla_ttft_ms_relaxed": null, "num_runs": 3, "warmup_runs": 1, - "online_warmup_runs": 0, + "online_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 150, diff --git a/suites/suite_F/suite.json b/suites/suite_F/suite.json index 17f3a9b4..67851df0 100644 --- a/suites/suite_F/suite.json +++ b/suites/suite_F/suite.json @@ -33,7 +33,7 @@ "num_runs": 3, "warmup_runs": 1, "warmup_minutes": 1, - "online_warmup_runs": 0, + "online_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "_accuracy_note": "Qwen2.5-0.5B scores ~0.35–0.40 on MMLU by design (small model). The threshold detects broken quantization or misconfigured precision, not model quality.", diff --git a/suites/suite_G/suite.json b/suites/suite_G/suite.json index 7ef5aae2..0a339dab 100644 --- a/suites/suite_G/suite.json +++ b/suites/suite_G/suite.json @@ -31,7 +31,7 @@ "num_runs": 3, "warmup_runs": 1, "warmup_minutes": 2, - "online_warmup_runs": 0, + "online_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 100, From bffc09ace9abf6ea815d72ddc13dd060b66b78a0 Mon Sep 17 00:00:00 2001 From: Liang Juhao Date: Tue, 19 May 2026 16:54:04 +0800 Subject: [PATCH 2/5] feat: emit reliability stats + flatten env info in modal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds inter-run variance metrics so leaderboard visitors can judge how reproducible each submission is, plus an opt-in vendor_details field for environment data that does not fit any cross-vendor schema. Loadgen - New helpers `_cv_pct`, `_stability_label`, `_reliability_block`, `_compute_recovery_time` (with regression tests). 
- offline: emit `throughput_tokens_per_sec_reliability` per concurrency level — list of per-run throughputs + CV + stability label. - online: emit `ttft_ms_p99_reliability` per QPS — per-run TTFT p99s computed independently (the headline pooled p99 is unchanged). - interactive: emit `ttft_ms_p99_reliability` across `num_runs`. - sustained: emit `throughput_post_warmup_reliability` (CV of sample intervals after warmup). Complements the existing `throttle_ratio`, which is a min/max metric and so blind to intermittent jitter. - burst: emit `recovery_time_seconds` and per-cycle list. Defined as the median time within a post-burst steady window for rolling TTFT p99 to fall back to ≤ 1.5× the long-term steady baseline. - Migrate `_run_online`, `_run_burst`, `_run_interactive` sync wrappers to `asyncio.run(...)`. `get_event_loop().run_until_complete(...)` was leaking closed loops across pytest runs, blocking the new tests. Schema - `schema/env.schema.json`: add optional `vendor_details` field (`additionalProperties: true` inside), the documented escape hatch for vendor-specific environment data that does not unify across platforms (NVML clocks, ROCm-SMI counters, etc). Leaderboard generator - `extract_viz` propagates the new reliability blocks (and burst's recovery time) to each per-suite viz dict. Offline reliability is passed as a parallel array indexed by concurrency level. - `extract_detail` propagates `env.vendor_details` to the row as `env_vendor_details` for flat rendering in the modal. - Add `from __future__ import annotations` for Py 3.9 compatibility (was using `dict | None` in type hints). Frontend modal - New "Reliability" section in the Details tab. Shows worst-case CV per scenario, with stability badge and recovery time for burst. - New "Vendor-specific environment" section that flattens vendor_details into key→value rows, hiding null/empty entries. No cross-vendor unification attempted. 
- Small reliability pill in the modal subtitle showing the worst CV across scenarios — clickable; users can drill into the new section to see the per-scenario breakdown. Older results without reliability blocks render exactly as before (pill and section both hide silently). - CSS for the pill follows existing `--good/--warn/--bad` tokens. Docs - DEVELOPMENT.md: document warmup contract per scenario and the reliability block shape + stability thresholds. Tests - `loadgen/tests/test_reliability.py`: unit-tests for the helpers and one integration test per scenario verifying the block shows up and is internally consistent (n equal to `num_runs`, stability label matches CV threshold). 21 loadgen tests pass. Backward compatibility - New result fields nest into existing `additionalProperties: true` blocks in `result.schema.json`; no schema bump needed. - Existing results without reliability blocks render unchanged: the modal pill and Reliability section both gate on a numeric `cv_pct` and silently skip when absent. Older `result.json` files validate identically. 
Co-authored-by: Cursor --- DEVELOPMENT.md | 50 ++++++ leaderboard/generate.py | 28 +++ leaderboard/site/assets/css/modal.css | 20 +++ leaderboard/site/assets/js/modal.js | 169 ++++++++++++++++++ loadgen/loadgen.py | 239 +++++++++++++++++++++++-- loadgen/tests/test_reliability.py | 241 ++++++++++++++++++++++++++ schema/env.schema.json | 5 + 7 files changed, 737 insertions(+), 15 deletions(-) create mode 100644 loadgen/tests/test_reliability.py diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 13bd8d92..49d443f9 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -912,6 +912,56 @@ class InferenceResult: | speculative | Offline throughput with draft model (same path as offline, engine uses speculative decoding) | `throughput_tokens_per_sec`; optional `task.runtime_metrics.acceptance_rate` if runner overrides `get_runtime_metrics()` | | burst | Two-state bursty load: alternates steady QPS and burst QPS windows | `burst_degradation_ratio` (burst_ttft_p99 / steady_ttft_p99); `sla_met_during_burst` | +### Warmup contract + +Cold engines inflate the first few timed requests by hundreds of ms (JIT +compile, CUDA-graph allocation, KV cache priming). Each scenario discards +a configurable prefix: + +| Scenario | Suite key | Default | Unit | +|---|---|---|---| +| offline / speculative / interactive | `warmup_runs` / `interactive_warmup_runs` | `1` / `0` | full passes | +| online | `online_warmup_requests` | `10` | dummy requests fired before QPS sweep | +| burst | `burst_warmup_requests` | `10` | dummy requests fired before first cycle | +| sustained | `warmup_minutes` | `2` | minutes of samples excluded from analysis | + +Warmup-time exceptions are logged and swallowed — they never abort the +timed phase. + +### Reliability metrics + +Each scenario emits an inter-run reliability block alongside its primary +metrics so submitters can prove their results are reproducible without +shipping `samples.jsonl`. 
Shape: + +```json +{ + "n": 3, + "mean": 1234.5, + "std": 21.3, + "cv_pct": 1.7, + "stability": "stable", + "runs": [1230.1, 1255.2, 1218.2] +} +``` + +`stability` thresholds (tunable): `cv_pct ≤ 2 → stable ✓`, +`≤ 5 → noisy ⚠`, otherwise `unstable ✗`. + +| Scenario | Field path | Reliability source | +|---|---|---| +| offline | `metrics.offline.results_by_concurrency[i].throughput_tokens_per_sec_reliability` | per-run throughput across `num_runs` | +| online | `metrics.online.results_by_qps[i].ttft_ms_p99_reliability` | per-run TTFT p99 across `num_runs` | +| interactive | `metrics.interactive.ttft_ms_p99_reliability` | per-run TTFT p99 across `num_runs` | +| sustained | `metrics.sustained.throughput_post_warmup_reliability` | per-interval throughput (post-warmup) | +| burst | `metrics.burst.recovery_time_seconds` (+ `_per_cycle`) | seconds until rolling p99 returns to ≤ 1.5× steady baseline | + +Backfilling these for existing results is done by +`tools/backfill_distribution_stats.py`, which reads each result's local +`samples.jsonl` and writes the summary stats in place. Offline reliability +cannot be backfilled because per-run throughput was never recorded in +`samples.jsonl` historically — it stays `{}` for old offline results. + --- ## Schema and Validation diff --git a/leaderboard/generate.py b/leaderboard/generate.py index bf35de21..ead41d62 100644 --- a/leaderboard/generate.py +++ b/leaderboard/generate.py @@ -6,6 +6,8 @@ python leaderboard/generate.py """ +from __future__ import annotations + import hashlib import json import re @@ -238,6 +240,11 @@ def extract_detail(result: dict) -> dict: "meta_model_load_sec": meta.get("model_load_seconds"), "meta_start_time": meta.get("benchmark_start_time"), "meta_notes": meta.get("notes"), + # Vendor-specific environment fields collected by platforms/.py + # (e.g. ROCm-SMI link health, NVML clock telemetry). 
The modal flattens + # this dict and shows only non-null entries — different vendors record + # different keys by design and no UI tries to unify them. + "env_vendor_details": env.get("vendor_details") or {}, } @@ -297,6 +304,9 @@ def _concurrency_labels(rows): def _online_block(): online = metrics.get("online", {}) qps_rows = online.get("results_by_qps", []) + # Per-QPS reliability blocks. Emitted as a parallel array so the + # frontend can render a badge next to each QPS row without joining + # by index from a separate object. return { "labels": [str(r.get("target_qps", "")) for r in qps_rows], "ttft_p50": [r.get("ttft_ms_p50") for r in qps_rows], @@ -304,6 +314,8 @@ def _online_block(): "tpot_p50": [r.get("tpot_ms_p50") for r in qps_rows], "sla_met": [r.get("sla_met") for r in qps_rows], "max_valid_qps": online.get("max_valid_qps"), + "ttft_ms_p99_reliability": + [r.get("ttft_ms_p99_reliability") or {} for r in qps_rows], } def _interactive_block(): @@ -315,6 +327,7 @@ def _interactive_block(): "tpot_p50": iv.get("tpot_ms_p50"), "tpot_p90": iv.get("tpot_ms_p90"), "tpot_p99": iv.get("tpot_ms_p99"), + "ttft_ms_p99_reliability": iv.get("ttft_ms_p99_reliability") or {}, } def _sustained_block(): @@ -334,6 +347,8 @@ def _sustained_block(): "throttle_ratio": s.get("throttle_ratio"), "throttle_onset_minute": s.get("throttle_onset_minute"), "ttft_p99_drift_ms": s.get("ttft_p99_drift_ms"), + "throughput_post_warmup_reliability": + s.get("throughput_post_warmup_reliability") or {}, "samples": samples, } @@ -352,6 +367,9 @@ def _burst_block(): "burst_requests_total": b.get("burst_requests_total"), "sla_met_during_burst": b.get("sla_met_during_burst"), "burst_degradation_ratio": b.get("burst_degradation_ratio"), + "recovery_time_seconds": b.get("recovery_time_seconds"), + "recovery_time_seconds_per_cycle": + b.get("recovery_time_seconds_per_cycle") or [], "results_by_cycle": b.get("results_by_cycle"), } @@ -370,6 +388,11 @@ def _speculative_block(): "mean_accepted_tokens": 
rm.get("mean_accepted_tokens"), } + # Per-concurrency-level offline reliability blocks. Parallel array to + # `throughput` and `memory_gb` so the frontend can join by row index. + def _offline_reliability(rows): + return [r.get("throughput_tokens_per_sec_reliability") or {} for r in rows] + if suite == "suite_A": rows = _offline_rows() return { @@ -378,6 +401,7 @@ def _speculative_block(): "labels": _concurrency_labels(rows), "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "online": _online_block(), "interactive": _interactive_block(), @@ -395,6 +419,7 @@ def _speculative_block(): "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "throughput_per_chip": [r.get("throughput_tokens_per_sec_per_chip") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "online": _online_block(), "sustained": _sustained_block(), @@ -409,6 +434,7 @@ def _speculative_block(): "labels": _concurrency_labels(rows), "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "interactive": _interactive_block(), "sustained": _sustained_block(), @@ -514,6 +540,7 @@ def _speculative_block(): "labels": _concurrency_labels(rows), "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "online": _online_block(), "interactive": _interactive_block(), @@ -530,6 +557,7 @@ def _speculative_block(): "labels": _concurrency_labels(rows), "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "online": _online_block(), 
"interactive": _interactive_block(), diff --git a/leaderboard/site/assets/css/modal.css b/leaderboard/site/assets/css/modal.css index 00e8e21a..0e76c4ba 100644 --- a/leaderboard/site/assets/css/modal.css +++ b/leaderboard/site/assets/css/modal.css @@ -127,6 +127,26 @@ margin-right: 0.3rem; } +/* Inter-run reliability pill that lives in the modal subtitle. Colours + * track --good / --warn / --bad so the existing palette controls dark-mode + * behaviour. We intentionally tone down opacity so the badge does not + * compete with the primary metric callouts above. */ +.modal-reliab-pill { + display: inline-flex; + align-items: center; + padding: 0.1rem 0.5rem; + border-radius: 999px; + font-size: 0.7rem; + font-weight: 600; + letter-spacing: 0.01em; + border: 1px solid color-mix(in srgb, currentColor 35%, transparent); + background: color-mix(in srgb, currentColor 10%, transparent); +} +.modal-reliab-pill.stable { color: var(--good, #2da44e); } +.modal-reliab-pill.noisy { color: var(--warn, #d29922); } +.modal-reliab-pill.unstable { color: var(--bad, #cf222e); } +.modal-reliab-pill.unknown { color: var(--fg-faint); } + .modal-close { background: transparent; border: 1px solid var(--border-soft); diff --git a/leaderboard/site/assets/js/modal.js b/leaderboard/site/assets/js/modal.js index 3461dbbf..f99d53e2 100644 --- a/leaderboard/site/assets/js/modal.js +++ b/leaderboard/site/assets/js/modal.js @@ -293,6 +293,8 @@ function _fillModal(row) { parts.push(esc(row.precision) + fallback); } if (row.date) parts.push(esc(fmtDate(row.date))); + const pill = _reliabilityPill(row); + if (pill) parts.push(pill); subEl.innerHTML = parts.filter(Boolean).join(' · '); // Footer: submission + script link. @@ -392,6 +394,171 @@ function _detailSection(title, rows) { `; } +// ── Reliability rendering ──────────────────────────────────────────────────── +// +// Reliability blocks are emitted by loadgen.py for each scenario starting in +// the "feat: emit reliability stats" series. 
// Each block has shape:
//   { n, mean, std, cv_pct, stability: "stable" | "noisy" | "unstable", runs: [...] }
// Older results (pre-feature) carry an empty {} which we skip silently — the
// section header is suppressed if no scenario reported a block, so old runs
// look the same as before.

// Glyph rendered next to each stability label. Callers choose their own
// fallback for labels that are not keys of this map.
const _STABILITY_ICON = { stable: "✓", noisy: "⚠", unstable: "✗" };

/**
 * Tell whether `block` is a usable reliability block.
 *
 * A block is usable only when it is an object whose `cv_pct` is a finite
 * number. Empty `{}` placeholders, `null`/`undefined`, blocks whose `cv_pct`
 * is `null` and blocks carrying a non-numeric `cv_pct` (e.g. a string) are
 * all rejected, so callers can safely compare `cv_pct` values with `>`.
 *
 * @param {*} block  Candidate reliability block.
 * @returns {boolean} Always a strict boolean, never the falsy input itself.
 */
function _hasReliability(block) {
  return block !== null
    && typeof block === "object"
    && Number.isFinite(block.cv_pct);
}

/**
 * Pick the worst (highest CV%) reliability block from a row's viz, used by
 * the modal subtitle pill. The "scenario" hint helps users find where to
 * drill in.
 *
 * @param {object} row  Leaderboard row; `row.viz` may be absent.
 * @returns {{cv_pct: number, stability: string, scenario: string,
 *            metric: string} | null}  `null` when no scenario reported a
 *          usable block.
 */
function _pickWorstReliability(row) {
  const viz = (row && row.viz) || {};

  // One entry per scenario: [viz key, field holding the block(s), metric
  // label]. offline/online hold one block per concurrency/QPS row, while
  // interactive/sustained hold a single block.
  const sources = [
    ["offline", "throughput_reliability", "throughput"],
    ["online", "ttft_ms_p99_reliability", "TTFT p99"],
    ["interactive", "ttft_ms_p99_reliability", "TTFT p99"],
    ["sustained", "throughput_post_warmup_reliability", "throughput"],
  ];

  let worst = null;
  for (const [scenario, field, metric] of sources) {
    const section = viz[scenario];
    if (!section) continue;
    const value = section[field];
    const blocks = Array.isArray(value) ? value : [value];
    for (const b of blocks) {
      if (!_hasReliability(b)) continue;
      // Strict `>` keeps the first block seen when two CVs tie.
      if (worst === null || b.cv_pct > worst.cv_pct) {
        worst = { cv_pct: b.cv_pct, stability: b.stability, scenario, metric };
      }
    }
  }
  return worst;
}

// Build the small `cv X.X% ✓` pill that appears in the modal subtitle row.
function _reliabilityPill(row) {
  // Summarise the single worst block; an empty string tells the caller to
  // omit the pill entirely.
  const w = _pickWorstReliability(row);
  if (!w) return "";

  // Only labels that have an icon become CSS classes; anything else falls
  // back to the neutral "unknown" style so data can never inject a class.
  const known = Object.prototype.hasOwnProperty.call(_STABILITY_ICON, w.stability);
  const icon = known ? _STABILITY_ICON[w.stability] : "·";
  const cls = "modal-reliab-pill " + (known ? w.stability : "unknown");
  const title = `Worst inter-run CV across scenarios: ${w.cv_pct}% ` +
                `(${w.scenario} ${w.metric}). ` +
                `≤2% stable, ≤5% noisy, >5% unstable.`;

  // NOTE(review): the wrapping <span> is reconstructed. The extracted source
  // showed an empty template literal here while `cls` and `title` were
  // computed but never used — confirm the exact markup against upstream.
  return `<span class="${cls}" title="${esc(title)}">` +
         `reliability ${icon} cv ${esc(String(w.cv_pct))}%</span>`;
}

// Render one row per scenario in the Details tab. Skipped if a scenario has
// no block (older results). Burst gets recovery_time_seconds appended.
function _reliabilityRows(row) {
  const viz = row.viz || {};
  const rows = [];

  // Every interpolated value is escaped because the row is emitted with
  // `html: true`.
  const fmtBlock = (b) =>
    `${esc(String(b.cv_pct))}% · ${_STABILITY_ICON[b.stability] || ""} ` +
    `${esc(b.stability || "")} (n=${esc(String(b.n))})`;

  // Return the highest-CV block of a per-row array together with its label
  // and index, or null when the section has no usable block. Strict `>`
  // keeps the first block on ties.
  const worstOf = (section, field) => {
    if (!section || !Array.isArray(section[field])) return null;
    const labels = section.labels || [];
    let worst = null;
    section[field].forEach((b, i) => {
      if (!_hasReliability(b)) return;
      if (worst === null || b.cv_pct > worst.b.cv_pct) {
        worst = { b, label: labels[i] || "", index: i };
      }
    });
    return worst;
  };

  // A missing label is shown as the 1-based row position rather than being
  // passed off as a concurrency / QPS value.
  const tag = (prefix, w) =>
    (w.label ? `${prefix}=${w.label}` : `row ${w.index + 1}`);

  // offline — show the worst (largest CV) of all client_concurrency rows.
  // That's the limiting concurrency for stability claims.
  const offline = worstOf(viz.offline, "throughput_reliability");
  if (offline) {
    rows.push(_detailRow(
      `Offline throughput (${tag("cc", offline)})`,
      fmtBlock(offline.b),
      { html: true },
    ));
  }

  // online — same rule, across QPS levels.
  const online = worstOf(viz.online, "ttft_ms_p99_reliability");
  if (online) {
    rows.push(_detailRow(
      `Online TTFT p99 (${tag("qps", online)})`,
      fmtBlock(online.b),
      { html: true },
    ));
  }

  if (viz.interactive && _hasReliability(viz.interactive.ttft_ms_p99_reliability)) {
    rows.push(_detailRow(
      "Interactive TTFT p99",
      fmtBlock(viz.interactive.ttft_ms_p99_reliability),
      { html: true },
    ));
  }

  if (viz.sustained && _hasReliability(viz.sustained.throughput_post_warmup_reliability)) {
    rows.push(_detailRow(
      "Sustained throughput (post-warmup)",
      fmtBlock(viz.sustained.throughput_post_warmup_reliability),
      { html: true },
    ));
  }

  // Burst — non-CV stability metric: time-to-recover after a peak window.
  if (viz.burst && viz.burst.recovery_time_seconds != null) {
    const sec = Number(viz.burst.recovery_time_seconds);
    rows.push(_detailRow(
      "Burst recovery time",
      `${sec.toFixed(2)} s ` +
      `median per-cycle, threshold 1.5× steady p99`,
      { html: true },
    ));
  } else if (viz.burst && Array.isArray(viz.burst.recovery_time_seconds_per_cycle)
             && viz.burst.recovery_time_seconds_per_cycle.length === 0) {
    // Burst ran but never recovered within any cycle's post-burst window.
    // NOTE(review): an empty array may also mean the field was simply never
    // recorded (older results) — confirm how the data builder fills it.
    rows.push(_detailRow(
      "Burst recovery time",
      `not measurable (never returned to baseline within steady window)`,
      { html: true },
    ));
  }

  return rows;
}

// Flatten env_info.vendor_details into rows. Keys that are objects/arrays are
// JSON-stringified for display; null/empty values are dropped. We intentionally
// do not try to humanise key names — vendors disagree on terminology and the
// keys themselves are the documented contract.
function _vendorDetailRows(row) {
  const obj = (row.detail || {}).env_vendor_details;
  // Only a plain key→value bag is rendered; an array would otherwise be
  // listed by numeric index.
  if (!obj || typeof obj !== "object" || Array.isArray(obj)) return [];

  const isEmpty = (v) =>
    v === null || v === undefined || v === "" ||
    (Array.isArray(v) && v.length === 0) ||
    (typeof v === "object" && !Array.isArray(v) && Object.keys(v).length === 0);

  const rows = [];
  for (const k of Object.keys(obj).sort()) {
    const v = obj[k];
    if (isEmpty(v)) continue;
    const display = (typeof v === "object") ? JSON.stringify(v) : String(v);
    rows.push(_detailRow(k, display, { mono: true }));
  }
  return rows;
}
+_STABILITY_THRESHOLD_STABLE_PCT = 2.0 +_STABILITY_THRESHOLD_NOISY_PCT = 5.0 + + +def _cv_pct(values: list) -> Optional[float]: + """Coefficient of variation as a percentage. None if too small / undefined.""" + if not values or len(values) < 2: + return None + arr = np.asarray(values, dtype=float) + arr = arr[np.isfinite(arr)] + if len(arr) < 2: + return None + mean = float(arr.mean()) + if mean <= 0: + return None + std = float(arr.std(ddof=1)) + return round(std / mean * 100.0, 2) + + +def _stability_label(cv_pct: Optional[float]) -> Optional[str]: + """Map a CV percentage to a stable/noisy/unstable label, or None.""" + if cv_pct is None: + return None + if cv_pct <= _STABILITY_THRESHOLD_STABLE_PCT: + return "stable" + if cv_pct <= _STABILITY_THRESHOLD_NOISY_PCT: + return "noisy" + return "unstable" + + +def _reliability_block(values: list, *, decimals: int = 2) -> dict: + """ + Build the standard {n, mean, std, cv_pct, stability, runs} block emitted + per metric. Returns an empty dict (not None) so the result schema retains + a consistent shape — frontend gates on `cv_pct` being numeric. + """ + if not values: + return {} + arr = np.asarray(values, dtype=float) + arr = arr[np.isfinite(arr)] + if len(arr) == 0: + return {} + mean = float(arr.mean()) + std = float(arr.std(ddof=1)) if len(arr) >= 2 else 0.0 + cv = _cv_pct(arr.tolist()) + return { + "n": int(len(arr)), + "mean": round(mean, decimals), + "std": round(std, decimals), + "cv_pct": cv, + "stability": _stability_label(cv), + "runs": [round(float(v), decimals) for v in arr.tolist()], + } + + +def _compute_recovery_time( + arrivals: list, + ttfts: list, + *, + threshold_ms: float, + window_s: float = 3.0, + min_samples: int = 5, +) -> Optional[float]: + """ + Find the elapsed time (seconds, relative to the start of the post-burst + steady window) at which a rolling-window p99 of TTFT first falls below + `threshold_ms`. 
Returns None if it never recovers within the window or + if there are too few samples to compute a stable percentile. + + `arrivals` and `ttfts` are parallel arrays — arrivals must be relative + times in seconds from the start of the measurement window. + """ + if not arrivals or len(arrivals) < min_samples: + return None + pairs = sorted(zip(arrivals, ttfts)) + a = [p[0] for p in pairs] + t = [p[1] for p in pairs] + n = len(a) + j = 0 + for i in range(n): + while j < i and a[j] < a[i] - window_s: + j += 1 + if i - j + 1 < min_samples: + continue + window = t[j:i + 1] + if float(np.percentile(window, 99)) < threshold_ms: + return round(float(a[i]), 2) + return None + + class AccelMarkLoadGen: def __init__( @@ -154,8 +260,7 @@ def run(self, inference_fn: Callable) -> dict: "_run_interactive requires an async inference_fn(request: InferenceRequest) -> InferenceResult. " "Pass an async coroutine (inference_fn_streaming)." ) - loop = asyncio.get_event_loop() - return loop.run_until_complete(self._run_interactive_async(inference_fn)) + return asyncio.run(self._run_interactive_async(inference_fn)) elif self.scenario == "training": return self._run_training(inference_fn) elif self.scenario == "multiturn": @@ -340,6 +445,12 @@ def _fire_request() -> None: if len(ttft_p99s) >= 2: ttft_p99_drift_ms = round(ttft_p99s[-1] - ttft_p99s[0], 1) + # Inter-sample throughput stability across the post-warmup window. + # This is conceptually distinct from `throttle_ratio` (min/max): CV + # measures dispersion around the mean and is a better signal for + # "the chip throttles intermittently" vs "the chip is degrading". 
+ throughput_cv_block = _reliability_block(throughputs, decimals=1) + return { "sustained": { "sustained_concurrency": sustained_concurrency, @@ -351,6 +462,7 @@ def _fire_request() -> None: "throttle_ratio": throttle_ratio, "throttle_onset_minute": throttle_onset_minute, "ttft_p99_drift_ms": ttft_p99_drift_ms, + "throughput_post_warmup_reliability": throughput_cv_block, } } @@ -518,6 +630,12 @@ def _run_offline(self, inference_fn: Callable) -> dict: "power_watts_avg": None, "power_watts_peak": None, "oom": False, + # Per-run throughput reliability: lets the UI show "stable ✓ / + # noisy ⚠ / unstable ✗" without forcing the user to download + # samples.jsonl. `runs` preserves the underlying values so + # future stability rules can be recomputed without a re-run. + "throughput_tokens_per_sec_reliability": + _reliability_block(run_throughputs, decimals=2), "_throughput_note": "output_only", "_concurrency_note": ( "client_concurrency is the number of requests sent simultaneously. " @@ -548,8 +666,7 @@ def _run_online(self, inference_fn: Callable) -> dict: "Pass an async coroutine (inference_fn_streaming), " "not a sync wrapper." ) - loop = asyncio.get_event_loop() - return loop.run_until_complete(self._run_online_async(inference_fn)) + return asyncio.run(self._run_online_async(inference_fn)) async def _warmup_requests(self, async_inference_fn, count: int, label: str) -> None: """ @@ -658,6 +775,15 @@ async def send_request(req: InferenceRequest, t_arrival: float) -> InferenceResu tpot_p90 = float(np.percentile(all_tpots, 90)) if all_tpots else 0 tpot_p99 = float(np.percentile(all_tpots, 99)) if all_tpots else 0 + # Per-run p99s, used to surface inter-run TTFT variability. + # We compute each run's p99 independently; the scenario's overall + # `ttft_ms_p99` (above) is computed by pooling all per-request + # TTFTs, which is the headline number, while this CV captures + # whether that number is reproducible across `num_runs`. 
+ ttft_p99_per_run = [ + float(np.percentile(run, 99)) for run in run_ttfts if run + ] + sla_met = ttft_p99 < sla_ms if sla_met: max_valid_qps = target_qps @@ -677,6 +803,8 @@ async def send_request(req: InferenceRequest, t_arrival: float) -> InferenceResu "tpot_ms_p99": round(tpot_p99, 2), "elapsed_seconds_median": round(float(np.median(run_elapsed_times)), 1), "sla_met": sla_met, + "ttft_ms_p99_reliability": + _reliability_block(ttft_p99_per_run, decimals=2), }) self._write_samples(all_samples) @@ -741,7 +869,16 @@ async def _run_burst_async(self, async_inference_fn) -> dict: all_samples: list[SampleRecord] = [] async def fire_window(qps: float, duration_secs: float, label: str): - """Fire requests at Poisson QPS for duration_secs. Returns list of InferenceResult.""" + """ + Fire requests at Poisson QPS for duration_secs. + + Returns + results : list[InferenceResult] in arrival order + elapsed : wall-clock seconds the window took + arrival_times : list[float] — each request's intended arrival + relative to window start (parallel to results). + Used to compute post-burst recovery_time_seconds. + """ n_expected = max(1, int(qps * duration_secs * 1.5)) requests_pool = (self.requests * ((n_expected // len(self.requests)) + 2))[:n_expected] @@ -750,7 +887,7 @@ async def fire_window(qps: float, duration_secs: float, label: str): pairs = [(req, t) for req, t in zip(requests_pool, arrival_times) if t < duration_secs] if not pairs: - return [], 0.0 + return [], 0.0, [] t_start = loop.time() @@ -762,20 +899,38 @@ async def send(req, t_arrival): results = list(await asyncio.gather(*[send(req, t) for req, t in pairs])) elapsed = loop.time() - t_start - return results, elapsed + window_arrivals = [t for (_, t) in pairs] + return results, elapsed, window_arrivals + + # Each cycle's per-request data, captured so we can compute + # recovery_time_seconds in a single post-processing pass after + # all cycles complete. 
+ cycle_data: list[dict] = [] for cycle_idx in range(num_runs): tqdm.write(f"[burst] cycle {cycle_idx + 1}/{num_runs} — steady({steady_qps} qps)...") - steady_results, steady_elapsed = await fire_window(steady_qps, steady_dur, "steady") - steady_ttfts = [r.first_token_time_ms for r in steady_results - if r.success and r.first_token_time_ms is not None] + steady_results, steady_elapsed, steady_arrivals = await fire_window( + steady_qps, steady_dur, "steady" + ) + steady_ttfts_pairs = [ + (a, r.first_token_time_ms) + for r, a in zip(steady_results, steady_arrivals) + if r.success and r.first_token_time_ms is not None + ] + steady_ttfts = [v for _, v in steady_ttfts_pairs] tqdm.write(f"[burst] cycle {cycle_idx + 1}/{num_runs} — burst({burst_qps} qps)...") - burst_results, burst_elapsed = await fire_window(burst_qps, burst_dur, "burst") - burst_ttfts = [r.first_token_time_ms for r in burst_results - if r.success and r.first_token_time_ms is not None] + burst_results, burst_elapsed, burst_arrivals = await fire_window( + burst_qps, burst_dur, "burst" + ) + burst_ttfts_pairs = [ + (a, r.first_token_time_ms) + for r, a in zip(burst_results, burst_arrivals) + if r.success and r.first_token_time_ms is not None + ] + burst_ttfts = [v for _, v in burst_ttfts_pairs] all_steady_ttfts.extend(steady_ttfts) all_burst_ttfts.extend(burst_ttfts) @@ -783,6 +938,11 @@ async def send(req, t_arrival): cycle_steady_p99 = float(np.percentile(steady_ttfts, 99)) if steady_ttfts else None cycle_burst_p99 = float(np.percentile(burst_ttfts, 99)) if burst_ttfts else None + cycle_data.append({ + "steady_pairs": steady_ttfts_pairs, + "burst_pairs": burst_ttfts_pairs, + }) + results_by_cycle.append({ "cycle": cycle_idx + 1, "steady_requests": len(steady_ttfts), @@ -808,6 +968,39 @@ async def send(req, t_arrival): sla_met_during_burst = (burst_p99 < sla_ms) if burst_p99 is not None else False degradation = round(burst_p99 / steady_p99, 3) if (burst_p99 and steady_p99) else None + # ── Recovery time 
after burst ───────────────────────────────────────── + # Definition: seconds elapsed within a post-burst steady window before + # the rolling p99 TTFT drops below 1.5× the long-term steady baseline. + # + # Implementation: the loop above runs `steady → burst` per cycle, so + # cycle (i+1)'s steady window is the post-burst recovery window for + # cycle i's burst. We compute one recovery time per cycle that has a + # successor steady window, then emit the median (more robust than + # mean to a single outlier cycle). + recovery_baseline_p99 = steady_p99 # long-term, post-warmup baseline + cycle_recovery_times: list[float] = [] + if recovery_baseline_p99 and recovery_baseline_p99 > 0: + threshold = 1.5 * recovery_baseline_p99 + for i in range(len(cycle_data) - 1): + post = cycle_data[i + 1]["steady_pairs"] + if not post: + continue + arrivals = [a for a, _ in post] + ttfts = [t for _, t in post] + rec = _compute_recovery_time( + arrivals, ttfts, + threshold_ms=threshold, + window_s=min(3.0, steady_dur / 2), + min_samples=5, + ) + if rec is not None: + cycle_recovery_times.append(rec) + + recovery_time_seconds = ( + round(float(np.median(cycle_recovery_times)), 2) + if cycle_recovery_times else None + ) + sla_icon = "✓" if sla_met_during_burst else "✗" chip_str = f" ({self.chip_count} chips)" if self.chip_count > 1 else "" tqdm.write( @@ -832,6 +1025,15 @@ async def send(req, t_arrival): "burst_ttft_p99_ms": round(burst_p99, 2) if burst_p99 else None, "sla_met_during_burst": sla_met_during_burst, "burst_degradation_ratio": degradation, + "recovery_time_seconds": recovery_time_seconds, + "recovery_time_seconds_per_cycle": [ + round(v, 2) for v in cycle_recovery_times + ] if cycle_recovery_times else [], + "_recovery_definition": ( + "Median seconds within the post-burst steady window before " + "rolling TTFT p99 drops below 1.5x the long-term steady baseline. " + "Lower is better; None means it never recovered within the window." 
+ ), "results_by_cycle": results_by_cycle, }} @@ -841,8 +1043,7 @@ def _run_burst(self, inference_fn: Callable) -> dict: raise TypeError( "_run_burst requires an async inference_fn(request: InferenceRequest) -> InferenceResult." ) - loop = asyncio.get_event_loop() - return loop.run_until_complete(self._run_burst_async(inference_fn)) + return asyncio.run(self._run_burst_async(inference_fn)) # ------------------------------------------------------------------ # Interactive scenario @@ -853,11 +1054,15 @@ async def _run_interactive_async(self, async_inference_fn) -> dict: Send one request at a time, waiting for completion before sending the next. Measures single-request latency in isolation (no queueing pressure). Uses the same async engine as online to ensure consistent TTFT measurement. + + Per-run TTFT p99s are captured so the result emits an inter-run + reliability block alongside the pooled metrics. """ all_ttfts: list[float] = [] all_tpots: list[float] = [] all_samples: list[SampleRecord] = [] run_elapsed_times: list[float] = [] + ttft_p99_per_run: list[float] = [] total_runs = self.warmup_runs + self.suite["num_runs"] @@ -908,6 +1113,8 @@ async def _run_interactive_async(self, async_inference_fn) -> dict: all_ttfts.extend(run_ttfts) all_tpots.extend(run_tpots) run_elapsed_times.append(run_elapsed) + if run_ttfts: + ttft_p99_per_run.append(float(np.percentile(run_ttfts, 99))) if run_ttfts: tqdm.write( @@ -929,6 +1136,8 @@ async def _run_interactive_async(self, async_inference_fn) -> dict: "tpot_ms_p99": round(float(np.percentile(all_tpots, 99)), 2) if all_tpots else None, "peak_memory_gb": None, "elapsed_seconds_median": round(float(np.median(run_elapsed_times)), 1) if run_elapsed_times else None, + "ttft_ms_p99_reliability": + _reliability_block(ttft_p99_per_run, decimals=2), }} # ------------------------------------------------------------------ diff --git a/loadgen/tests/test_reliability.py b/loadgen/tests/test_reliability.py new file mode 100644 index 
00000000..6cdc047d --- /dev/null +++ b/loadgen/tests/test_reliability.py @@ -0,0 +1,241 @@ +""" +Tests for the reliability blocks emitted by each loadgen scenario. + +Locks down: +- `_cv_pct` / `_stability_label` helpers +- `_reliability_block` shape contract +- `_compute_recovery_time` rolling-window logic +- offline / online / interactive / sustained / burst each emit the new + fields with the expected types and a non-None CV when n >= 2 + +These tests use the same MockInferenceFn pattern as test_warmup.py — a +real `async def` closure bound to a counter, since loadgen detects +coroutines via `asyncio.iscoroutinefunction()`. +""" + +from __future__ import annotations + +import asyncio + +import pytest + +from loadgen.loadgen import ( + AccelMarkLoadGen, + _compute_recovery_time, + _cv_pct, + _reliability_block, + _stability_label, +) +from loadgen.types import InferenceResult + + +# ── Pure helper tests ───────────────────────────────────────────────────────── + +def test_cv_pct_basic(): + assert _cv_pct([100.0, 100.0, 100.0]) == 0.0 + cv = _cv_pct([90.0, 100.0, 110.0]) + assert cv is not None + assert 9.0 < cv < 11.0, f"expected CV near 10%, got {cv}" + + +def test_cv_pct_returns_none_for_small_or_invalid_input(): + assert _cv_pct([]) is None + assert _cv_pct([42.0]) is None + assert _cv_pct([0.0, 0.0, 0.0]) is None # mean=0, undefined CV + + +def test_stability_labels(): + assert _stability_label(0.5) == "stable" + assert _stability_label(2.0) == "stable" # inclusive boundary + assert _stability_label(3.0) == "noisy" + assert _stability_label(5.0) == "noisy" # inclusive boundary + assert _stability_label(7.0) == "unstable" + assert _stability_label(None) is None + + +def test_reliability_block_shape(): + block = _reliability_block([100.0, 102.0, 98.0], decimals=1) + assert set(block.keys()) == {"n", "mean", "std", "cv_pct", "stability", "runs"} + assert block["n"] == 3 + assert block["mean"] == 100.0 + assert block["runs"] == [100.0, 102.0, 98.0] + assert 
block["stability"] == "stable" + + +def test_reliability_block_empty_input_returns_empty_dict(): + """Frontend gates on the block being non-empty; never None.""" + assert _reliability_block([]) == {} + + +# ── Recovery-time tests ────────────────────────────────────────────────────── + +def test_recovery_time_finds_the_first_clean_window(): + """Build a synthetic post-burst window where the first 5 seconds are + elevated and everything after is clean. Recovery must land around 5s.""" + arrivals = [i * 0.5 for i in range(40)] # 20 seconds of arrivals at 2 Hz + # Elevated TTFTs first 5 s, then drop to clean values. + ttfts = [1500.0 if a < 5.0 else 200.0 for a in arrivals] + rec = _compute_recovery_time(arrivals, ttfts, threshold_ms=500.0, window_s=2.0, min_samples=4) + assert rec is not None, "expected recovery, got None" + assert 4.5 <= rec <= 8.0, f"recovery expected ≈5–8s, got {rec}" + + +def test_recovery_time_returns_none_when_never_recovers(): + arrivals = [i * 0.5 for i in range(20)] + ttfts = [2000.0] * 20 # always above any sane threshold + assert _compute_recovery_time(arrivals, ttfts, threshold_ms=500.0) is None + + +def test_recovery_time_returns_none_when_too_few_samples(): + assert _compute_recovery_time([], [], threshold_ms=500.0) is None + assert _compute_recovery_time([1.0, 2.0], [100.0, 100.0], + threshold_ms=500.0, min_samples=5) is None + + +# ── Scenario integration tests ─────────────────────────────────────────────── + +def _make_requests(n: int): + from loadgen.loadgen import InferenceRequest + return [ + InferenceRequest(prompt=f"p{i}", request_id=i, input_tokens=10, max_tokens=20) + for i in range(n) + ] + + +def _async_fn(ttft_ms: float = 100.0): + """Build a real `async def` returning a constant InferenceResult.""" + async def fn(request) -> InferenceResult: + await asyncio.sleep(0) + return InferenceResult( + first_token_time_ms=ttft_ms, + total_time_ms=ttft_ms * 2, + output_tokens=20, + input_tokens=10, + success=True, + ) + return fn + 
+ +def _sync_offline_fn(ttft_ms: float = 100.0): + """Sync inference_fn used for offline scenario — receives list of requests.""" + def fn(reqs): + return [ + InferenceResult( + first_token_time_ms=ttft_ms, + total_time_ms=ttft_ms * 2, + output_tokens=20, + input_tokens=10, + success=True, + ) + for _ in reqs + ] + return fn + + +def test_offline_emits_throughput_reliability(tmp_path): + suite = { + "concurrency_levels": [4], + "num_runs": 3, + "warmup_runs": 0, + "request_count": 8, + "input_tokens": 10, + } + requests = _make_requests(8) + gen = AccelMarkLoadGen(suite, requests, "offline", str(tmp_path)) + result = gen.run(_sync_offline_fn()) + + cc_results = result["offline"]["results_by_concurrency"] + assert cc_results, "offline scenario produced no results" + rel = cc_results[0].get("throughput_tokens_per_sec_reliability") + assert rel, "offline scenario did not emit reliability block" + assert rel["n"] == 3 + assert rel["cv_pct"] is not None + assert rel["stability"] in {"stable", "noisy", "unstable"} + assert len(rel["runs"]) == 3 + + +def test_online_emits_ttft_p99_reliability(tmp_path): + suite = { + "num_runs": 2, + "online_qps_levels": [2.0], + "online_sla_ttft_ms": 1000, + "online_request_count": 6, + "online_warmup_requests": 0, + "input_tokens": 10, + } + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + result = gen.run(_async_fn(ttft_ms=100.0)) + + qps_results = result["online"]["results_by_qps"] + assert qps_results, "online scenario produced no results" + rel = qps_results[0].get("ttft_ms_p99_reliability") + assert rel, "online scenario did not emit reliability block" + assert rel["n"] == 2 + # With constant TTFT the CV should be exactly 0. 
+ assert rel["cv_pct"] == 0.0 + assert rel["stability"] == "stable" + + +def test_interactive_emits_ttft_p99_reliability(tmp_path): + suite = { + "num_runs": 2, + "interactive_warmup_runs": 0, + "interactive_request_count": 4, + "input_tokens": 10, + } + requests = _make_requests(4) + gen = AccelMarkLoadGen(suite, requests, "interactive", str(tmp_path)) + result = gen.run(_async_fn(ttft_ms=120.0)) + + inter = result["interactive"] + rel = inter.get("ttft_ms_p99_reliability") + assert rel, "interactive scenario did not emit reliability block" + assert rel["n"] == 2 + assert rel["stability"] == "stable" + + +def test_sustained_emits_throughput_post_warmup_reliability(tmp_path): + """Run a tiny sustained scenario — long enough to produce ≥2 sample + intervals so CV is computable.""" + suite = { + "sustained_concurrency": 2, + "duration_minutes": 4 / 60, # 4 seconds total + "sample_interval_seconds": 1.0, + "warmup_minutes": 1 / 60, # 1-second warmup + "input_tokens": 10, + } + requests = _make_requests(20) + gen = AccelMarkLoadGen(suite, requests, "sustained", str(tmp_path)) + result = gen.run(_async_fn(ttft_ms=30.0)) + + rel = result["sustained"].get("throughput_post_warmup_reliability") + assert isinstance(rel, dict), "sustained scenario did not emit reliability block" + # cv_pct may be None if not enough post-warmup samples landed; we only + # require the field exists. When n >= 2 the stability must be set. 
+ if rel.get("n", 0) >= 2: + assert rel["stability"] in {"stable", "noisy", "unstable"} + + +def test_burst_emits_recovery_time_seconds(tmp_path): + """Burst with constant low TTFT should report a finite (small) + recovery_time and a list (possibly empty) per-cycle field.""" + suite = { + "num_runs": 2, + "online_sla_ttft_ms": 1000, + "online_request_count": 6, + "burst_warmup_requests": 0, + "burst_steady_qps": 2.0, + "burst_peak_qps": 4.0, + "burst_duration_seconds": 0.5, + "burst_interval_seconds": 0.5, + "input_tokens": 10, + } + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "burst", str(tmp_path)) + result = gen.run(_async_fn(ttft_ms=100.0)) + + burst = result["burst"] + assert "recovery_time_seconds" in burst + assert "recovery_time_seconds_per_cycle" in burst + assert isinstance(burst["recovery_time_seconds_per_cycle"], list) diff --git a/schema/env.schema.json b/schema/env.schema.json index e80cd942..4d90a791 100644 --- a/schema/env.schema.json +++ b/schema/env.schema.json @@ -73,6 +73,11 @@ "intra_node_interconnect": { "type": ["string","null"], "description": "Intra-node GPU interconnect detected, e.g. 'NVLink', or null if not detected" + }, + "vendor_details": { + "type": ["object","null"], + "additionalProperties": true, + "description": "Optional bag of vendor-specific environment fields that don't fit any unified schema (e.g. NVIDIA NVML clocks, AMD ROCm-SMI counters, Ascend HCCL link health, Apple Metal version). The schema is deliberately permissive — each vendor's platforms/.py decides what to record. The leaderboard UI renders this dict as a flat key→value list, omitting null/empty values, and never tries to unify across vendors." 
} } } From 5d593cd29ff5ddcd59f34e67a3730ffda88e4160 Mon Sep 17 00:00:00 2001 From: Liang Juhao Date: Tue, 19 May 2026 17:02:32 +0800 Subject: [PATCH 3/5] chore: backfill sustained reliability stats into existing results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Populates the new throughput_post_warmup_reliability block on every sustained scenario in pre-existing result.json files so the leaderboard's "Reliability" panel and subtitle pill have data to display for historical submissions. New runs going forward emit this field directly from loadgen. What got modified - 255 result.json files (suite-level + per-scenario sustained/result.json pairs across ~127 unique submissions) - Net change: one ~30-line block per file, no existing fields touched - Encoding preserved per-file: ascii-only files keep their \u escape style; files containing UTF-8 characters (Ascend submissions etc.) keep them unescaped How it was computed - For each sustained scenario, take the existing per-interval samples array under metrics.sustained.samples[], drop is_warmup samples, then compute mean / std / CV / stability over throughput_tokens_per_sec. - Thresholds: CV ≤ 2% → stable, ≤ 5% → noisy, > 5% → unstable (kept in sync with loadgen.loadgen). Tunable later if the observed distribution skews too heavily into one bucket. What could not be backfilled - offline / online / interactive / burst: per-run breakdowns were never persisted to samples.jsonl or result.json, so historical reliability cannot be recovered. The frontend silently hides the badge for these scenarios on old results. The one-shot backfill script used here was not committed — it lives in local git history if it's ever needed again (see this commit's parent hash if you need to recover it). For new sustained results, loadgen now emits the reliability block natively, so the script will not be re-invoked under normal operation. 
Co-authored-by: Cursor --- .../result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 
++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- 
.../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 
++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- 
.../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../bf16/result.json | 25 +++++++++++- .../bf16/sustained/result.json | 25 +++++++++++- .../fp8/result.json | 25 +++++++++++- .../fp8/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 
25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../w8a16/result.json | 25 +++++++++++- .../w8a16/sustained/result.json | 25 +++++++++++- .../w8a8/result.json | 25 +++++++++++- .../w8a8/sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../fp16/result.json | 25 +++++++++++- .../fp16/sustained/result.json | 25 +++++++++++- .../w4a16/result.json | 25 +++++++++++- .../w4a16/sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 25 +++++++++++- .../sustained/result.json | 25 +++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- .../result.json | 39 ++++++++++++++++++- .../sustained/result.json | 39 ++++++++++++++++++- 255 files changed, 7492 insertions(+), 255 deletions(-) diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json index cf46b029..adcbafe3 100644 --- a/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json +++ b/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json @@ -496,7 +496,44 @@ "sustained_throughput_tokens_per_sec": 562.5, "throttle_ratio": 0.966, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -0.3 + "ttft_p99_drift_ms": -0.3, + 
"throughput_post_warmup_reliability": { + "n": 28, + "mean": 562.5, + "std": 4.9, + "cv_pct": 0.86, + "stability": "stable", + "runs": [ + 566.2, + 560.8, + 565.3, + 561.4, + 561.9, + 570.0, + 558.3, + 563.3, + 552.7, + 569.3, + 558.9, + 568.4, + 557.2, + 565.5, + 554.1, + 563.7, + 563.3, + 565.0, + 563.3, + 564.7, + 564.3, + 562.3, + 569.5, + 550.8, + 562.5, + 561.7, + 566.5, + 558.2 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json index fb4ac5fd..126a4230 100644 --- a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json +++ b/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json @@ -436,7 +436,44 @@ "sustained_throughput_tokens_per_sec": 54.9, "throttle_ratio": 0.666, "throttle_onset_minute": 11, - "ttft_p99_drift_ms": 66 + "ttft_p99_drift_ms": 66, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 54.9, + "std": 4.9, + "cv_pct": 8.95, + "stability": "unstable", + "runs": [ + 56.2, + 56.3, + 56.2, + 56.3, + 56.3, + 56.2, + 56.2, + 56.2, + 56.3, + 37.5, + 56.3, + 56.2, + 56.3, + 56.2, + 56.3, + 56.2, + 56.3, + 56.2, + 56.3, + 56.2, + 37.5, + 56.3, + 56.2, + 56.2, + 56.3, + 56.3, + 56.2, + 56.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json index 29bb5796..11df3df3 100644 --- a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json +++ b/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json @@ -327,7 +327,30 @@ "sustained_throughput_tokens_per_sec": 7095.4, "throttle_ratio": 0.92, 
"throttle_onset_minute": null, - "ttft_p99_drift_ms": -4707.7 + "ttft_p99_drift_ms": -4707.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 7095.4, + "std": 147.1, + "cv_pct": 2.07, + "stability": "noisy", + "runs": [ + 6616.4, + 7181.7, + 7188.9, + 7110.2, + 7106.5, + 7144.4, + 7158.3, + 7020.0, + 7160.7, + 7183.9, + 7180.8, + 7127.9, + 7111.1, + 7044.6 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/result.json b/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/result.json index 1652c2b9..cfe652b4 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/result.json @@ -573,7 +573,44 @@ "sustained_throughput_tokens_per_sec": 268.0, "throttle_ratio": 0.868, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -21.4 + "ttft_p99_drift_ms": -21.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 268.0, + "std": 8.9, + "cv_pct": 3.3, + "stability": "noisy", + "runs": [ + 251.1, + 271.0, + 265.9, + 264.0, + 276.5, + 269.5, + 247.8, + 277.0, + 283.2, + 262.3, + 271.0, + 266.3, + 268.9, + 252.2, + 267.2, + 263.6, + 271.2, + 285.4, + 270.5, + 266.6, + 270.5, + 277.3, + 266.5, + 259.0, + 279.0, + 268.9, + 271.4, + 260.1 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/sustained/result.json b/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/sustained/result.json index 6aa62562..8ca9962f 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/sustained/result.json @@ -475,7 +475,44 @@ 
"sustained_throughput_tokens_per_sec": 268.0, "throttle_ratio": 0.868, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -21.4 + "ttft_p99_drift_ms": -21.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 268.0, + "std": 8.9, + "cv_pct": 3.3, + "stability": "noisy", + "runs": [ + 251.1, + 271.0, + 265.9, + 264.0, + 276.5, + 269.5, + 247.8, + 277.0, + 283.2, + 262.3, + 271.0, + 266.3, + 268.9, + 252.2, + 267.2, + 263.6, + 271.2, + 285.4, + 270.5, + 266.6, + 270.5, + 277.3, + 266.5, + 259.0, + 279.0, + 268.9, + 271.4, + 260.1 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json index 3eef1882..4e41b5a9 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json @@ -517,7 +517,44 @@ "sustained_throughput_tokens_per_sec": 53.2, "throttle_ratio": 0.733, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -20614.1 + "ttft_p99_drift_ms": -20614.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.2, + "std": 5.6, + "cv_pct": 10.6, + "stability": "unstable", + "runs": [ + 46.9, + 59.7, + 51.2, + 51.2, + 55.5, + 46.9, + 64.0, + 46.9, + 59.7, + 46.9, + 55.5, + 55.5, + 46.9, + 59.8, + 46.9, + 59.7, + 46.9, + 59.7, + 51.2, + 51.2, + 55.4, + 46.9, + 64.0, + 46.9, + 55.5, + 51.2, + 55.5, + 51.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json index f8184d71..7d4de213 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json +++ 
b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json @@ -475,7 +475,44 @@ "sustained_throughput_tokens_per_sec": 53.2, "throttle_ratio": 0.733, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -20614.1 + "ttft_p99_drift_ms": -20614.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.2, + "std": 5.6, + "cv_pct": 10.6, + "stability": "unstable", + "runs": [ + 46.9, + 59.7, + 51.2, + 51.2, + 55.5, + 46.9, + 64.0, + 46.9, + 59.7, + 46.9, + 55.5, + 55.5, + 46.9, + 59.8, + 46.9, + 59.7, + 46.9, + 59.7, + 51.2, + 51.2, + 55.4, + 46.9, + 64.0, + 46.9, + 55.5, + 51.2, + 55.5, + 51.2 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/result.json b/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/result.json index 67225a10..9e65b7bf 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/result.json @@ -409,7 +409,30 @@ "sustained_throughput_tokens_per_sec": 1238.9, "throttle_ratio": 0.883, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": -302.5 + "ttft_p99_drift_ms": -302.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1238.9, + "std": 47.1, + "cv_pct": 3.8, + "stability": "noisy", + "runs": [ + 1230.3, + 1302.7, + 1288.1, + 1349.2, + 1277.4, + 1204.2, + 1213.4, + 1211.4, + 1191.5, + 1223.0, + 1239.3, + 1204.1, + 1199.3, + 1210.9 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/sustained/result.json b/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/sustained/result.json index 0d8c6baf..113b11e6 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/sustained/result.json +++ 
b/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/sustained/result.json @@ -325,7 +325,30 @@ "sustained_throughput_tokens_per_sec": 1238.9, "throttle_ratio": 0.883, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": -302.5 + "ttft_p99_drift_ms": -302.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1238.9, + "std": 47.1, + "cv_pct": 3.8, + "stability": "noisy", + "runs": [ + 1230.3, + 1302.7, + 1288.1, + 1349.2, + 1277.4, + 1204.2, + 1213.4, + 1211.4, + 1191.5, + 1223.0, + 1239.3, + 1204.1, + 1199.3, + 1210.9 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json index 3061b81d..fa6e477b 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json @@ -574,7 +574,44 @@ "sustained_throughput_tokens_per_sec": 53.2, "throttle_ratio": 0.616, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -127.8 + "ttft_p99_drift_ms": -127.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.2, + "std": 7.0, + "cv_pct": 13.23, + "stability": "unstable", + "runs": [ + 56.7, + 55.0, + 54.4, + 54.2, + 52.3, + 40.7, + 62.1, + 54.1, + 52.1, + 63.0, + 40.6, + 65.9, + 41.9, + 44.9, + 65.4, + 50.3, + 58.2, + 52.0, + 49.0, + 55.4, + 49.7, + 51.5, + 54.8, + 55.4, + 63.5, + 45.1, + 55.0, + 45.3 + ] + } }, "interactive": { "ttft_ms_p50": 151.0, diff --git a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json index 9521952a..9574beb6 100644 --- 
a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json @@ -475,7 +475,44 @@ "sustained_throughput_tokens_per_sec": 53.2, "throttle_ratio": 0.616, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -127.8 + "ttft_p99_drift_ms": -127.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.2, + "std": 7.0, + "cv_pct": 13.23, + "stability": "unstable", + "runs": [ + 56.7, + 55.0, + 54.4, + 54.2, + 52.3, + 40.7, + 62.1, + 54.1, + 52.1, + 63.0, + 40.6, + 65.9, + 41.9, + 44.9, + 65.4, + 50.3, + 58.2, + 52.0, + 49.0, + 55.4, + 49.7, + 51.5, + 54.8, + 55.4, + 63.5, + 45.1, + 55.0, + 45.3 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json index 1941f8ac..4db5920a 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 226.6, "throttle_ratio": 0.821, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -109.1 + "ttft_p99_drift_ms": -109.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 226.6, + "std": 11.3, + "cv_pct": 5.01, + "stability": "unstable", + "runs": [ + 217.7, + 215.0, + 227.9, + 236.8, + 220.7, + 231.6, + 213.2, + 230.9, + 230.7, + 225.5, + 221.6, + 244.7, + 201.0, + 231.4, + 239.6, + 239.7, + 220.8, + 225.3, + 238.2, + 201.8, + 239.8, + 226.4, + 222.9, + 219.6, + 241.1, + 236.3, + 213.8, + 231.0 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json 
b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json index abb80572..d64f4c41 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json @@ -475,7 +475,44 @@ "sustained_throughput_tokens_per_sec": 226.6, "throttle_ratio": 0.821, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -109.1 + "ttft_p99_drift_ms": -109.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 226.6, + "std": 11.3, + "cv_pct": 5.01, + "stability": "unstable", + "runs": [ + 217.7, + 215.0, + 227.9, + 236.8, + 220.7, + 231.6, + 213.2, + 230.9, + 230.7, + 225.5, + 221.6, + 244.7, + 201.0, + 231.4, + 239.6, + 239.7, + 220.8, + 225.3, + 238.2, + 201.8, + 239.8, + 226.4, + 222.9, + 219.6, + 241.1, + 236.3, + 213.8, + 231.0 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json index 7b12b9eb..ed241d8c 100644 --- a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json @@ -621,7 +621,44 @@ "sustained_throughput_tokens_per_sec": 53.5, "throttle_ratio": 0.603, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -14.8 + "ttft_p99_drift_ms": -14.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.5, + "std": 7.9, + "cv_pct": 14.84, + "stability": "unstable", + "runs": [ + 67.8, + 42.6, + 54.7, + 54.2, + 54.7, + 64.5, + 52.3, + 41.8, + 48.5, + 63.0, + 47.9, + 60.4, + 59.1, + 54.8, + 48.3, + 50.3, + 50.4, + 54.4, + 63.6, + 48.6, + 56.1, + 46.2, + 60.1, + 40.9, + 67.1, + 42.5, + 60.5, + 43.5 + ] + } }, "interactive": 
{ "ttft_ms_p50": 152.7, diff --git a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json index 6bd84a6a..7940fe3b 100644 --- a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json @@ -522,7 +522,44 @@ "sustained_throughput_tokens_per_sec": 53.5, "throttle_ratio": 0.603, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -14.8 + "ttft_p99_drift_ms": -14.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.5, + "std": 7.9, + "cv_pct": 14.84, + "stability": "unstable", + "runs": [ + 67.8, + 42.6, + 54.7, + 54.2, + 54.7, + 64.5, + 52.3, + 41.8, + 48.5, + 63.0, + 47.9, + 60.4, + 59.1, + 54.8, + 48.3, + 50.3, + 50.4, + 54.4, + 63.6, + 48.6, + 56.1, + 46.2, + 60.1, + 40.9, + 67.1, + 42.5, + 60.5, + 43.5 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/result.json b/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/result.json index 6bbc8bb0..284b5456 100644 --- a/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/result.json @@ -618,7 +618,44 @@ "sustained_throughput_tokens_per_sec": 262.2, "throttle_ratio": 0.861, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -18.4 + "ttft_p99_drift_ms": -18.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 262.2, + "std": 9.5, + "cv_pct": 3.62, + "stability": "noisy", + "runs": [ + 262.3, + 254.2, + 254.5, + 278.0, + 260.2, + 267.7, + 253.4, + 262.9, + 259.5, + 276.3, + 252.2, + 242.8, + 280.9, + 
268.2, + 253.6, + 260.4, + 264.4, + 273.4, + 259.1, + 264.3, + 258.9, + 253.2, + 263.8, + 265.3, + 263.8, + 250.9, + 256.8, + 282.0 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/sustained/result.json b/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/sustained/result.json index 19390809..a8b21777 100644 --- a/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/sustained/result.json @@ -522,7 +522,44 @@ "sustained_throughput_tokens_per_sec": 262.2, "throttle_ratio": 0.861, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -18.4 + "ttft_p99_drift_ms": -18.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 262.2, + "std": 9.5, + "cv_pct": 3.62, + "stability": "noisy", + "runs": [ + 262.3, + 254.2, + 254.5, + 278.0, + 260.2, + 267.7, + 253.4, + 262.9, + 259.5, + 276.3, + 252.2, + 242.8, + 280.9, + 268.2, + 253.6, + 260.4, + 264.4, + 273.4, + 259.1, + 264.3, + 258.9, + 253.2, + 263.8, + 265.3, + 263.8, + 250.9, + 256.8, + 282.0 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/result.json b/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/result.json index 9d0b9088..aae2043c 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/result.json @@ -620,7 +620,44 @@ "sustained_throughput_tokens_per_sec": 376.6, "throttle_ratio": 0.893, "throttle_onset_minute": 21.0, - "ttft_p99_drift_ms": -8.0 + "ttft_p99_drift_ms": -8.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 376.6, + "std": 11.4, + "cv_pct": 
3.03, + "stability": "noisy", + "runs": [ + 368.1, + 384.3, + 387.7, + 375.4, + 366.8, + 376.1, + 382.0, + 372.0, + 377.9, + 372.0, + 369.6, + 377.6, + 376.8, + 385.8, + 362.3, + 392.8, + 363.8, + 393.9, + 373.8, + 352.6, + 394.1, + 360.1, + 390.1, + 373.1, + 388.4, + 370.3, + 394.7, + 363.7 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/sustained/result.json b/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/sustained/result.json index 0a93919d..f2d38380 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/sustained/result.json @@ -522,7 +522,44 @@ "sustained_throughput_tokens_per_sec": 376.6, "throttle_ratio": 0.893, "throttle_onset_minute": 21.0, - "ttft_p99_drift_ms": -8.0 + "ttft_p99_drift_ms": -8.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 376.6, + "std": 11.4, + "cv_pct": 3.03, + "stability": "noisy", + "runs": [ + 368.1, + 384.3, + 387.7, + 375.4, + 366.8, + 376.1, + 382.0, + 372.0, + 377.9, + 372.0, + 369.6, + 377.6, + 376.8, + 385.8, + 362.3, + 392.8, + 363.8, + 393.9, + 373.8, + 352.6, + 394.1, + 360.1, + 390.1, + 373.1, + 388.4, + 370.3, + 394.7, + 363.7 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json index bc4cf352..be7c0d9d 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json @@ -564,7 +564,44 @@ "sustained_throughput_tokens_per_sec": 54.2, "throttle_ratio": 0.784, 
"throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -321.6 + "ttft_p99_drift_ms": -321.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 54.2, + "std": 3.8, + "cv_pct": 7.06, + "stability": "unstable", + "runs": [ + 46.9, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.5, + 55.5, + 51.2, + 59.7, + 51.2, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.5, + 55.5, + 46.9, + 59.8, + 51.2, + 59.8, + 51.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json index 03f09823..c230659d 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json @@ -522,7 +522,44 @@ "sustained_throughput_tokens_per_sec": 54.2, "throttle_ratio": 0.784, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -321.6 + "ttft_p99_drift_ms": -321.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 54.2, + "std": 3.8, + "cv_pct": 7.06, + "stability": "unstable", + "runs": [ + 46.9, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.5, + 55.5, + 51.2, + 59.7, + 51.2, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.5, + 55.5, + 46.9, + 59.8, + 51.2, + 59.8, + 51.2 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/result.json b/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/result.json index 6f66bb12..0d5e67ec 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/result.json +++ 
b/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/result.json @@ -456,7 +456,30 @@ "sustained_throughput_tokens_per_sec": 2217.9, "throttle_ratio": 0.94, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -217.5 + "ttft_p99_drift_ms": -217.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2217.9, + "std": 34.3, + "cv_pct": 1.54, + "stability": "stable", + "runs": [ + 2121.0, + 2228.7, + 2201.3, + 2198.4, + 2215.7, + 2232.6, + 2225.4, + 2220.6, + 2187.0, + 2241.5, + 2245.4, + 2250.2, + 2255.4, + 2227.6 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/sustained/result.json b/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/sustained/result.json index 2d1b61e9..bcb9d743 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/sustained/result.json @@ -372,7 +372,30 @@ "sustained_throughput_tokens_per_sec": 2217.9, "throttle_ratio": 0.94, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -217.5 + "ttft_p99_drift_ms": -217.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2217.9, + "std": 34.3, + "cv_pct": 1.54, + "stability": "stable", + "runs": [ + 2121.0, + 2228.7, + 2201.3, + 2198.4, + 2215.7, + 2232.6, + 2225.4, + 2220.6, + 2187.0, + 2241.5, + 2245.4, + 2250.2, + 2255.4, + 2227.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/result.json index 653ebddc..70f42b4b 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/result.json +++ 
b/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/result.json @@ -495,7 +495,44 @@ "sustained_throughput_tokens_per_sec": 484.0, "throttle_ratio": 0.892, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -13.4 + "ttft_p99_drift_ms": -13.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 484.0, + "std": 15.3, + "cv_pct": 3.16, + "stability": "noisy", + "runs": [ + 489.2, + 480.6, + 476.6, + 474.6, + 484.4, + 490.8, + 462.1, + 485.0, + 453.9, + 462.0, + 486.8, + 454.3, + 483.2, + 467.2, + 483.4, + 480.4, + 468.5, + 501.4, + 507.5, + 482.9, + 503.3, + 498.0, + 496.9, + 494.1, + 496.2, + 479.2, + 508.6, + 501.7 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/sustained/result.json index d553f1f1..7075969f 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/sustained/result.json @@ -397,7 +397,44 @@ "sustained_throughput_tokens_per_sec": 484.0, "throttle_ratio": 0.892, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -13.4 + "ttft_p99_drift_ms": -13.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 484.0, + "std": 15.3, + "cv_pct": 3.16, + "stability": "noisy", + "runs": [ + 489.2, + 480.6, + 476.6, + 474.6, + 484.4, + 490.8, + 462.1, + 485.0, + 453.9, + 462.0, + 486.8, + 454.3, + 483.2, + 467.2, + 483.4, + 480.4, + 468.5, + 501.4, + 507.5, + 482.9, + 503.3, + 498.0, + 496.9, + 494.1, + 496.2, + 479.2, + 508.6, + 501.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json index 
fbc82d38..cdd18aab 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json @@ -356,7 +356,30 @@ "sustained_throughput_tokens_per_sec": 491.9, "throttle_ratio": 0.898, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -144.4 + "ttft_p99_drift_ms": -144.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 491.9, + "std": 14.0, + "cv_pct": 2.85, + "stability": "noisy", + "runs": [ + 459.7, + 507.7, + 490.3, + 491.6, + 491.2, + 504.1, + 484.6, + 487.8, + 504.7, + 481.5, + 511.8, + 475.1, + 500.9, + 496.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json index b9c8c6bf..efdcdade 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 491.9, "throttle_ratio": 0.898, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -144.4 + "ttft_p99_drift_ms": -144.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 491.9, + "std": 14.0, + "cv_pct": 2.85, + "stability": "noisy", + "runs": [ + 459.7, + 507.7, + 490.3, + 491.6, + 491.2, + 504.1, + 484.6, + 487.8, + 504.7, + 481.5, + 511.8, + 475.1, + 500.9, + 496.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/result.json index b98c6b18..7b184c01 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/result.json 
+++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/result.json @@ -356,7 +356,30 @@ "sustained_throughput_tokens_per_sec": 709.1, "throttle_ratio": 0.935, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -196.6 + "ttft_p99_drift_ms": -196.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.1, + "std": 12.2, + "cv_pct": 1.72, + "stability": "stable", + "runs": [ + 674.8, + 719.2, + 722.0, + 704.1, + 703.0, + 722.0, + 703.0, + 708.7, + 717.0, + 717.1, + 706.9, + 716.7, + 702.6, + 710.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/sustained/result.json index 554f0644..006c7372 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 709.1, "throttle_ratio": 0.935, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -196.6 + "ttft_p99_drift_ms": -196.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.1, + "std": 12.2, + "cv_pct": 1.72, + "stability": "stable", + "runs": [ + 674.8, + 719.2, + 722.0, + 704.1, + 703.0, + 722.0, + 703.0, + 708.7, + 717.0, + 717.1, + 706.9, + 716.7, + 702.6, + 710.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json index 67954308..d0b750f9 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json @@ -356,7 +356,30 @@ 
"sustained_throughput_tokens_per_sec": 813.5, "throttle_ratio": 0.926, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -204.3 + "ttft_p99_drift_ms": -204.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 813.5, + "std": 17.7, + "cv_pct": 2.17, + "stability": "noisy", + "runs": [ + 778.9, + 820.5, + 809.4, + 823.7, + 823.7, + 793.7, + 828.6, + 841.5, + 797.3, + 818.5, + 799.6, + 829.7, + 826.3, + 797.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json index 36bb8403..a02ef774 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 813.5, "throttle_ratio": 0.926, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -204.3 + "ttft_p99_drift_ms": -204.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 813.5, + "std": 17.7, + "cv_pct": 2.17, + "stability": "noisy", + "runs": [ + 778.9, + 820.5, + 809.4, + 823.7, + 823.7, + 793.7, + 828.6, + 841.5, + 797.3, + 818.5, + 799.6, + 829.7, + 826.3, + 797.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/result.json index ca1752d1..a9bc15bf 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/result.json @@ -356,7 +356,30 @@ "sustained_throughput_tokens_per_sec": 700.0, "throttle_ratio": 0.945, "throttle_onset_minute": null, - 
"ttft_p99_drift_ms": -208.8 + "ttft_p99_drift_ms": -208.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 700.0, + "std": 10.5, + "cv_pct": 1.51, + "stability": "stable", + "runs": [ + 676.5, + 702.7, + 701.1, + 708.1, + 693.3, + 702.6, + 708.2, + 696.5, + 697.5, + 713.5, + 685.0, + 716.0, + 702.4, + 696.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/sustained/result.json index 08e47dc3..e5d34321 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 700.0, "throttle_ratio": 0.945, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -208.8 + "ttft_p99_drift_ms": -208.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 700.0, + "std": 10.5, + "cv_pct": 1.51, + "stability": "stable", + "runs": [ + 676.5, + 702.7, + 701.1, + 708.1, + 693.3, + 702.6, + 708.2, + 696.5, + 697.5, + 713.5, + 685.0, + 716.0, + 702.4, + 696.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/result.json index 040a4132..9f88f9ba 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/result.json @@ -356,7 +356,30 @@ "sustained_throughput_tokens_per_sec": 657.7, "throttle_ratio": 0.922, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -106.5 + "ttft_p99_drift_ms": -106.5, + "throughput_post_warmup_reliability": { + "n": 14, + 
"mean": 657.7, + "std": 11.4, + "cv_pct": 1.73, + "stability": "stable", + "runs": [ + 632.3, + 685.6, + 656.9, + 651.9, + 656.6, + 660.3, + 662.7, + 648.2, + 660.2, + 654.3, + 661.2, + 664.8, + 654.3, + 659.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/sustained/result.json index 74ebc6a6..15e689f1 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 657.7, "throttle_ratio": 0.922, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -106.5 + "ttft_p99_drift_ms": -106.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 657.7, + "std": 11.4, + "cv_pct": 1.73, + "stability": "stable", + "runs": [ + 632.3, + 685.6, + 656.9, + 651.9, + 656.6, + 660.3, + 662.7, + 648.2, + 660.2, + 654.3, + 661.2, + 664.8, + 654.3, + 659.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json index 0003f085..72e20277 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json @@ -439,7 +439,44 @@ "sustained_throughput_tokens_per_sec": 57.0, "throttle_ratio": 0.705, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -451.2 + "ttft_p99_drift_ms": -451.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 57.0, + "std": 7.4, + "cv_pct": 13.07, + "stability": "unstable", + "runs": [ + 51.2, + 55.5, + 68.2, + 51.2, + 51.2, + 55.4, + 72.5, + 
51.2, + 55.5, + 51.2, + 55.5, + 68.2, + 55.5, + 55.5, + 51.1, + 68.4, + 55.4, + 55.5, + 51.2, + 51.2, + 72.5, + 55.5, + 51.2, + 55.5, + 51.2, + 72.5, + 51.2, + 55.5 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json index d4e23453..e17dfb22 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json @@ -397,7 +397,44 @@ "sustained_throughput_tokens_per_sec": 57.0, "throttle_ratio": 0.705, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -451.2 + "ttft_p99_drift_ms": -451.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 57.0, + "std": 7.4, + "cv_pct": 13.07, + "stability": "unstable", + "runs": [ + 51.2, + 55.5, + 68.2, + 51.2, + 51.2, + 55.4, + 72.5, + 51.2, + 55.5, + 51.2, + 55.5, + 68.2, + 55.5, + 55.5, + 51.1, + 68.4, + 55.4, + 55.5, + 51.2, + 51.2, + 72.5, + 55.5, + 51.2, + 55.5, + 51.2, + 72.5, + 51.2, + 55.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/result.json index 23b30f8b..5e8ac339 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/result.json @@ -331,7 +331,30 @@ "sustained_throughput_tokens_per_sec": 3972.5, "throttle_ratio": 0.963, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -114.8 + "ttft_p99_drift_ms": -114.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3972.5, + "std": 41.8, + "cv_pct": 1.05, + "stability": "stable", + "runs": [ + 
3951.2, + 3922.5, + 4011.2, + 4072.5, + 3992.6, + 3964.1, + 4012.5, + 3989.7, + 3938.4, + 3932.9, + 3939.0, + 3938.5, + 3995.8, + 3954.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/sustained/result.json index 445a5911..be0a3b44 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 3972.5, "throttle_ratio": 0.963, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -114.8 + "ttft_p99_drift_ms": -114.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3972.5, + "std": 41.8, + "cv_pct": 1.05, + "stability": "stable", + "runs": [ + 3951.2, + 3922.5, + 4011.2, + 4072.5, + 3992.6, + 3964.1, + 4012.5, + 3989.7, + 3938.4, + 3932.9, + 3939.0, + 3938.5, + 3995.8, + 3954.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json index 7b24e63d..6da85580 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json @@ -574,7 +574,44 @@ "sustained_throughput_tokens_per_sec": 164.3, "throttle_ratio": 0.806, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": 17.4 + "ttft_p99_drift_ms": 17.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 164.3, + "std": 8.5, + "cv_pct": 5.2, + "stability": "unstable", + "runs": [ + 167.7, + 172.9, + 181.0, + 164.4, + 162.7, + 175.0, + 174.0, + 173.0, + 174.6, + 164.4, + 171.3, + 166.6, + 157.8, + 160.2, + 155.9, + 
166.3, + 149.8, + 161.3, + 157.5, + 171.4, + 150.8, + 162.6, + 157.2, + 160.2, + 156.9, + 168.6, + 145.8, + 170.8 + ] + } }, "interactive": { "ttft_ms_p50": 81.65, diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json index 94afb943..1ee4778e 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 164.3, "throttle_ratio": 0.806, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": 17.4 + "ttft_p99_drift_ms": 17.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 164.3, + "std": 8.5, + "cv_pct": 5.2, + "stability": "unstable", + "runs": [ + 167.7, + 172.9, + 181.0, + 164.4, + 162.7, + 175.0, + 174.0, + 173.0, + 174.6, + 164.4, + 171.3, + 166.6, + 157.8, + 160.2, + 155.9, + 166.3, + 149.8, + 161.3, + 157.5, + 171.4, + 150.8, + 162.6, + 157.2, + 160.2, + 156.9, + 168.6, + 145.8, + 170.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json index 993f6d90..da9b0f7c 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 472.7, "throttle_ratio": 0.902, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -2.3 + "ttft_p99_drift_ms": -2.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 472.7, + "std": 11.2, + "cv_pct": 2.38, + "stability": "noisy", + "runs": [ + 482.4, + 470.0, + 478.0, + 
464.6, + 475.5, + 474.1, + 475.7, + 467.2, + 478.8, + 476.8, + 443.2, + 477.3, + 480.2, + 459.0, + 465.2, + 486.8, + 468.0, + 466.5, + 484.3, + 454.4, + 491.3, + 483.4, + 482.3, + 454.0, + 481.7, + 470.6, + 462.2, + 482.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json index ddf5c312..b5aef3f7 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 472.7, "throttle_ratio": 0.902, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -2.3 + "ttft_p99_drift_ms": -2.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 472.7, + "std": 11.2, + "cv_pct": 2.38, + "stability": "noisy", + "runs": [ + 482.4, + 470.0, + 478.0, + 464.6, + 475.5, + 474.1, + 475.7, + 467.2, + 478.8, + 476.8, + 443.2, + 477.3, + 480.2, + 459.0, + 465.2, + 486.8, + 468.0, + 466.5, + 484.3, + 454.4, + 491.3, + 483.4, + 482.3, + 454.0, + 481.7, + 470.6, + 462.2, + 482.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json index ca26d93b..1dec8d50 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json @@ -494,7 +494,44 @@ "sustained_throughput_tokens_per_sec": 712.3, "throttle_ratio": 0.947, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -0.2 + "ttft_p99_drift_ms": -0.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 712.3, + 
"std": 9.4, + "cv_pct": 1.32, + "stability": "stable", + "runs": [ + 707.4, + 710.2, + 728.3, + 707.4, + 711.2, + 708.9, + 701.6, + 723.0, + 702.0, + 716.4, + 718.5, + 716.5, + 708.7, + 711.1, + 715.6, + 699.2, + 721.0, + 689.9, + 718.4, + 720.0, + 726.1, + 713.8, + 694.3, + 709.4, + 720.1, + 714.8, + 705.6, + 725.1 + ] + } }, "burst": { "sla_ttft_ms": 500, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json index 1fc95fb6..c99db7f5 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json @@ -394,7 +394,44 @@ "sustained_throughput_tokens_per_sec": 712.3, "throttle_ratio": 0.947, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -0.2 + "ttft_p99_drift_ms": -0.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 712.3, + "std": 9.4, + "cv_pct": 1.32, + "stability": "stable", + "runs": [ + 707.4, + 710.2, + 728.3, + 707.4, + 711.2, + 708.9, + 701.6, + 723.0, + 702.0, + 716.4, + 718.5, + 716.5, + 708.7, + 711.1, + 715.6, + 699.2, + 721.0, + 689.9, + 718.4, + 720.0, + 726.1, + 713.8, + 694.3, + 709.4, + 720.1, + 714.8, + 705.6, + 725.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/result.json index 10757712..f071d27d 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/result.json @@ -500,7 +500,44 @@ "sustained_throughput_tokens_per_sec": 551.9, "throttle_ratio": 0.868, "throttle_onset_minute": 18.0, - 
"ttft_p99_drift_ms": 9.0 + "ttft_p99_drift_ms": 9.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 551.9, + "std": 25.7, + "cv_pct": 4.66, + "stability": "noisy", + "runs": [ + 566.9, + 585.4, + 575.4, + 575.6, + 584.1, + 571.1, + 568.6, + 582.2, + 558.0, + 562.9, + 581.4, + 575.8, + 578.3, + 572.0, + 568.3, + 542.1, + 526.1, + 528.9, + 541.5, + 521.5, + 513.8, + 536.7, + 510.0, + 536.8, + 527.1, + 508.2, + 521.6, + 531.8 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/sustained/result.json index c1e56599..46ed843d 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 551.9, "throttle_ratio": 0.868, "throttle_onset_minute": 18.0, - "ttft_p99_drift_ms": 9.0 + "ttft_p99_drift_ms": 9.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 551.9, + "std": 25.7, + "cv_pct": 4.66, + "stability": "noisy", + "runs": [ + 566.9, + 585.4, + 575.4, + 575.6, + 584.1, + 571.1, + 568.6, + 582.2, + 558.0, + 562.9, + 581.4, + 575.8, + 578.3, + 572.0, + 568.3, + 542.1, + 526.1, + 528.9, + 541.5, + 521.5, + 513.8, + 536.7, + 510.0, + 536.8, + 527.1, + 508.2, + 521.6, + 531.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json index 75e68ff6..92d37745 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json +++ 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json @@ -357,7 +357,30 @@ "sustained_throughput_tokens_per_sec": 706.9, "throttle_ratio": 0.899, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -360.1 + "ttft_p99_drift_ms": -360.1, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 706.9, + "std": 19.3, + "cv_pct": 2.73, + "stability": "noisy", + "runs": [ + 655.0, + 711.0, + 698.3, + 718.8, + 724.4, + 701.8, + 706.9, + 720.6, + 697.1, + 726.2, + 701.9, + 728.3, + 688.9, + 717.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json index ee4ebabf..967d4064 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json @@ -244,7 +244,30 @@ "sustained_throughput_tokens_per_sec": 706.9, "throttle_ratio": 0.899, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -360.1 + "ttft_p99_drift_ms": -360.1, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 706.9, + "std": 19.3, + "cv_pct": 2.73, + "stability": "noisy", + "runs": [ + 655.0, + 711.0, + 698.3, + 718.8, + 724.4, + 701.8, + 706.9, + 720.6, + 697.1, + 726.2, + 701.9, + 728.3, + 688.9, + 717.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json index 27e0744a..e9eb5a19 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json +++ 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json @@ -362,7 +362,30 @@ "sustained_throughput_tokens_per_sec": 437.3, "throttle_ratio": 0.897, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -632.2 + "ttft_p99_drift_ms": -632.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 437.3, + "std": 11.6, + "cv_pct": 2.66, + "stability": "noisy", + "runs": [ + 409.4, + 431.5, + 456.2, + 439.7, + 430.2, + 432.7, + 452.2, + 436.1, + 432.3, + 431.9, + 449.0, + 445.3, + 441.9, + 433.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json index 9982bb06..ed999f4c 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json @@ -249,7 +249,30 @@ "sustained_throughput_tokens_per_sec": 437.3, "throttle_ratio": 0.897, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -632.2 + "ttft_p99_drift_ms": -632.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 437.3, + "std": 11.6, + "cv_pct": 2.66, + "stability": "noisy", + "runs": [ + 409.4, + 431.5, + 456.2, + 439.7, + 430.2, + 432.7, + 452.2, + 436.1, + 432.3, + 431.9, + 449.0, + 445.3, + 441.9, + 433.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json index 485a0fb3..16534425 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json +++ 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json @@ -362,7 +362,30 @@ "sustained_throughput_tokens_per_sec": 494.1, "throttle_ratio": 0.905, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -320.5 + "ttft_p99_drift_ms": -320.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 494.1, + "std": 12.0, + "cv_pct": 2.42, + "stability": "noisy", + "runs": [ + 456.8, + 504.0, + 486.6, + 499.0, + 496.0, + 498.3, + 495.4, + 496.1, + 503.2, + 489.6, + 504.8, + 500.4, + 494.6, + 492.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json index 0fa36d8f..12cd1900 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json @@ -249,7 +249,30 @@ "sustained_throughput_tokens_per_sec": 494.1, "throttle_ratio": 0.905, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -320.5 + "ttft_p99_drift_ms": -320.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 494.1, + "std": 12.0, + "cv_pct": 2.42, + "stability": "noisy", + "runs": [ + 456.8, + 504.0, + 486.6, + 499.0, + 496.0, + 498.3, + 495.4, + 496.1, + 503.2, + 489.6, + 504.8, + 500.4, + 494.6, + 492.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json index ff6e59c1..0828f267 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json +++ 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json @@ -362,7 +362,30 @@ "sustained_throughput_tokens_per_sec": 399.4, "throttle_ratio": 0.879, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -331.5 + "ttft_p99_drift_ms": -331.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 399.4, + "std": 12.5, + "cv_pct": 3.13, + "stability": "noisy", + "runs": [ + 366.9, + 402.7, + 400.8, + 402.1, + 395.3, + 398.4, + 410.0, + 398.6, + 390.3, + 417.2, + 391.1, + 408.7, + 415.2, + 395.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json index eaea3a87..01b72946 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json @@ -249,7 +249,30 @@ "sustained_throughput_tokens_per_sec": 399.4, "throttle_ratio": 0.879, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -331.5 + "ttft_p99_drift_ms": -331.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 399.4, + "std": 12.5, + "cv_pct": 3.13, + "stability": "noisy", + "runs": [ + 366.9, + 402.7, + 400.8, + 402.1, + 395.3, + 398.4, + 410.0, + 398.6, + 390.3, + 417.2, + 391.1, + 408.7, + 415.2, + 395.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/result.json index f1aeef5e..1db3621f 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/result.json @@ -361,7 
+361,30 @@ "sustained_throughput_tokens_per_sec": 537.9, "throttle_ratio": 0.87, "throttle_onset_minute": 9.0, - "ttft_p99_drift_ms": -138.7 + "ttft_p99_drift_ms": -138.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 537.9, + "std": 25.5, + "cv_pct": 4.74, + "stability": "noisy", + "runs": [ + 516.9, + 570.2, + 563.2, + 574.0, + 567.1, + 569.5, + 533.9, + 526.7, + 509.9, + 532.6, + 499.2, + 522.4, + 520.7, + 524.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/sustained/result.json index 74ec1b43..3948212c 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 537.9, "throttle_ratio": 0.87, "throttle_onset_minute": 9.0, - "ttft_p99_drift_ms": -138.7 + "ttft_p99_drift_ms": -138.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 537.9, + "std": 25.5, + "cv_pct": 4.74, + "stability": "noisy", + "runs": [ + 516.9, + 570.2, + 563.2, + 574.0, + 567.1, + 569.5, + 533.9, + 526.7, + 509.9, + 532.6, + 499.2, + 522.4, + 520.7, + 524.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json index 5d3e2c1d..7e9b7e10 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 709.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - 
"ttft_p99_drift_ms": -189.7 + "ttft_p99_drift_ms": -189.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.7, + "std": 15.4, + "cv_pct": 2.18, + "stability": "noisy", + "runs": [ + 741.9, + 718.6, + 701.6, + 688.1, + 729.3, + 691.6, + 728.3, + 715.4, + 712.9, + 702.4, + 699.8, + 698.3, + 705.5, + 702.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json index c8f36da6..29aab15e 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 709.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -189.7 + "ttft_p99_drift_ms": -189.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.7, + "std": 15.4, + "cv_pct": 2.18, + "stability": "noisy", + "runs": [ + 741.9, + 718.6, + 701.6, + 688.1, + 729.3, + 691.6, + 728.3, + 715.4, + 712.9, + 702.4, + 699.8, + 698.3, + 705.5, + 702.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/result.json index d4c5752a..fc10a331 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 757.0, "throttle_ratio": 0.942, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -185.6 + "ttft_p99_drift_ms": -185.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 
757.0, + "std": 14.6, + "cv_pct": 1.92, + "stability": "stable", + "runs": [ + 737.0, + 764.7, + 762.2, + 771.5, + 740.1, + 755.5, + 752.4, + 744.3, + 781.3, + 745.1, + 782.2, + 748.8, + 747.8, + 765.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/sustained/result.json index 46c58f19..afa0d716 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 757.0, "throttle_ratio": 0.942, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -185.6 + "ttft_p99_drift_ms": -185.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 757.0, + "std": 14.6, + "cv_pct": 1.92, + "stability": "stable", + "runs": [ + 737.0, + 764.7, + 762.2, + 771.5, + 740.1, + 755.5, + 752.4, + 744.3, + 781.3, + 745.1, + 782.2, + 748.8, + 747.8, + 765.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json index d2f40e9b..aba7eea1 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 694.8, "throttle_ratio": 0.912, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -179.3 + "ttft_p99_drift_ms": -179.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 694.8, + "std": 17.4, + "cv_pct": 2.51, + "stability": "noisy", + "runs": [ + 728.5, + 716.1, + 677.7, + 692.5, + 
712.4, + 664.6, + 700.4, + 688.4, + 678.1, + 692.0, + 710.5, + 679.9, + 690.7, + 695.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json index 21592a14..2a52a892 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 694.8, "throttle_ratio": 0.912, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -179.3 + "ttft_p99_drift_ms": -179.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 694.8, + "std": 17.4, + "cv_pct": 2.51, + "stability": "noisy", + "runs": [ + 728.5, + 716.1, + 677.7, + 692.5, + 712.4, + 664.6, + 700.4, + 688.4, + 678.1, + 692.0, + 710.5, + 679.9, + 690.7, + 695.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json index 0daadb7b..c4a7d19c 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 643.9, "throttle_ratio": 0.925, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -92.0 + "ttft_p99_drift_ms": -92.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 643.9, + "std": 13.7, + "cv_pct": 2.12, + "stability": "noisy", + "runs": [ + 655.9, + 662.5, + 620.7, + 649.8, + 643.7, + 631.6, + 645.2, + 628.6, + 671.1, + 634.4, + 650.0, + 633.5, + 643.4, + 643.7 + ] + } } }, "accuracy": { diff 
--git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json index bf2083e9..e77a2cc5 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 643.9, "throttle_ratio": 0.925, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -92.0 + "ttft_p99_drift_ms": -92.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 643.9, + "std": 13.7, + "cv_pct": 2.12, + "stability": "noisy", + "runs": [ + 655.9, + 662.5, + 620.7, + 649.8, + 643.7, + 631.6, + 645.2, + 628.6, + 671.1, + 634.4, + 650.0, + 633.5, + 643.4, + 643.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json index ac745b18..c84caf5f 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json @@ -438,7 +438,44 @@ "sustained_throughput_tokens_per_sec": 58.7, "throttle_ratio": 0.866, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 202.0 + "ttft_p99_drift_ms": 202.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 58.7, + "std": 3.2, + "cv_pct": 5.46, + "stability": "unstable", + "runs": [ + 55.4, + 59.7, + 55.5, + 64.0, + 55.5, + 59.7, + 59.8, + 59.7, + 59.7, + 55.5, + 64.0, + 55.5, + 55.5, + 59.7, + 64.0, + 55.4, + 55.5, + 64.0, + 55.5, + 59.7, + 55.5, + 64.0, + 55.4, + 59.8, + 59.7, + 59.7, + 59.7, + 55.5 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git 
a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json index 097e0e91..a311a3ac 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json @@ -394,7 +394,44 @@ "sustained_throughput_tokens_per_sec": 58.7, "throttle_ratio": 0.866, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 202.0 + "ttft_p99_drift_ms": 202.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 58.7, + "std": 3.2, + "cv_pct": 5.46, + "stability": "unstable", + "runs": [ + 55.4, + 59.7, + 55.5, + 64.0, + 55.5, + 59.7, + 59.8, + 59.7, + 59.7, + 55.5, + 64.0, + 55.5, + 55.5, + 59.7, + 64.0, + 55.4, + 55.5, + 64.0, + 55.5, + 59.7, + 55.5, + 64.0, + 55.4, + 59.8, + 59.7, + 59.7, + 59.7, + 55.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json index 57b288a8..f17a3a85 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json @@ -444,7 +444,44 @@ "sustained_throughput_tokens_per_sec": 67.1, "throttle_ratio": 0.501, "throttle_onset_minute": 21.0, - "ttft_p99_drift_ms": -474.5 + "ttft_p99_drift_ms": -474.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 67.1, + "std": 6.4, + "cv_pct": 9.6, + "stability": "unstable", + "runs": [ + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 34.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 
68.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json index bf336ef5..e6a370cc 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 67.1, "throttle_ratio": 0.501, "throttle_onset_minute": 21.0, - "ttft_p99_drift_ms": -474.5 + "ttft_p99_drift_ms": -474.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 67.1, + "std": 6.4, + "cv_pct": 9.6, + "stability": "unstable", + "runs": [ + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 34.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json index 2e7e0ce3..dc61504c 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json @@ -336,7 +336,30 @@ "sustained_throughput_tokens_per_sec": 11576.2, "throttle_ratio": 0.958, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -15.4 + "ttft_p99_drift_ms": -15.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 11576.2, + "std": 139.5, + "cv_pct": 1.2, + "stability": "stable", + "runs": [ + 11541.5, + 11672.6, + 11721.9, + 11526.8, + 11228.1, + 11380.9, + 11711.7, + 11643.5, + 11662.7, + 11612.6, + 11623.8, + 11639.3, + 11651.4, + 11450.2 + 
] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json index 6851ff63..78b09b49 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json @@ -249,7 +249,30 @@ "sustained_throughput_tokens_per_sec": 11576.2, "throttle_ratio": 0.958, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -15.4 + "ttft_p99_drift_ms": -15.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 11576.2, + "std": 139.5, + "cv_pct": 1.2, + "stability": "stable", + "runs": [ + 11541.5, + 11672.6, + 11721.9, + 11526.8, + 11228.1, + 11380.9, + 11711.7, + 11643.5, + 11662.7, + 11612.6, + 11623.8, + 11639.3, + 11651.4, + 11450.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json index 4d2726b7..6fd4617e 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json @@ -336,7 +336,30 @@ "sustained_throughput_tokens_per_sec": 2386.8, "throttle_ratio": 0.746, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -9.6 + "ttft_p99_drift_ms": -9.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2386.8, + "std": 225.6, + "cv_pct": 9.45, + "stability": "unstable", + "runs": [ + 2796.4, + 2979.6, + 2296.3, + 2250.9, + 2332.1, + 2333.1, + 2362.3, + 2258.5, + 2439.4, + 2412.5, + 2221.4, + 2241.1, + 2268.2, + 2223.8 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json index a1bb052b..367eaf0c 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 2386.8, "throttle_ratio": 0.746, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -9.6 + "ttft_p99_drift_ms": -9.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2386.8, + "std": 225.6, + "cv_pct": 9.45, + "stability": "unstable", + "runs": [ + 2796.4, + 2979.6, + 2296.3, + 2250.9, + 2332.1, + 2333.1, + 2362.3, + 2258.5, + 2439.4, + 2412.5, + 2221.4, + 2241.1, + 2268.2, + 2223.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/result.json index 09fc23d5..56b152e2 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/result.json @@ -500,7 +500,44 @@ "sustained_throughput_tokens_per_sec": 546.4, "throttle_ratio": 0.881, "throttle_onset_minute": 23.0, - "ttft_p99_drift_ms": -0.7 + "ttft_p99_drift_ms": -0.7, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 546.4, + "std": 17.4, + "cv_pct": 3.19, + "stability": "noisy", + "runs": [ + 566.8, + 559.7, + 544.8, + 551.0, + 525.8, + 529.8, + 531.9, + 554.8, + 584.0, + 548.1, + 572.2, + 531.0, + 546.0, + 552.1, + 545.3, + 565.6, + 535.0, + 550.0, + 552.7, + 555.4, + 540.9, + 518.7, + 514.7, + 542.3, + 546.4, + 521.4, + 578.2, + 533.5 + ] + } }, "speculative": { "results_by_concurrency": [ diff 
--git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/sustained/result.json index d327a16a..f3bb99cb 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 546.4, "throttle_ratio": 0.881, "throttle_onset_minute": 23.0, - "ttft_p99_drift_ms": -0.7 + "ttft_p99_drift_ms": -0.7, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 546.4, + "std": 17.4, + "cv_pct": 3.19, + "stability": "noisy", + "runs": [ + 566.8, + 559.7, + 544.8, + 551.0, + 525.8, + 529.8, + 531.9, + 554.8, + 584.0, + 548.1, + 572.2, + 531.0, + 546.0, + 552.1, + 545.3, + 565.6, + 535.0, + 550.0, + 552.7, + 555.4, + 540.9, + 518.7, + 514.7, + 542.3, + 546.4, + 521.4, + 578.2, + 533.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json index 0ed01fcb..41c11393 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 534.5, "throttle_ratio": 0.933, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -140.3 + "ttft_p99_drift_ms": -140.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 534.5, + "std": 12.1, + "cv_pct": 2.26, + "stability": "noisy", + "runs": [ + 559.4, + 525.8, + 552.5, + 528.7, + 547.5, + 525.4, + 532.6, + 526.2, + 521.7, + 531.5, + 521.7, + 533.9, + 528.7, + 546.9 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json index 7b8752c6..26bdb582 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 534.5, "throttle_ratio": 0.933, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -140.3 + "ttft_p99_drift_ms": -140.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 534.5, + "std": 12.1, + "cv_pct": 2.26, + "stability": "noisy", + "runs": [ + 559.4, + 525.8, + 552.5, + 528.7, + 547.5, + 525.4, + 532.6, + 526.2, + 521.7, + 531.5, + 521.7, + 533.9, + 528.7, + 546.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/result.json index 73b517f2..63d529d9 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 760.1, "throttle_ratio": 0.893, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -171.7 + "ttft_p99_drift_ms": -171.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 760.1, + "std": 33.5, + "cv_pct": 4.4, + "stability": "noisy", + "runs": [ + 769.6, + 793.5, + 770.5, + 800.0, + 794.2, + 803.6, + 796.6, + 754.2, + 723.9, + 717.3, + 739.1, + 737.1, + 719.0, + 722.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/sustained/result.json 
b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/sustained/result.json index 9f6d234c..bbe68d18 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 760.1, "throttle_ratio": 0.893, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -171.7 + "ttft_p99_drift_ms": -171.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 760.1, + "std": 33.5, + "cv_pct": 4.4, + "stability": "noisy", + "runs": [ + 769.6, + 793.5, + 770.5, + 800.0, + 794.2, + 803.6, + 796.6, + 754.2, + 723.9, + 717.3, + 739.1, + 737.1, + 719.0, + 722.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json index 3a45077f..23828891 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 829.8, "throttle_ratio": 0.83, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -193.6 + "ttft_p99_drift_ms": -193.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 829.8, + "std": 56.4, + "cv_pct": 6.8, + "stability": "unstable", + "runs": [ + 836.7, + 902.0, + 882.7, + 918.5, + 861.0, + 915.1, + 763.1, + 803.2, + 805.7, + 783.7, + 818.0, + 791.1, + 772.9, + 762.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json index 
587e4d7e..3319b45c 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 829.8, "throttle_ratio": 0.83, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -193.6 + "ttft_p99_drift_ms": -193.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 829.8, + "std": 56.4, + "cv_pct": 6.8, + "stability": "unstable", + "runs": [ + 836.7, + 902.0, + 882.7, + 918.5, + 861.0, + 915.1, + 763.1, + 803.2, + 805.7, + 783.7, + 818.0, + 791.1, + 772.9, + 762.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json index e207171a..810fda95 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 745.0, "throttle_ratio": 0.866, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -190.8 + "ttft_p99_drift_ms": -190.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 745.0, + "std": 40.9, + "cv_pct": 5.49, + "stability": "unstable", + "runs": [ + 738.9, + 809.0, + 793.6, + 810.7, + 771.9, + 783.8, + 735.7, + 702.0, + 702.2, + 717.8, + 702.5, + 722.4, + 702.6, + 737.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json index bc45102a..d1b31b83 100644 --- 
a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 745.0, "throttle_ratio": 0.866, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -190.8 + "ttft_p99_drift_ms": -190.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 745.0, + "std": 40.9, + "cv_pct": 5.49, + "stability": "unstable", + "runs": [ + 738.9, + 809.0, + 793.6, + 810.7, + 771.9, + 783.8, + 735.7, + 702.0, + 702.2, + 717.8, + 702.5, + 722.4, + 702.6, + 737.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json index 494d6c51..eacbbf9f 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 715.7, "throttle_ratio": 0.843, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -94.9 + "ttft_p99_drift_ms": -94.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 715.7, + "std": 43.7, + "cv_pct": 6.1, + "stability": "unstable", + "runs": [ + 717.7, + 704.7, + 752.5, + 762.7, + 771.9, + 752.2, + 759.1, + 732.5, + 743.4, + 681.3, + 653.7, + 669.9, + 667.5, + 650.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json index f37b0c8d..f9f0da01 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json +++ 
b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 715.7, "throttle_ratio": 0.843, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -94.9 + "ttft_p99_drift_ms": -94.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 715.7, + "std": 43.7, + "cv_pct": 6.1, + "stability": "unstable", + "runs": [ + 717.7, + 704.7, + 752.5, + 762.7, + 771.9, + 752.2, + 759.1, + 732.5, + 743.4, + 681.3, + 653.7, + 669.9, + 667.5, + 650.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json index 5596dda6..d6d62308 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json @@ -444,7 +444,44 @@ "sustained_throughput_tokens_per_sec": 67.0, "throttle_ratio": 0.502, "throttle_onset_minute": 28.0, - "ttft_p99_drift_ms": -397.5 + "ttft_p99_drift_ms": -397.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 67.0, + "std": 6.4, + "cv_pct": 9.57, + "stability": "unstable", + "runs": [ + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 67.9, + 34.3, + 68.3 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json index 108f892d..4e95085a 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json +++ 
b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 67.0, "throttle_ratio": 0.502, "throttle_onset_minute": 28.0, - "ttft_p99_drift_ms": -397.5 + "ttft_p99_drift_ms": -397.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 67.0, + "std": 6.4, + "cv_pct": 9.57, + "stability": "unstable", + "runs": [ + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 67.9, + 34.3, + 68.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json index cc527f3c..2193513b 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json @@ -336,7 +336,30 @@ "sustained_throughput_tokens_per_sec": 2804.8, "throttle_ratio": 0.602, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -11.5 + "ttft_p99_drift_ms": -11.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2804.8, + "std": 516.6, + "cv_pct": 18.42, + "stability": "unstable", + "runs": [ + 3817.9, + 4192.3, + 2621.3, + 2548.3, + 2703.0, + 2680.5, + 2557.3, + 2641.8, + 2612.4, + 2524.0, + 2626.8, + 2534.0, + 2647.2, + 2560.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json index 04f3b842..9c8f54a6 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json +++ 
b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 2804.8, "throttle_ratio": 0.602, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -11.5 + "ttft_p99_drift_ms": -11.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2804.8, + "std": 516.6, + "cv_pct": 18.42, + "stability": "unstable", + "runs": [ + 3817.9, + 4192.3, + 2621.3, + 2548.3, + 2703.0, + 2680.5, + 2557.3, + 2641.8, + 2612.4, + 2524.0, + 2626.8, + 2534.0, + 2647.2, + 2560.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json index 832b5d1a..ae0b7af5 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json @@ -574,7 +574,44 @@ "sustained_throughput_tokens_per_sec": 184.0, "throttle_ratio": 0.816, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -7.8 + "ttft_p99_drift_ms": -7.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 184.0, + "std": 9.5, + "cv_pct": 5.15, + "stability": "unstable", + "runs": [ + 180.1, + 187.5, + 193.8, + 183.5, + 191.3, + 175.4, + 186.1, + 181.5, + 189.5, + 170.3, + 184.2, + 178.7, + 189.4, + 173.7, + 187.8, + 187.0, + 180.2, + 188.7, + 182.7, + 173.0, + 202.4, + 172.3, + 203.0, + 174.7, + 199.2, + 178.3, + 165.6, + 192.4 + ] + } }, "interactive": { "ttft_ms_p50": 81.8, diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json index 698c9eab..3330dbb3 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json +++ 
b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 184.0, "throttle_ratio": 0.816, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -7.8 + "ttft_p99_drift_ms": -7.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 184.0, + "std": 9.5, + "cv_pct": 5.15, + "stability": "unstable", + "runs": [ + 180.1, + 187.5, + 193.8, + 183.5, + 191.3, + 175.4, + 186.1, + 181.5, + 189.5, + 170.3, + 184.2, + 178.7, + 189.4, + 173.7, + 187.8, + 187.0, + 180.2, + 188.7, + 182.7, + 173.0, + 202.4, + 172.3, + 203.0, + 174.7, + 199.2, + 178.3, + 165.6, + 192.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/result.json index 83525842..269b6f03 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 569.1, "throttle_ratio": 0.845, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -27.8 + "ttft_p99_drift_ms": -27.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 569.1, + "std": 22.4, + "cv_pct": 3.93, + "stability": "noisy", + "runs": [ + 573.8, + 571.7, + 596.7, + 594.4, + 584.0, + 552.0, + 550.9, + 538.9, + 519.3, + 542.0, + 614.7, + 587.1, + 597.3, + 593.2, + 551.6, + 581.8, + 573.7, + 550.5, + 564.5, + 571.8, + 535.8, + 589.0, + 565.8, + 585.7, + 578.7, + 564.8, + 550.8, + 553.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/sustained/result.json index 3f730cb1..33416153 100644 --- 
a/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 569.1, "throttle_ratio": 0.845, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -27.8 + "ttft_p99_drift_ms": -27.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 569.1, + "std": 22.4, + "cv_pct": 3.93, + "stability": "noisy", + "runs": [ + 573.8, + 571.7, + 596.7, + 594.4, + 584.0, + 552.0, + 550.9, + 538.9, + 519.3, + 542.0, + 614.7, + 587.1, + 597.3, + 593.2, + 551.6, + 581.8, + 573.7, + 550.5, + 564.5, + 571.8, + 535.8, + 589.0, + 565.8, + 585.7, + 578.7, + 564.8, + 550.8, + 553.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/result.json index 82157fb0..0142e7ff 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/result.json @@ -504,7 +504,44 @@ "sustained_throughput_tokens_per_sec": 309.9, "throttle_ratio": 0.887, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": 5.6 + "ttft_p99_drift_ms": 5.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 309.9, + "std": 9.6, + "cv_pct": 3.1, + "stability": "noisy", + "runs": [ + 316.9, + 312.1, + 306.3, + 311.7, + 323.5, + 307.9, + 290.3, + 315.2, + 308.3, + 304.1, + 316.4, + 309.6, + 296.7, + 317.6, + 311.5, + 306.4, + 312.3, + 321.9, + 308.5, + 289.1, + 323.1, + 300.0, + 312.4, + 306.1, + 322.1, + 305.8, + 296.1, + 326.1 + ] + } }, "burst": { "sla_ttft_ms": 500, diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/sustained/result.json 
b/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/sustained/result.json index 8a59eb57..f4650348 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/sustained/result.json @@ -407,7 +407,44 @@ "sustained_throughput_tokens_per_sec": 309.9, "throttle_ratio": 0.887, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": 5.6 + "ttft_p99_drift_ms": 5.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 309.9, + "std": 9.6, + "cv_pct": 3.1, + "stability": "noisy", + "runs": [ + 316.9, + 312.1, + 306.3, + 311.7, + 323.5, + 307.9, + 290.3, + 315.2, + 308.3, + 304.1, + 316.4, + 309.6, + 296.7, + 317.6, + 311.5, + 306.4, + 312.3, + 321.9, + 308.5, + 289.1, + 323.1, + 300.0, + 312.4, + 306.1, + 322.1, + 305.8, + 296.1, + 326.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/result.json index c3c3e30e..72dbddc6 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 306.6, "throttle_ratio": 0.819, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -306.3 + "ttft_p99_drift_ms": -306.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 306.6, + "std": 14.3, + "cv_pct": 4.67, + "stability": "noisy", + "runs": [ + 267.6, + 322.3, + 315.8, + 306.4, + 314.6, + 296.7, + 326.9, + 299.4, + 311.9, + 301.7, + 315.6, + 307.7, + 306.1, + 299.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/sustained/result.json 
b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/sustained/result.json index 97b8ebbf..f1af1137 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 306.6, "throttle_ratio": 0.819, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -306.3 + "ttft_p99_drift_ms": -306.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 306.6, + "std": 14.3, + "cv_pct": 4.67, + "stability": "noisy", + "runs": [ + 267.6, + 322.3, + 315.8, + 306.4, + 314.6, + 296.7, + 326.9, + 299.4, + 311.9, + 301.7, + 315.6, + 307.7, + 306.1, + 299.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json index c5467b23..cec7cb53 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 472.4, "throttle_ratio": 0.918, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -489.7 + "ttft_p99_drift_ms": -489.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 472.4, + "std": 11.9, + "cv_pct": 2.51, + "stability": "noisy", + "runs": [ + 455.4, + 475.3, + 464.4, + 488.4, + 476.5, + 479.4, + 465.6, + 471.6, + 462.5, + 468.5, + 496.3, + 463.9, + 460.1, + 486.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json 
index 5e0a930f..dce17a6b 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 472.4, "throttle_ratio": 0.918, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -489.7 + "ttft_p99_drift_ms": -489.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 472.4, + "std": 11.9, + "cv_pct": 2.51, + "stability": "noisy", + "runs": [ + 455.4, + 475.3, + 464.4, + 488.4, + 476.5, + 479.4, + 465.6, + 471.6, + 462.5, + 468.5, + 496.3, + 463.9, + 460.1, + 486.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/result.json index 74c8204c..3044937c 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 588.9, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -466.4 + "ttft_p99_drift_ms": -466.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 588.9, + "std": 8.1, + "cv_pct": 1.38, + "stability": "stable", + "runs": [ + 578.0, + 592.6, + 590.0, + 609.6, + 583.9, + 593.0, + 583.9, + 584.0, + 584.1, + 599.7, + 584.2, + 584.8, + 592.3, + 584.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/sustained/result.json index 64fff39e..19515355 100644 --- 
a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 588.9, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -466.4 + "ttft_p99_drift_ms": -466.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 588.9, + "std": 8.1, + "cv_pct": 1.38, + "stability": "stable", + "runs": [ + 578.0, + 592.6, + 590.0, + 609.6, + 583.9, + 593.0, + 583.9, + 584.0, + 584.1, + 599.7, + 584.2, + 584.8, + 592.3, + 584.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json index ffb70f72..a31946b8 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 475.6, "throttle_ratio": 0.907, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -484.4 + "ttft_p99_drift_ms": -484.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 475.6, + "std": 12.6, + "cv_pct": 2.64, + "stability": "noisy", + "runs": [ + 449.0, + 494.9, + 484.5, + 476.7, + 460.0, + 478.3, + 478.5, + 476.5, + 484.4, + 460.0, + 478.1, + 479.4, + 467.4, + 490.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json index 3f598712..f10b3791 100644 --- 
a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 475.6, "throttle_ratio": 0.907, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -484.4 + "ttft_p99_drift_ms": -484.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 475.6, + "std": 12.6, + "cv_pct": 2.64, + "stability": "noisy", + "runs": [ + 449.0, + 494.9, + 484.5, + 476.7, + 460.0, + 478.3, + 478.5, + 476.5, + 484.4, + 460.0, + 478.1, + 479.4, + 467.4, + 490.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/result.json index d367a2b7..db9b3c8a 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 475.9, "throttle_ratio": 0.925, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -57.8 + "ttft_p99_drift_ms": -57.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 475.9, + "std": 9.2, + "cv_pct": 1.93, + "stability": "stable", + "runs": [ + 453.3, + 472.7, + 483.2, + 471.1, + 490.0, + 477.4, + 472.5, + 471.6, + 479.5, + 488.8, + 475.3, + 472.2, + 483.4, + 471.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/sustained/result.json index c2f822d9..b71ee9c9 100644 --- 
a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 475.9, "throttle_ratio": 0.925, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -57.8 + "ttft_p99_drift_ms": -57.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 475.9, + "std": 9.2, + "cv_pct": 1.93, + "stability": "stable", + "runs": [ + 453.3, + 472.7, + 483.2, + 471.1, + 490.0, + 477.4, + 472.5, + 471.6, + 479.5, + 488.8, + 475.3, + 472.2, + 483.4, + 471.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json index b52db8fa..3de48f66 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json @@ -341,7 +341,30 @@ "sustained_throughput_tokens_per_sec": 2693.3, "throttle_ratio": 0.821, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -63.4 + "ttft_p99_drift_ms": -63.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2693.3, + "std": 140.0, + "cv_pct": 5.2, + "stability": "unstable", + "runs": [ + 3023.8, + 2938.2, + 2481.2, + 2558.2, + 2650.5, + 2645.4, + 2618.4, + 2683.7, + 2624.8, + 2660.3, + 2705.4, + 2652.5, + 2764.3, + 2699.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json index c26d796f..eb6e46df 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json +++ 
b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 2693.3, "throttle_ratio": 0.821, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -63.4 + "ttft_p99_drift_ms": -63.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2693.3, + "std": 140.0, + "cv_pct": 5.2, + "stability": "unstable", + "runs": [ + 3023.8, + 2938.2, + 2481.2, + 2558.2, + 2650.5, + 2645.4, + 2618.4, + 2683.7, + 2624.8, + 2660.3, + 2705.4, + 2652.5, + 2764.3, + 2699.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/result.json index 7e3068b3..1aebc5c9 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/result.json @@ -508,7 +508,44 @@ "sustained_throughput_tokens_per_sec": 400.1, "throttle_ratio": 0.936, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -2.6 + "ttft_p99_drift_ms": -2.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 400.1, + "std": 6.3, + "cv_pct": 1.58, + "stability": "stable", + "runs": [ + 406.1, + 399.7, + 406.1, + 387.3, + 399.9, + 393.1, + 403.1, + 407.1, + 395.6, + 399.7, + 401.3, + 393.3, + 407.1, + 406.0, + 399.4, + 402.4, + 391.0, + 399.7, + 384.6, + 410.9, + 403.4, + 398.5, + 399.7, + 400.9, + 393.3, + 407.0, + 405.7, + 399.9 + ] + } }, "burst": { "sla_ttft_ms": 500, diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/sustained/result.json index 8b3f7656..bc017356 100644 --- 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/sustained/result.json @@ -411,7 +411,44 @@ "sustained_throughput_tokens_per_sec": 400.1, "throttle_ratio": 0.936, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -2.6 + "ttft_p99_drift_ms": -2.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 400.1, + "std": 6.3, + "cv_pct": 1.58, + "stability": "stable", + "runs": [ + 406.1, + 399.7, + 406.1, + 387.3, + 399.9, + 393.1, + 403.1, + 407.1, + 395.6, + 399.7, + 401.3, + 393.3, + 407.1, + 406.0, + 399.4, + 402.4, + 391.0, + 399.7, + 384.6, + 410.9, + 403.4, + 398.5, + 399.7, + 400.9, + 393.3, + 407.0, + 405.7, + 399.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/result.json index 4bf3f78d..ded7ce76 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 393.1, "throttle_ratio": 0.905, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -186.0 + "ttft_p99_drift_ms": -186.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 393.1, + "std": 11.9, + "cv_pct": 3.03, + "stability": "noisy", + "runs": [ + 368.4, + 385.6, + 407.1, + 388.2, + 401.4, + 392.9, + 407.1, + 385.4, + 399.1, + 373.5, + 405.2, + 399.6, + 391.1, + 398.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/sustained/result.json index 
f6741165..bda3d0bc 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 393.1, "throttle_ratio": 0.905, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -186.0 + "ttft_p99_drift_ms": -186.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 393.1, + "std": 11.9, + "cv_pct": 3.03, + "stability": "noisy", + "runs": [ + 368.4, + 385.6, + 407.1, + 388.2, + 401.4, + 392.9, + 407.1, + 385.4, + 399.1, + 373.5, + 405.2, + 399.6, + 391.1, + 398.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json index a00e451c..1cc10d7f 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 640.7, "throttle_ratio": 0.915, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -121.3 + "ttft_p99_drift_ms": -121.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 640.7, + "std": 13.7, + "cv_pct": 2.14, + "stability": "noisy", + "runs": [ + 603.2, + 639.6, + 630.9, + 655.0, + 638.3, + 638.3, + 651.3, + 641.3, + 647.4, + 641.4, + 659.2, + 629.4, + 645.2, + 649.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json index e6c613d8..473a811c 100644 --- 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 640.7, "throttle_ratio": 0.915, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -121.3 + "ttft_p99_drift_ms": -121.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 640.7, + "std": 13.7, + "cv_pct": 2.14, + "stability": "noisy", + "runs": [ + 603.2, + 639.6, + 630.9, + 655.0, + 638.3, + 638.3, + 651.3, + 641.3, + 647.4, + 641.4, + 659.2, + 629.4, + 645.2, + 649.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json index 69c78edd..bd184764 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 854.9, "throttle_ratio": 0.919, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -236.8 + "ttft_p99_drift_ms": -236.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 854.9, + "std": 19.1, + "cv_pct": 2.23, + "stability": "noisy", + "runs": [ + 814.1, + 856.6, + 858.9, + 849.2, + 850.8, + 849.4, + 876.6, + 828.0, + 856.1, + 852.3, + 846.7, + 879.9, + 864.3, + 886.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json index 015f9cbc..b6150f92 100644 --- 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 854.9, "throttle_ratio": 0.919, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -236.8 + "ttft_p99_drift_ms": -236.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 854.9, + "std": 19.1, + "cv_pct": 2.23, + "stability": "noisy", + "runs": [ + 814.1, + 856.6, + 858.9, + 849.2, + 850.8, + 849.4, + 876.6, + 828.0, + 856.1, + 852.3, + 846.7, + 879.9, + 864.3, + 886.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json index 6f634adb..e8657353 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 628.8, "throttle_ratio": 0.916, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -237.0 + "ttft_p99_drift_ms": -237.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 628.8, + "std": 16.5, + "cv_pct": 2.63, + "stability": "noisy", + "runs": [ + 593.7, + 648.2, + 608.6, + 648.0, + 610.4, + 627.1, + 619.1, + 648.0, + 635.4, + 623.2, + 640.1, + 638.9, + 625.9, + 636.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json index 3f281590..3aa11072 100644 --- 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 628.8, "throttle_ratio": 0.916, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -237.0 + "ttft_p99_drift_ms": -237.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 628.8, + "std": 16.5, + "cv_pct": 2.63, + "stability": "noisy", + "runs": [ + 593.7, + 648.2, + 608.6, + 648.0, + 610.4, + 627.1, + 619.1, + 648.0, + 635.4, + 623.2, + 640.1, + 638.9, + 625.9, + 636.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/result.json index 814021f9..05ca431a 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 567.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -121.8 + "ttft_p99_drift_ms": -121.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 567.7, + "std": 10.8, + "cv_pct": 1.9, + "stability": "stable", + "runs": [ + 540.8, + 556.4, + 564.4, + 577.5, + 563.1, + 568.6, + 569.2, + 569.7, + 569.3, + 570.9, + 574.7, + 583.3, + 559.2, + 580.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/sustained/result.json index 42d3a726..a1dcd617 100644 --- 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 567.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -121.8 + "ttft_p99_drift_ms": -121.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 567.7, + "std": 10.8, + "cv_pct": 1.9, + "stability": "stable", + "runs": [ + 540.8, + 556.4, + 564.4, + 577.5, + 563.1, + 568.6, + 569.2, + 569.7, + 569.3, + 570.9, + 574.7, + 583.3, + 559.2, + 580.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/result.json index b8776e99..252e1be8 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/result.json @@ -345,7 +345,30 @@ "sustained_throughput_tokens_per_sec": 5995.2, "throttle_ratio": 0.901, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -25.3 + "ttft_p99_drift_ms": -25.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 5995.2, + "std": 266.2, + "cv_pct": 4.44, + "stability": "noisy", + "runs": [ + 6220.0, + 6244.2, + 6209.5, + 6276.3, + 6214.1, + 6252.7, + 6249.8, + 5974.5, + 5679.0, + 5654.4, + 5674.7, + 5677.9, + 5730.7, + 5874.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/sustained/result.json index 3098b1c7..df53813f 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/sustained/result.json 
+++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 5995.2, "throttle_ratio": 0.901, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -25.3 + "ttft_p99_drift_ms": -25.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 5995.2, + "std": 266.2, + "cv_pct": 4.44, + "stability": "noisy", + "runs": [ + 6220.0, + 6244.2, + 6209.5, + 6276.3, + 6214.1, + 6252.7, + 6249.8, + 5974.5, + 5679.0, + 5654.4, + 5674.7, + 5677.9, + 5730.7, + 5874.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json index fb016a51..4399b583 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json @@ -511,7 +511,44 @@ "sustained_throughput_tokens_per_sec": 339.8, "throttle_ratio": 0.91, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -3.1 + "ttft_p99_drift_ms": -3.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 339.8, + "std": 8.8, + "cv_pct": 2.58, + "stability": "noisy", + "runs": [ + 334.4, + 352.3, + 338.5, + 327.8, + 339.6, + 356.3, + 339.2, + 348.1, + 339.9, + 348.3, + 339.3, + 339.5, + 334.5, + 339.3, + 336.5, + 352.2, + 329.1, + 343.0, + 332.5, + 340.7, + 324.4, + 331.1, + 356.6, + 331.0, + 331.0, + 351.4, + 332.6, + 344.9 + ] + } }, "burst": { "sla_ttft_ms": 500, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json index 73561592..21a9742a 100644 --- 
a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json @@ -411,7 +411,44 @@ "sustained_throughput_tokens_per_sec": 339.8, "throttle_ratio": 0.91, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -3.1 + "ttft_p99_drift_ms": -3.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 339.8, + "std": 8.8, + "cv_pct": 2.58, + "stability": "noisy", + "runs": [ + 334.4, + 352.3, + 338.5, + 327.8, + 339.6, + 356.3, + 339.2, + 348.1, + 339.9, + 348.3, + 339.3, + 339.5, + 334.5, + 339.3, + 336.5, + 352.2, + 329.1, + 343.0, + 332.5, + 340.7, + 324.4, + 331.1, + 356.6, + 331.0, + 331.0, + 351.4, + 332.6, + 344.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/result.json index c97e8e54..a3591049 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 334.4, "throttle_ratio": 0.867, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -349.6 + "ttft_p99_drift_ms": -349.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 334.4, + "std": 11.9, + "cv_pct": 3.57, + "stability": "noisy", + "runs": [ + 304.8, + 345.4, + 326.5, + 351.5, + 327.2, + 338.0, + 326.8, + 337.5, + 339.0, + 343.2, + 332.3, + 348.6, + 326.4, + 334.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/sustained/result.json index c5116404..e34a05c5 100644 
--- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 334.4, "throttle_ratio": 0.867, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -349.6 + "ttft_p99_drift_ms": -349.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 334.4, + "std": 11.9, + "cv_pct": 3.57, + "stability": "noisy", + "runs": [ + 304.8, + 345.4, + 326.5, + 351.5, + 327.2, + 338.0, + 326.8, + 337.5, + 339.0, + 343.2, + 332.3, + 348.6, + 326.4, + 334.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json index 2d419279..07afef8c 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 472.1, "throttle_ratio": 0.907, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -85.0 + "ttft_p99_drift_ms": -85.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 472.1, + "std": 11.5, + "cv_pct": 2.44, + "stability": "noisy", + "runs": [ + 445.8, + 458.1, + 469.9, + 469.6, + 465.7, + 477.8, + 479.1, + 487.9, + 471.7, + 491.4, + 465.6, + 476.4, + 473.9, + 476.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json index 93a2b9af..13a289a2 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json +++ 
b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 472.1, "throttle_ratio": 0.907, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -85.0 + "ttft_p99_drift_ms": -85.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 472.1, + "std": 11.5, + "cv_pct": 2.44, + "stability": "noisy", + "runs": [ + 445.8, + 458.1, + 469.9, + 469.6, + 465.7, + 477.8, + 479.1, + 487.9, + 471.7, + 491.4, + 465.6, + 476.4, + 473.9, + 476.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json index 0620dcb8..b8cc75c0 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 606.2, "throttle_ratio": 0.916, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -238.3 + "ttft_p99_drift_ms": -238.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 606.2, + "std": 15.7, + "cv_pct": 2.59, + "stability": "noisy", + "runs": [ + 584.6, + 597.4, + 624.2, + 591.2, + 638.4, + 590.3, + 598.1, + 616.0, + 601.1, + 605.8, + 616.5, + 591.5, + 624.0, + 607.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json index f32285f8..e394a5a0 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json +++ 
b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 606.2, "throttle_ratio": 0.916, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -238.3 + "ttft_p99_drift_ms": -238.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 606.2, + "std": 15.7, + "cv_pct": 2.59, + "stability": "noisy", + "runs": [ + 584.6, + 597.4, + 624.2, + 591.2, + 638.4, + 590.3, + 598.1, + 616.0, + 601.1, + 605.8, + 616.5, + 591.5, + 624.0, + 607.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/result.json index ef22615a..2b98e86b 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 506.0, "throttle_ratio": 0.938, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -216.0 + "ttft_p99_drift_ms": -216.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 506.0, + "std": 9.3, + "cv_pct": 1.84, + "stability": "stable", + "runs": [ + 488.0, + 512.2, + 495.5, + 515.4, + 501.6, + 507.0, + 508.3, + 517.7, + 509.8, + 505.8, + 507.7, + 499.2, + 520.5, + 494.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/sustained/result.json index 8e45b655..539c6bba 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/sustained/result.json +++ 
b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 506.0, "throttle_ratio": 0.938, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -216.0 + "ttft_p99_drift_ms": -216.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 506.0, + "std": 9.3, + "cv_pct": 1.84, + "stability": "stable", + "runs": [ + 488.0, + 512.2, + 495.5, + 515.4, + 501.6, + 507.0, + 508.3, + 517.7, + 509.8, + 505.8, + 507.7, + 499.2, + 520.5, + 494.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json index 2f01c7e8..1c05e652 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 438.9, "throttle_ratio": 0.931, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -133.3 + "ttft_p99_drift_ms": -133.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 438.9, + "std": 10.7, + "cv_pct": 2.43, + "stability": "noisy", + "runs": [ + 419.8, + 434.5, + 439.4, + 447.0, + 426.1, + 445.6, + 447.0, + 434.7, + 441.1, + 448.9, + 418.8, + 448.8, + 449.6, + 443.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json index f0f86a0a..a6d71b51 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json +++ 
b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 438.9, "throttle_ratio": 0.931, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -133.3 + "ttft_p99_drift_ms": -133.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 438.9, + "std": 10.7, + "cv_pct": 2.43, + "stability": "noisy", + "runs": [ + 419.8, + 434.5, + 439.4, + 447.0, + 426.1, + 445.6, + 447.0, + 434.7, + 441.1, + 448.9, + 418.8, + 448.8, + 449.6, + 443.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json index 8b04786a..cdcfdb99 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 1698.1, "throttle_ratio": 0.922, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -105.2 + "ttft_p99_drift_ms": -105.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1698.1, + "std": 39.2, + "cv_pct": 2.31, + "stability": "noisy", + "runs": [ + 1715.3, + 1751.9, + 1713.1, + 1671.7, + 1724.1, + 1660.5, + 1687.0, + 1622.5, + 1658.9, + 1760.4, + 1680.1, + 1676.2, + 1735.0, + 1716.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json index db87a8e7..c78793fb 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json @@ -261,7 +261,30 @@ 
"sustained_throughput_tokens_per_sec": 1698.1, "throttle_ratio": 0.922, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -105.2 + "ttft_p99_drift_ms": -105.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1698.1, + "std": 39.2, + "cv_pct": 2.31, + "stability": "noisy", + "runs": [ + 1715.3, + 1751.9, + 1713.1, + 1671.7, + 1724.1, + 1660.5, + 1687.0, + 1622.5, + 1658.9, + 1760.4, + 1680.1, + 1676.2, + 1735.0, + 1716.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json index af5e0a5c..3e34cc78 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json @@ -553,7 +553,44 @@ "sustained_throughput_tokens_per_sec": 104.5, "throttle_ratio": 0.761, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 92.3 + "ttft_p99_drift_ms": 92.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 104.5, + "std": 7.2, + "cv_pct": 6.86, + "stability": "unstable", + "runs": [ + 96.8, + 106.6, + 106.6, + 100.9, + 109.9, + 105.4, + 98.3, + 105.1, + 103.0, + 113.9, + 101.1, + 95.9, + 108.9, + 110.1, + 94.4, + 102.9, + 111.3, + 103.9, + 118.0, + 89.8, + 114.0, + 101.2, + 95.2, + 108.8, + 115.3, + 94.1, + 109.8, + 104.4 + ] + } }, "interactive": { "ttft_ms_p50": 277.19, diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json index cb86a2ad..a6287796 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json @@ -451,7 
+451,44 @@ "sustained_throughput_tokens_per_sec": 104.5, "throttle_ratio": 0.761, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 92.3 + "ttft_p99_drift_ms": 92.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 104.5, + "std": 7.2, + "cv_pct": 6.86, + "stability": "unstable", + "runs": [ + 96.8, + 106.6, + 106.6, + 100.9, + 109.9, + 105.4, + 98.3, + 105.1, + 103.0, + 113.9, + 101.1, + 95.9, + 108.9, + 110.1, + 94.4, + 102.9, + 111.3, + 103.9, + 118.0, + 89.8, + 114.0, + 101.2, + 95.2, + 108.8, + 115.3, + 94.1, + 109.8, + 104.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json index f57c2d05..67f09696 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json @@ -550,7 +550,44 @@ "sustained_throughput_tokens_per_sec": 325.5, "throttle_ratio": 0.886, "throttle_onset_minute": 5.0, - "ttft_p99_drift_ms": -19.3 + "ttft_p99_drift_ms": -19.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 325.5, + "std": 9.7, + "cv_pct": 2.99, + "stability": "noisy", + "runs": [ + 348.2, + 317.8, + 340.5, + 312.2, + 342.5, + 323.8, + 311.0, + 329.7, + 314.2, + 308.6, + 338.2, + 323.2, + 326.9, + 326.3, + 324.1, + 329.2, + 329.8, + 323.9, + 333.7, + 318.7, + 329.3, + 316.3, + 317.7, + 334.9, + 329.4, + 322.7, + 321.0, + 319.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json index 433f8f6f..76ba3ee7 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json +++ 
b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json @@ -451,7 +451,44 @@ "sustained_throughput_tokens_per_sec": 325.5, "throttle_ratio": 0.886, "throttle_onset_minute": 5.0, - "ttft_p99_drift_ms": -19.3 + "ttft_p99_drift_ms": -19.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 325.5, + "std": 9.7, + "cv_pct": 2.99, + "stability": "noisy", + "runs": [ + 348.2, + 317.8, + 340.5, + 312.2, + 342.5, + 323.8, + 311.0, + 329.7, + 314.2, + 308.6, + 338.2, + 323.2, + 326.9, + 326.3, + 324.1, + 329.2, + 329.8, + 323.9, + 333.7, + 318.7, + 329.3, + 316.3, + 317.7, + 334.9, + 329.4, + 322.7, + 321.0, + 319.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/result.json index 1308f73d..11baab5a 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/result.json @@ -522,7 +522,44 @@ "sustained_throughput_tokens_per_sec": 707.5, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -33.3 + "ttft_p99_drift_ms": -33.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 707.5, + "std": 9.9, + "cv_pct": 1.4, + "stability": "stable", + "runs": [ + 700.3, + 705.2, + 720.8, + 690.5, + 705.3, + 718.8, + 706.6, + 711.1, + 700.4, + 708.3, + 720.0, + 710.9, + 691.8, + 699.3, + 726.0, + 702.1, + 708.6, + 701.3, + 722.9, + 700.6, + 714.3, + 701.7, + 695.2, + 728.2, + 699.1, + 705.2, + 708.6, + 707.1 + ] + } }, "burst": { "sla_ttft_ms": 500, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/sustained/result.json index cf184255..62c1d274 100644 
--- a/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/sustained/result.json @@ -422,7 +422,44 @@ "sustained_throughput_tokens_per_sec": 707.5, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -33.3 + "ttft_p99_drift_ms": -33.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 707.5, + "std": 9.9, + "cv_pct": 1.4, + "stability": "stable", + "runs": [ + 700.3, + 705.2, + 720.8, + 690.5, + 705.3, + 718.8, + 706.6, + 711.1, + 700.4, + 708.3, + 720.0, + 710.9, + 691.8, + 699.3, + 726.0, + 702.1, + 708.6, + 701.3, + 722.9, + 700.6, + 714.3, + 701.7, + 695.2, + 728.2, + 699.1, + 705.2, + 708.6, + 707.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json index fb307d1f..262863b1 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json @@ -425,7 +425,30 @@ "sustained_throughput_tokens_per_sec": 676.2, "throttle_ratio": 0.429, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -29850.4 + "ttft_p99_drift_ms": -29850.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 676.2, + "std": 105.7, + "cv_pct": 15.64, + "stability": "unstable", + "runs": [ + 310.3, + 701.9, + 691.2, + 717.0, + 697.2, + 705.7, + 708.4, + 704.3, + 690.8, + 713.3, + 698.7, + 694.2, + 710.3, + 723.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json index 
9d9b9a61..b7c57364 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json @@ -312,7 +312,30 @@ "sustained_throughput_tokens_per_sec": 676.2, "throttle_ratio": 0.429, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -29850.4 + "ttft_p99_drift_ms": -29850.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 676.2, + "std": 105.7, + "cv_pct": 15.64, + "stability": "unstable", + "runs": [ + 310.3, + 701.9, + 691.2, + 717.0, + 697.2, + 705.7, + 708.4, + 704.3, + 690.8, + 713.3, + 698.7, + 694.2, + 710.3, + 723.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json index eb2298d0..b783b3b3 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json @@ -425,7 +425,30 @@ "sustained_throughput_tokens_per_sec": 1381.4, "throttle_ratio": 0.439, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -30389.8 + "ttft_p99_drift_ms": -30389.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1381.4, + "std": 213.7, + "cv_pct": 15.47, + "stability": "unstable", + "runs": [ + 641.1, + 1449.1, + 1415.5, + 1411.0, + 1429.2, + 1424.7, + 1453.5, + 1454.5, + 1422.3, + 1435.2, + 1456.1, + 1440.6, + 1447.5, + 1459.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json index 22d6cdb7..b765c762 100644 --- 
a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json @@ -312,7 +312,30 @@ "sustained_throughput_tokens_per_sec": 1381.4, "throttle_ratio": 0.439, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -30389.8 + "ttft_p99_drift_ms": -30389.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1381.4, + "std": 213.7, + "cv_pct": 15.47, + "stability": "unstable", + "runs": [ + 641.1, + 1449.1, + 1415.5, + 1411.0, + 1429.2, + 1424.7, + 1453.5, + 1454.5, + 1422.3, + 1435.2, + 1456.1, + 1440.6, + 1447.5, + 1459.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json index bac4cb5a..3ef8e5df 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json @@ -425,7 +425,30 @@ "sustained_throughput_tokens_per_sec": 1148.7, "throttle_ratio": 0.36, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -35987.3 + "ttft_p99_drift_ms": -35987.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1148.7, + "std": 204.2, + "cv_pct": 17.78, + "stability": "unstable", + "runs": [ + 440.3, + 1192.8, + 1217.1, + 1190.1, + 1223.8, + 1197.8, + 1200.5, + 1215.8, + 1201.4, + 1197.9, + 1216.1, + 1187.5, + 1191.1, + 1209.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json index 7ae61d5f..2d1560bf 100644 --- 
a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json @@ -312,7 +312,30 @@ "sustained_throughput_tokens_per_sec": 1148.7, "throttle_ratio": 0.36, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -35987.3 + "ttft_p99_drift_ms": -35987.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1148.7, + "std": 204.2, + "cv_pct": 17.78, + "stability": "unstable", + "runs": [ + 440.3, + 1192.8, + 1217.1, + 1190.1, + 1223.8, + 1197.8, + 1200.5, + 1215.8, + 1201.4, + 1197.9, + 1216.1, + 1187.5, + 1191.1, + 1209.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/result.json index e665b3f9..5b6f5ea4 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/result.json @@ -466,7 +466,44 @@ "sustained_throughput_tokens_per_sec": 51.5, "throttle_ratio": 0.845, "throttle_onset_minute": 11.0, - "ttft_p99_drift_ms": -35433.6 + "ttft_p99_drift_ms": -35433.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 51.5, + "std": 2.0, + "cv_pct": 3.83, + "stability": "noisy", + "runs": [ + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 55.4, + 51.3, + 51.2, + 46.9, + 55.4, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 55.4, + 47.0, + 55.5, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/sustained/result.json index 1c772358..5d0c5acb 100644 
--- a/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/sustained/result.json @@ -422,7 +422,44 @@ "sustained_throughput_tokens_per_sec": 51.5, "throttle_ratio": 0.845, "throttle_onset_minute": 11.0, - "ttft_p99_drift_ms": -35433.6 + "ttft_p99_drift_ms": -35433.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 51.5, + "std": 2.0, + "cv_pct": 3.83, + "stability": "noisy", + "runs": [ + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 55.4, + 51.3, + 51.2, + 46.9, + 55.4, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 55.4, + 47.0, + 55.5, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json index cac5f1b6..8219aee4 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json @@ -359,7 +359,30 @@ "sustained_throughput_tokens_per_sec": 3941.2, "throttle_ratio": 0.137, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -49045.0 + "ttft_p99_drift_ms": -49045.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3941.2, + "std": 974.0, + "cv_pct": 24.71, + "stability": "unstable", + "runs": [ + 683.0, + 4100.0, + 4041.5, + 4995.0, + 4429.7, + 4028.5, + 4021.4, + 4158.3, + 3928.9, + 4175.5, + 4112.6, + 4274.6, + 4203.8, + 4024.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json index 7dc690da..57d94568 100644 --- 
a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 3941.2, "throttle_ratio": 0.137, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -49045.0 + "ttft_p99_drift_ms": -49045.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3941.2, + "std": 974.0, + "cv_pct": 24.71, + "stability": "unstable", + "runs": [ + 683.0, + 4100.0, + 4041.5, + 4995.0, + 4429.7, + 4028.5, + 4021.4, + 4158.3, + 3928.9, + 4175.5, + 4112.6, + 4274.6, + 4203.8, + 4024.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/result.json index 96df1a3f..719e3163 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/result.json @@ -478,7 +478,44 @@ "sustained_throughput_tokens_per_sec": 907.1, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -1.0 + "ttft_p99_drift_ms": -1.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 907.1, + "std": 11.3, + "cv_pct": 1.25, + "stability": "stable", + "runs": [ + 932.2, + 905.7, + 909.4, + 893.4, + 919.9, + 906.1, + 904.7, + 913.0, + 911.6, + 910.9, + 894.4, + 912.3, + 910.6, + 905.7, + 910.2, + 903.0, + 898.6, + 911.5, + 888.3, + 911.6, + 900.5, + 903.7, + 924.4, + 883.7, + 911.4, + 914.1, + 922.4, + 884.7 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/sustained/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/sustained/result.json index 
f43caa42..325ab779 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/sustained/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 907.1, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -1.0 + "ttft_p99_drift_ms": -1.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 907.1, + "std": 11.3, + "cv_pct": 1.25, + "stability": "stable", + "runs": [ + 932.2, + 905.7, + 909.4, + 893.4, + 919.9, + 906.1, + 904.7, + 913.0, + 911.6, + 910.9, + 894.4, + 912.3, + 910.6, + 905.7, + 910.2, + 903.0, + 898.6, + 911.5, + 888.3, + 911.6, + 900.5, + 903.7, + 924.4, + 883.7, + 911.4, + 914.1, + 922.4, + 884.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json index f718da39..d6d790d1 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json @@ -421,7 +421,44 @@ "sustained_throughput_tokens_per_sec": 142.6, "throttle_ratio": 0.8, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 327.4 + "ttft_p99_drift_ms": 327.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 142.6, + "std": 13.3, + "cv_pct": 9.33, + "stability": "unstable", + "runs": [ + 136.5, + 136.6, + 136.5, + 136.5, + 170.7, + 136.5, + 136.5, + 136.5, + 136.5, + 170.7, + 136.5, + 136.6, + 136.6, + 136.5, + 170.5, + 136.5, + 136.6, + 136.6, + 136.5, + 136.6, + 170.7, + 136.5, + 136.5, + 136.6, + 136.5, + 170.7, + 136.5, + 136.6 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json 
b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json index dad0da89..e50109d1 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 142.6, "throttle_ratio": 0.8, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 327.4 + "ttft_p99_drift_ms": 327.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 142.6, + "std": 13.3, + "cv_pct": 9.33, + "stability": "unstable", + "runs": [ + 136.5, + 136.6, + 136.5, + 136.5, + 170.7, + 136.5, + 136.5, + 136.5, + 136.5, + 170.7, + 136.5, + 136.6, + 136.6, + 136.5, + 170.5, + 136.5, + 136.6, + 136.6, + 136.5, + 136.6, + 170.7, + 136.5, + 136.5, + 136.6, + 136.5, + 170.7, + 136.5, + 136.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/result.json index 396625d1..3e139b6c 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 6144.7, "throttle_ratio": 0.96, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -19.0 + "ttft_p99_drift_ms": -19.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 6144.7, + "std": 64.6, + "cv_pct": 1.05, + "stability": "stable", + "runs": [ + 6336.2, + 6162.6, + 6134.7, + 6123.2, + 6087.1, + 6131.9, + 6123.4, + 6204.8, + 6085.4, + 6082.5, + 6153.4, + 6160.7, + 6110.4, + 6129.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/sustained/result.json 
b/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/sustained/result.json index 9d299d69..007da95e 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/sustained/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 6144.7, "throttle_ratio": 0.96, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -19.0 + "ttft_p99_drift_ms": -19.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 6144.7, + "std": 64.6, + "cv_pct": 1.05, + "stability": "stable", + "runs": [ + 6336.2, + 6162.6, + 6134.7, + 6123.2, + 6087.1, + 6131.9, + 6123.4, + 6204.8, + 6085.4, + 6082.5, + 6153.4, + 6160.7, + 6110.4, + 6129.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json index b67c6d7d..8b542132 100644 --- a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json +++ b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json @@ -526,7 +526,44 @@ "sustained_throughput_tokens_per_sec": 709.2, "throttle_ratio": 0.791, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": 45.9 + "ttft_p99_drift_ms": 45.9, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 709.2, + "std": 40.9, + "cv_pct": 5.77, + "stability": "unstable", + "runs": [ + 851.5, + 842.9, + 677.1, + 719.3, + 673.5, + 699.1, + 702.7, + 686.1, + 706.4, + 676.1, + 719.9, + 690.8, + 711.4, + 703.6, + 704.7, + 700.4, + 699.5, + 694.9, + 707.4, + 674.7, + 705.4, + 705.4, + 713.4, + 690.6, + 696.4, + 701.8, + 696.1, + 705.6 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json 
b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json index 412e3363..70d91c0f 100644 --- a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json @@ -428,7 +428,44 @@ "sustained_throughput_tokens_per_sec": 709.2, "throttle_ratio": 0.791, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": 45.9 + "ttft_p99_drift_ms": 45.9, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 709.2, + "std": 40.9, + "cv_pct": 5.77, + "stability": "unstable", + "runs": [ + 851.5, + 842.9, + 677.1, + 719.3, + 673.5, + 699.1, + 702.7, + 686.1, + 706.4, + 676.1, + 719.9, + 690.8, + 711.4, + 703.6, + 704.7, + 700.4, + 699.5, + 694.9, + 707.4, + 674.7, + 705.4, + 705.4, + 713.4, + 690.6, + 696.4, + 701.8, + 696.1, + 705.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/result.json index af08ca34..43b139da 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/result.json @@ -457,7 +457,30 @@ "sustained_throughput_tokens_per_sec": 709.4, "throttle_ratio": 0.868, "throttle_onset_minute": 13.0, - "ttft_p99_drift_ms": -149.4 + "ttft_p99_drift_ms": -149.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.4, + "std": 30.8, + "cv_pct": 4.34, + "stability": "noisy", + "runs": [ + 709.5, + 703.8, + 729.3, + 727.1, + 713.1, + 728.9, + 721.0, + 715.9, + 726.3, + 734.4, + 716.6, + 727.1, + 641.9, + 637.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/sustained/result.json 
b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/sustained/result.json index dc96dc06..5d2bc3cc 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/sustained/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 709.4, "throttle_ratio": 0.868, "throttle_onset_minute": 13.0, - "ttft_p99_drift_ms": -149.4 + "ttft_p99_drift_ms": -149.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.4, + "std": 30.8, + "cv_pct": 4.34, + "stability": "noisy", + "runs": [ + 709.5, + 703.8, + 729.3, + 727.1, + 713.1, + 728.9, + 721.0, + 715.9, + 726.3, + 734.4, + 716.6, + 727.1, + 641.9, + 637.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json index 2d2ab534..5bacdc39 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json @@ -457,7 +457,30 @@ "sustained_throughput_tokens_per_sec": 713.4, "throttle_ratio": 0.888, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -69.4 + "ttft_p99_drift_ms": -69.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 713.4, + "std": 21.2, + "cv_pct": 2.97, + "stability": "noisy", + "runs": [ + 736.7, + 762.2, + 721.6, + 676.8, + 719.7, + 711.0, + 715.7, + 695.0, + 712.8, + 702.3, + 727.1, + 685.7, + 708.4, + 712.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json index f1c1cbfb..a1017f7e 100644 --- 
a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 713.4, "throttle_ratio": 0.888, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -69.4 + "ttft_p99_drift_ms": -69.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 713.4, + "std": 21.2, + "cv_pct": 2.97, + "stability": "noisy", + "runs": [ + 736.7, + 762.2, + 721.6, + 676.8, + 719.7, + 711.0, + 715.7, + 695.0, + 712.8, + 702.3, + 727.1, + 685.7, + 708.4, + 712.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/result.json index fd1ef782..73d2749d 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/result.json @@ -457,7 +457,30 @@ "sustained_throughput_tokens_per_sec": 649.9, "throttle_ratio": 0.886, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -146.9 + "ttft_p99_drift_ms": -146.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 649.9, + "std": 21.8, + "cv_pct": 3.35, + "stability": "noisy", + "runs": [ + 713.6, + 677.3, + 649.8, + 636.1, + 652.4, + 646.2, + 633.3, + 649.0, + 638.9, + 655.1, + 642.4, + 636.4, + 632.1, + 636.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/sustained/result.json index 6caffea6..9705f2f2 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/sustained/result.json 
@@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 649.9, "throttle_ratio": 0.886, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -146.9 + "ttft_p99_drift_ms": -146.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 649.9, + "std": 21.8, + "cv_pct": 3.35, + "stability": "noisy", + "runs": [ + 713.6, + 677.3, + 649.8, + 636.1, + 652.4, + 646.2, + 633.3, + 649.0, + 638.9, + 655.1, + 642.4, + 636.4, + 632.1, + 636.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/result.json index 488b494f..51797ff3 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/result.json @@ -457,7 +457,30 @@ "sustained_throughput_tokens_per_sec": 708.2, "throttle_ratio": 0.878, "throttle_onset_minute": 11.0, - "ttft_p99_drift_ms": -145.8 + "ttft_p99_drift_ms": -145.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 708.2, + "std": 34.8, + "cv_pct": 4.91, + "stability": "noisy", + "runs": [ + 710.4, + 720.8, + 741.0, + 742.8, + 739.9, + 728.3, + 734.6, + 723.4, + 737.2, + 704.2, + 664.2, + 654.3, + 662.2, + 652.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/sustained/result.json index 801e6a43..4ef6e3cb 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/sustained/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 708.2, "throttle_ratio": 0.878, "throttle_onset_minute": 11.0, - "ttft_p99_drift_ms": -145.8 + "ttft_p99_drift_ms": -145.8, + 
"throughput_post_warmup_reliability": { + "n": 14, + "mean": 708.2, + "std": 34.8, + "cv_pct": 4.91, + "stability": "noisy", + "runs": [ + 710.4, + 720.8, + 741.0, + 742.8, + 739.9, + 728.3, + 734.6, + 723.4, + 737.2, + 704.2, + 664.2, + 654.3, + 662.2, + 652.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/result.json index 980a521e..13d327f9 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/result.json @@ -457,7 +457,30 @@ "sustained_throughput_tokens_per_sec": 694.7, "throttle_ratio": 0.932, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -100.3 + "ttft_p99_drift_ms": -100.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 694.7, + "std": 12.1, + "cv_pct": 1.74, + "stability": "stable", + "runs": [ + 730.5, + 688.0, + 700.2, + 690.9, + 686.4, + 704.0, + 690.4, + 691.5, + 700.7, + 690.2, + 695.2, + 680.6, + 692.2, + 685.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/sustained/result.json index 736cc3f8..0d28b31f 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/sustained/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 694.7, "throttle_ratio": 0.932, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -100.3 + "ttft_p99_drift_ms": -100.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 694.7, + "std": 12.1, + "cv_pct": 1.74, + "stability": "stable", + "runs": [ + 730.5, + 688.0, + 700.2, + 690.9, + 686.4, + 704.0, + 690.4, + 
691.5, + 700.7, + 690.2, + 695.2, + 680.6, + 692.2, + 685.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json index d36e3809..c5b80fc8 100644 --- a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json +++ b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json @@ -471,7 +471,44 @@ "sustained_throughput_tokens_per_sec": 132.9, "throttle_ratio": 0.6, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -55.8 + "ttft_p99_drift_ms": -55.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 132.9, + "std": 14.2, + "cv_pct": 10.69, + "stability": "unstable", + "runs": [ + 136.5, + 136.6, + 136.4, + 136.5, + 136.6, + 136.6, + 136.4, + 136.5, + 136.5, + 136.6, + 136.6, + 136.5, + 170.7, + 136.5, + 136.6, + 136.5, + 136.5, + 136.5, + 136.6, + 136.5, + 136.6, + 136.5, + 136.5, + 136.5, + 102.4, + 102.4, + 102.4, + 102.4 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json index 6d8e3212..e9e264b1 100644 --- a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json @@ -428,7 +428,44 @@ "sustained_throughput_tokens_per_sec": 132.9, "throttle_ratio": 0.6, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -55.8 + "ttft_p99_drift_ms": -55.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 132.9, + "std": 14.2, + "cv_pct": 10.69, + "stability": "unstable", + "runs": [ + 136.5, + 136.6, + 136.4, + 136.5, + 136.6, + 136.6, + 136.4, + 136.5, + 136.5, + 136.6, + 136.6, + 136.5, + 170.7, + 136.5, + 136.6, + 136.5, + 136.5, + 136.5, + 
136.6, + 136.5, + 136.6, + 136.5, + 136.5, + 136.5, + 102.4, + 102.4, + 102.4, + 102.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/result.json b/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/result.json index 0f171076..7ffc29f7 100644 --- a/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/result.json +++ b/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/result.json @@ -431,7 +431,30 @@ "sustained_throughput_tokens_per_sec": 1425.4, "throttle_ratio": 0.826, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -364.2 + "ttft_p99_drift_ms": -364.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1425.4, + "std": 66.2, + "cv_pct": 4.65, + "stability": "noisy", + "runs": [ + 1306.2, + 1417.0, + 1440.4, + 1411.2, + 1413.3, + 1581.2, + 1523.2, + 1435.7, + 1342.6, + 1396.7, + 1408.2, + 1415.5, + 1436.7, + 1427.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/sustained/result.json b/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/sustained/result.json index 3889eef9..0d2a661d 100644 --- a/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/sustained/result.json @@ -347,7 +347,30 @@ "sustained_throughput_tokens_per_sec": 1425.4, "throttle_ratio": 0.826, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -364.2 + "ttft_p99_drift_ms": -364.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1425.4, + "std": 66.2, + "cv_pct": 4.65, + "stability": "noisy", + "runs": [ + 1306.2, + 1417.0, + 1440.4, + 1411.2, + 1413.3, + 1581.2, + 1523.2, + 1435.7, + 1342.6, + 1396.7, + 1408.2, + 1415.5, + 1436.7, + 1427.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json 
b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json index 01daefd8..60bc2c9d 100644 --- a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json +++ b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json @@ -596,7 +596,44 @@ "sustained_throughput_tokens_per_sec": 241.2, "throttle_ratio": 0.807, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -1.5 + "ttft_p99_drift_ms": -1.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 241.2, + "std": 12.7, + "cv_pct": 5.28, + "stability": "unstable", + "runs": [ + 283.6, + 281.0, + 237.6, + 240.6, + 232.5, + 236.4, + 238.2, + 242.7, + 241.5, + 231.0, + 244.4, + 236.3, + 242.5, + 235.3, + 230.9, + 237.7, + 243.4, + 235.9, + 240.4, + 232.4, + 240.7, + 246.4, + 232.5, + 239.3, + 233.9, + 235.6, + 252.2, + 228.8 + ] + } }, "interactive": { "ttft_ms_p50": 77.23, diff --git a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json index 3a4cd706..4d23c22e 100644 --- a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json +++ b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json @@ -497,7 +497,44 @@ "sustained_throughput_tokens_per_sec": 241.2, "throttle_ratio": 0.807, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -1.5 + "ttft_p99_drift_ms": -1.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 241.2, + "std": 12.7, + "cv_pct": 5.28, + "stability": "unstable", + "runs": [ + 283.6, + 281.0, + 237.6, + 240.6, + 232.5, + 236.4, + 238.2, + 242.7, + 241.5, + 231.0, + 244.4, + 236.3, + 242.5, + 235.3, + 230.9, + 237.7, + 243.4, + 235.9, + 240.4, + 232.4, + 240.7, + 246.4, + 232.5, + 239.3, + 233.9, + 235.6, + 252.2, + 228.8 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/result.json b/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/result.json index b27ea8ea..ffb5d59a 100644 --- a/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/result.json +++ b/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/result.json @@ -594,7 +594,44 @@ "sustained_throughput_tokens_per_sec": 591.5, "throttle_ratio": 0.939, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 12.0 + "ttft_p99_drift_ms": 12.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 591.5, + "std": 9.7, + "cv_pct": 1.64, + "stability": "stable", + "runs": [ + 576.8, + 613.1, + 587.8, + 582.0, + 595.7, + 591.5, + 589.9, + 585.6, + 599.3, + 594.4, + 596.9, + 576.0, + 608.9, + 579.4, + 599.9, + 588.2, + 607.2, + 576.5, + 586.3, + 582.1, + 593.9, + 599.3, + 597.4, + 588.5, + 583.3, + 598.1, + 588.0, + 596.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/sustained/result.json b/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/sustained/result.json index 08974360..2cf787f8 100644 --- a/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/sustained/result.json +++ b/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/sustained/result.json @@ -498,7 +498,44 @@ "sustained_throughput_tokens_per_sec": 591.5, "throttle_ratio": 0.939, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 12.0 + "ttft_p99_drift_ms": 12.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 591.5, + "std": 9.7, + "cv_pct": 1.64, + "stability": "stable", + "runs": [ + 576.8, + 613.1, + 587.8, + 582.0, + 595.7, + 591.5, + 589.9, + 585.6, + 599.3, + 594.4, + 596.9, + 576.0, + 608.9, + 579.4, + 599.9, + 588.2, + 607.2, + 576.5, + 586.3, + 582.1, + 593.9, + 599.3, + 597.4, + 588.5, + 583.3, + 598.1, + 588.0, + 596.3 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/result.json b/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/result.json index e6b4b502..71a19486 100644 --- a/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/result.json @@ -523,7 +523,44 @@ "sustained_throughput_tokens_per_sec": 486.6, "throttle_ratio": 0.918, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 1.0 + "ttft_p99_drift_ms": 1.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 486.6, + "std": 9.5, + "cv_pct": 1.95, + "stability": "stable", + "runs": [ + 473.5, + 493.5, + 488.1, + 510.3, + 497.4, + 487.5, + 499.5, + 485.3, + 478.4, + 480.3, + 493.7, + 476.1, + 490.1, + 479.3, + 490.6, + 489.5, + 479.2, + 488.8, + 477.4, + 473.5, + 493.7, + 488.2, + 476.5, + 495.9, + 468.6, + 496.5, + 484.2, + 489.8 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/sustained/result.json index 4a35f301..9339f76d 100644 --- a/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/sustained/result.json @@ -422,7 +422,44 @@ "sustained_throughput_tokens_per_sec": 486.6, "throttle_ratio": 0.918, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 1.0 + "ttft_p99_drift_ms": 1.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 486.6, + "std": 9.5, + "cv_pct": 1.95, + "stability": "stable", + "runs": [ + 473.5, + 493.5, + 488.1, + 510.3, + 497.4, + 487.5, + 499.5, + 485.3, + 478.4, + 480.3, + 493.7, + 476.1, + 490.1, + 479.3, + 490.6, + 489.5, + 479.2, + 488.8, + 477.4, + 473.5, + 493.7, + 488.2, + 476.5, + 495.9, + 468.6, + 496.5, + 484.2, + 
489.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/result.json index 10356053..fedfa82a 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 484.8, "throttle_ratio": 0.887, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -202.4 + "ttft_p99_drift_ms": -202.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 484.8, + "std": 15.2, + "cv_pct": 3.14, + "stability": "noisy", + "runs": [ + 443.9, + 489.5, + 495.1, + 480.6, + 500.6, + 476.2, + 492.7, + 486.5, + 482.4, + 486.2, + 499.3, + 465.9, + 499.2, + 488.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/sustained/result.json index d40598ee..99d5bd3a 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 484.8, "throttle_ratio": 0.887, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -202.4 + "ttft_p99_drift_ms": -202.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 484.8, + "std": 15.2, + "cv_pct": 3.14, + "stability": "noisy", + "runs": [ + 443.9, + 489.5, + 495.1, + 480.6, + 500.6, + 476.2, + 492.7, + 486.5, + 482.4, + 486.2, + 499.3, + 465.9, + 499.2, + 488.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json 
b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json index 688d6ca8..5e8e3865 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 494.9, "throttle_ratio": 0.92, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -111.0 + "ttft_p99_drift_ms": -111.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 494.9, + "std": 12.6, + "cv_pct": 2.55, + "stability": "noisy", + "runs": [ + 492.6, + 482.1, + 498.6, + 498.3, + 504.2, + 472.5, + 501.9, + 505.4, + 490.9, + 483.1, + 512.3, + 477.3, + 495.1, + 513.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json index 21d4e7a4..e50503cb 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 494.9, "throttle_ratio": 0.92, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -111.0 + "ttft_p99_drift_ms": -111.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 494.9, + "std": 12.6, + "cv_pct": 2.55, + "stability": "noisy", + "runs": [ + 492.6, + 482.1, + 498.6, + 498.3, + 504.2, + 472.5, + 501.9, + 505.4, + 490.9, + 483.1, + 512.3, + 477.3, + 495.1, + 513.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/result.json index 7979d6b7..d0708288 100644 --- 
a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 648.8, "throttle_ratio": 0.934, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -206.6 + "ttft_p99_drift_ms": -206.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 648.8, + "std": 9.5, + "cv_pct": 1.46, + "stability": "stable", + "runs": [ + 625.3, + 669.7, + 649.9, + 651.4, + 652.7, + 645.9, + 645.9, + 642.4, + 647.3, + 647.3, + 648.6, + 648.7, + 648.9, + 658.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/sustained/result.json index a01e5d58..f669eeb0 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 648.8, "throttle_ratio": 0.934, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -206.6 + "ttft_p99_drift_ms": -206.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 648.8, + "std": 9.5, + "cv_pct": 1.46, + "stability": "stable", + "runs": [ + 625.3, + 669.7, + 649.9, + 651.4, + 652.7, + 645.9, + 645.9, + 642.4, + 647.3, + 647.3, + 648.6, + 648.7, + 648.9, + 658.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json index 5aa2dc27..40be3d5f 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json +++ 
b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 645.2, "throttle_ratio": 0.889, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -199.4 + "ttft_p99_drift_ms": -199.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 645.2, + "std": 18.2, + "cv_pct": 2.82, + "stability": "noisy", + "runs": [ + 593.7, + 667.9, + 642.7, + 648.9, + 646.6, + 648.3, + 657.5, + 641.8, + 648.5, + 660.6, + 643.0, + 654.4, + 623.1, + 655.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json index cb0bb6a6..9da3e271 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 645.2, "throttle_ratio": 0.889, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -199.4 + "ttft_p99_drift_ms": -199.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 645.2, + "std": 18.2, + "cv_pct": 2.82, + "stability": "noisy", + "runs": [ + 593.7, + 667.9, + 642.7, + 648.9, + 646.6, + 648.3, + 657.5, + 641.8, + 648.5, + 660.6, + 643.0, + 654.4, + 623.1, + 655.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json index d2844329..dad65d4b 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 533.8, "throttle_ratio": 0.908, 
"throttle_onset_minute": null, - "ttft_p99_drift_ms": -110.9 + "ttft_p99_drift_ms": -110.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 533.8, + "std": 12.3, + "cv_pct": 2.3, + "stability": "noisy", + "runs": [ + 501.4, + 534.3, + 552.1, + 527.3, + 528.0, + 530.2, + 539.2, + 546.2, + 526.4, + 537.3, + 526.7, + 540.2, + 542.8, + 541.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json index b2534826..c3467ba5 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 533.8, "throttle_ratio": 0.908, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -110.9 + "ttft_p99_drift_ms": -110.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 533.8, + "std": 12.3, + "cv_pct": 2.3, + "stability": "noisy", + "runs": [ + 501.4, + 534.3, + 552.1, + 527.3, + 528.0, + 530.2, + 539.2, + 546.2, + 526.4, + 537.3, + 526.7, + 540.2, + 542.8, + 541.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json index 8eb272c8..601fa1f0 100644 --- a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json @@ -464,7 +464,44 @@ "sustained_throughput_tokens_per_sec": 41.4, "throttle_ratio": 0.499, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 516.2 + "ttft_p99_drift_ms": 516.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 41.4, + "std": 14.3, + "cv_pct": 34.47, + "stability": 
"unstable", + "runs": [ + 34.1, + 34.2, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.1, + 68.3, + 34.2, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.2, + 68.3 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json index d6ad4f0d..92c49d4f 100644 --- a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json @@ -422,7 +422,44 @@ "sustained_throughput_tokens_per_sec": 41.4, "throttle_ratio": 0.499, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 516.2 + "ttft_p99_drift_ms": 516.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 41.4, + "std": 14.3, + "cv_pct": 34.47, + "stability": "unstable", + "runs": [ + 34.1, + 34.2, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.1, + 68.3, + 34.2, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.2, + 68.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/result.json b/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/result.json index 927da139..a3b8ad29 100644 --- a/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/result.json @@ -359,7 +359,30 @@ "sustained_throughput_tokens_per_sec": 1771.6, "throttle_ratio": 0.953, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -71.8 + "ttft_p99_drift_ms": -71.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1771.6, + "std": 28.4, + "cv_pct": 1.6, + "stability": "stable", + 
"runs": [ + 1749.4, + 1781.1, + 1757.1, + 1808.9, + 1813.9, + 1728.8, + 1761.8, + 1737.5, + 1774.0, + 1778.9, + 1786.9, + 1794.3, + 1729.1, + 1800.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/sustained/result.json index 1d501f43..7938869b 100644 --- a/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 1771.6, "throttle_ratio": 0.953, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -71.8 + "ttft_p99_drift_ms": -71.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1771.6, + "std": 28.4, + "cv_pct": 1.6, + "stability": "stable", + "runs": [ + 1749.4, + 1781.1, + 1757.1, + 1808.9, + 1813.9, + 1728.8, + 1761.8, + 1737.5, + 1774.0, + 1778.9, + 1786.9, + 1794.3, + 1729.1, + 1800.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/result.json b/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/result.json index 29e70e1b..768a8fd0 100644 --- a/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/result.json +++ b/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/result.json @@ -564,7 +564,44 @@ "sustained_throughput_tokens_per_sec": 176.0, "throttle_ratio": 0.847, "throttle_onset_minute": 5.0, - "ttft_p99_drift_ms": -9.3 + "ttft_p99_drift_ms": -9.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 176.0, + "std": 6.5, + "cv_pct": 3.69, + "stability": "noisy", + "runs": [ + 174.4, + 171.1, + 181.0, + 169.7, + 177.7, + 181.8, + 176.6, + 177.2, + 177.7, + 176.4, + 177.1, + 168.1, + 175.3, + 176.6, + 179.5, + 173.6, + 185.1, + 160.2, + 186.4, + 161.8, + 189.2, + 
175.3, + 176.9, + 171.9, + 179.6, + 178.8, + 180.0, + 168.9 + ] + } }, "interactive": { "ttft_ms_p50": 133.03, diff --git a/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/sustained/result.json b/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/sustained/result.json index 3061cd4f..1ac693dc 100644 --- a/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/sustained/result.json +++ b/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/sustained/result.json @@ -462,7 +462,44 @@ "sustained_throughput_tokens_per_sec": 176.0, "throttle_ratio": 0.847, "throttle_onset_minute": 5.0, - "ttft_p99_drift_ms": -9.3 + "ttft_p99_drift_ms": -9.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 176.0, + "std": 6.5, + "cv_pct": 3.69, + "stability": "noisy", + "runs": [ + 174.4, + 171.1, + 181.0, + 169.7, + 177.7, + 181.8, + 176.6, + 177.2, + 177.7, + 176.4, + 177.1, + 168.1, + 175.3, + 176.6, + 179.5, + 173.6, + 185.1, + 160.2, + 186.4, + 161.8, + 189.2, + 175.3, + 176.9, + 171.9, + 179.6, + 178.8, + 180.0, + 168.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/result.json b/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/result.json index e373079d..368cc57b 100644 --- a/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/result.json +++ b/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/result.json @@ -561,7 +561,44 @@ "sustained_throughput_tokens_per_sec": 561.2, "throttle_ratio": 0.936, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -45.4 + "ttft_p99_drift_ms": -45.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 561.2, + "std": 9.6, + "cv_pct": 1.71, + "stability": "stable", + "runs": [ + 561.4, + 545.8, + 565.7, + 583.2, + 554.6, + 567.3, + 556.8, + 555.3, + 571.4, + 549.8, + 561.4, + 549.8, + 565.6, + 568.0, + 563.2, + 561.4, + 
562.6, + 570.8, + 546.6, + 561.1, + 576.4, + 549.6, + 550.5, + 573.5, + 556.8, + 555.4, + 555.4, + 573.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/sustained/result.json b/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/sustained/result.json index 73bdd2a1..29c90757 100644 --- a/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/sustained/result.json +++ b/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/sustained/result.json @@ -462,7 +462,44 @@ "sustained_throughput_tokens_per_sec": 561.2, "throttle_ratio": 0.936, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -45.4 + "ttft_p99_drift_ms": -45.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 561.2, + "std": 9.6, + "cv_pct": 1.71, + "stability": "stable", + "runs": [ + 561.4, + 545.8, + 565.7, + 583.2, + 554.6, + 567.3, + 556.8, + 555.3, + 571.4, + 549.8, + 561.4, + 549.8, + 565.6, + 568.0, + 563.2, + 561.4, + 562.6, + 570.8, + 546.6, + 561.1, + 576.4, + 549.6, + 550.5, + 573.5, + 556.8, + 555.4, + 555.4, + 573.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json index 66048c9b..a6161fb2 100644 --- a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json +++ b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json @@ -477,7 +477,44 @@ "sustained_throughput_tokens_per_sec": 116.6, "throttle_ratio": 0.748, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 11.6 + "ttft_p99_drift_ms": 11.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 116.6, + "std": 10.5, + "cv_pct": 8.99, + "stability": "unstable", + "runs": [ + 98.7, + 132.0, + 105.3, + 132.0, + 117.6, + 99.5, + 128.9, + 106.2, + 126.7, + 126.2, + 108.1, + 120.4, + 105.7, + 126.7, + 117.5, + 107.6, 
+ 120.4, + 113.0, + 126.8, + 124.2, + 103.4, + 110.6, + 121.4, + 118.3, + 125.2, + 102.6, + 110.5, + 130.0 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json index 49224331..d2032ab4 100644 --- a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json +++ b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 116.6, "throttle_ratio": 0.748, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 11.6 + "ttft_p99_drift_ms": 11.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 116.6, + "std": 10.5, + "cv_pct": 8.99, + "stability": "unstable", + "runs": [ + 98.7, + 132.0, + 105.3, + 132.0, + 117.6, + 99.5, + 128.9, + 106.2, + 126.7, + 126.2, + 108.1, + 120.4, + 105.7, + 126.7, + 117.5, + 107.6, + 120.4, + 113.0, + 126.8, + 124.2, + 103.4, + 110.6, + 121.4, + 118.3, + 125.2, + 102.6, + 110.5, + 130.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/result.json b/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/result.json index 7c987a20..b2d60369 100644 --- a/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/result.json +++ b/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 2837.3, "throttle_ratio": 0.983, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -51.3 + "ttft_p99_drift_ms": -51.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2837.3, + "std": 16.9, + "cv_pct": 0.6, + "stability": "stable", + "runs": [ + 2816.1, + 2841.7, + 2863.3, + 2824.1, + 2815.9, + 2862.6, + 2858.2, + 2813.6, + 2848.7, + 2828.0, + 2844.6, + 2837.7, + 
2830.5, + 2836.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/sustained/result.json b/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/sustained/result.json index 5d1125ae..05e85ae7 100644 --- a/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/sustained/result.json +++ b/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 2837.3, "throttle_ratio": 0.983, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -51.3 + "ttft_p99_drift_ms": -51.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2837.3, + "std": 16.9, + "cv_pct": 0.6, + "stability": "stable", + "runs": [ + 2816.1, + 2841.7, + 2863.3, + 2824.1, + 2815.9, + 2862.6, + 2858.2, + 2813.6, + 2848.7, + 2828.0, + 2844.6, + 2837.7, + 2830.5, + 2836.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/result.json b/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/result.json index fcc6a4cd..a747a7d5 100644 --- a/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/result.json +++ b/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 3880.8, "throttle_ratio": 0.982, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -48.2 + "ttft_p99_drift_ms": -48.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3880.8, + "std": 20.4, + "cv_pct": 0.52, + "stability": "stable", + "runs": [ + 3838.2, + 3867.3, + 3892.6, + 3896.5, + 3882.7, + 3870.2, + 3904.8, + 3845.2, + 3891.9, + 3873.4, + 3888.0, + 3885.3, + 3886.2, + 3908.5 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/sustained/result.json b/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/sustained/result.json index e983bcbf..67ac38d6 100644 --- a/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/sustained/result.json +++ b/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 3880.8, "throttle_ratio": 0.982, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -48.2 + "ttft_p99_drift_ms": -48.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3880.8, + "std": 20.4, + "cv_pct": 0.52, + "stability": "stable", + "runs": [ + 3838.2, + 3867.3, + 3892.6, + 3896.5, + 3882.7, + 3870.2, + 3904.8, + 3845.2, + 3891.9, + 3873.4, + 3888.0, + 3885.3, + 3886.2, + 3908.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json index 303f3772..2ef7b6b9 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json @@ -478,7 +478,44 @@ "sustained_throughput_tokens_per_sec": 376.2, "throttle_ratio": 0.901, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 18.3 + "ttft_p99_drift_ms": 18.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 376.2, + "std": 8.1, + "cv_pct": 2.15, + "stability": "noisy", + "runs": [ + 375.9, + 376.2, + 374.4, + 394.4, + 373.0, + 365.6, + 382.7, + 363.9, + 386.3, + 375.6, + 389.9, + 355.4, + 386.0, + 372.6, + 381.9, + 376.1, + 374.5, + 370.4, + 373.1, + 380.1, + 367.5, + 384.4, + 373.7, + 376.4, + 377.7, + 376.4, + 370.6, + 
379.9 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json index 4d90e875..fadd0afd 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 376.2, "throttle_ratio": 0.901, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 18.3 + "ttft_p99_drift_ms": 18.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 376.2, + "std": 8.1, + "cv_pct": 2.15, + "stability": "noisy", + "runs": [ + 375.9, + 376.2, + 374.4, + 394.4, + 373.0, + 365.6, + 382.7, + 363.9, + 386.3, + 375.6, + 389.9, + 355.4, + 386.0, + 372.6, + 381.9, + 376.1, + 374.5, + 370.4, + 373.1, + 380.1, + 367.5, + 384.4, + 373.7, + 376.4, + 377.7, + 376.4, + 370.6, + 379.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json index bd6f82b1..0f5a1978 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 371.9, "throttle_ratio": 0.894, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -239.9 + "ttft_p99_drift_ms": -239.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 371.9, + "std": 10.0, + "cv_pct": 2.69, + "stability": "noisy", + "runs": [ + 348.3, + 386.1, + 365.8, + 369.9, + 
372.0, + 370.2, + 372.6, + 371.2, + 389.7, + 362.3, + 380.4, + 374.5, + 373.2, + 370.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json index b3717fde..2247a062 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 371.9, "throttle_ratio": 0.894, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -239.9 + "ttft_p99_drift_ms": -239.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 371.9, + "std": 10.0, + "cv_pct": 2.69, + "stability": "noisy", + "runs": [ + 348.3, + 386.1, + 365.8, + 369.9, + 372.0, + 370.2, + 372.6, + 371.2, + 389.7, + 362.3, + 380.4, + 374.5, + 373.2, + 370.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/result.json index c557ea99..8b260dd4 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 586.1, "throttle_ratio": 0.969, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -131.6 + "ttft_p99_drift_ms": -131.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 586.1, + "std": 5.3, + "cv_pct": 0.9, + "stability": "stable", + "runs": [ + 581.9, + 582.3, + 590.4, + 582.1, + 588.5, + 592.1, + 591.0, + 588.4, + 582.5, + 573.7, + 
588.2, + 582.6, + 590.6, + 590.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/sustained/result.json index 3473e5c4..e2b9f0fb 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 586.1, "throttle_ratio": 0.969, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -131.6 + "ttft_p99_drift_ms": -131.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 586.1, + "std": 5.3, + "cv_pct": 0.9, + "stability": "stable", + "runs": [ + 581.9, + 582.3, + 590.4, + 582.1, + 588.5, + 592.1, + 591.0, + 588.4, + 582.5, + 573.7, + 588.2, + 582.6, + 590.6, + 590.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/result.json index 10e8e5a7..4dc5165e 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 816.3, "throttle_ratio": 0.924, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -201.9 + "ttft_p99_drift_ms": -201.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 816.3, + "std": 15.8, + "cv_pct": 1.94, + "stability": "stable", + "runs": [ + 778.0, + 818.5, + 841.8, + 803.7, + 816.3, + 809.1, + 829.1, + 820.8, + 806.2, + 815.9, + 826.8, + 810.4, + 813.1, + 838.2 + ] + } } }, 
"accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/sustained/result.json index 510302b5..4d15f3f6 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 816.3, "throttle_ratio": 0.924, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -201.9 + "ttft_p99_drift_ms": -201.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 816.3, + "std": 15.8, + "cv_pct": 1.94, + "stability": "stable", + "runs": [ + 778.0, + 818.5, + 841.8, + 803.7, + 816.3, + 809.1, + 829.1, + 820.8, + 806.2, + 815.9, + 826.8, + 810.4, + 813.1, + 838.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/result.json index fafdfb44..a692cf2c 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 584.6, "throttle_ratio": 0.953, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -246.4 + "ttft_p99_drift_ms": -246.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 584.6, + "std": 8.1, + "cv_pct": 1.38, + "stability": "stable", + "runs": [ + 565.5, + 571.7, + 589.7, + 590.6, + 581.9, + 582.3, + 590.4, + 591.0, + 590.5, + 582.2, + 582.2, + 590.3, + 582.1, + 593.5 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/sustained/result.json index 9616ccd0..b52408a5 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 584.6, "throttle_ratio": 0.953, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -246.4 + "ttft_p99_drift_ms": -246.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 584.6, + "std": 8.1, + "cv_pct": 1.38, + "stability": "stable", + "runs": [ + 565.5, + 571.7, + 589.7, + 590.6, + 581.9, + 582.3, + 590.4, + 591.0, + 590.5, + 582.2, + 582.2, + 590.3, + 582.1, + 593.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/result.json index 454de8a9..a8762aec 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 534.5, "throttle_ratio": 0.952, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -110.6 + "ttft_p99_drift_ms": -110.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 534.5, + "std": 5.8, + "cv_pct": 1.09, + "stability": "stable", + "runs": [ + 519.8, + 531.9, + 532.4, + 535.4, + 535.9, + 536.8, + 536.4, + 537.7, + 536.4, + 533.3, + 527.9, + 537.8, + 546.0, + 534.9 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/sustained/result.json index 357b1453..b7a08b4b 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 534.5, "throttle_ratio": 0.952, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -110.6 + "ttft_p99_drift_ms": -110.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 534.5, + "std": 5.8, + "cv_pct": 1.09, + "stability": "stable", + "runs": [ + 519.8, + 531.9, + 532.4, + 535.4, + 535.9, + 536.8, + 536.4, + 537.7, + 536.4, + 533.3, + 527.9, + 537.8, + 546.0, + 534.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json index 7bb06c17..89695625 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json @@ -421,7 +421,44 @@ "sustained_throughput_tokens_per_sec": 32.3, "throttle_ratio": 0.461, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 2424.7 + "ttft_p99_drift_ms": 2424.7, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 32.3, + "std": 11.7, + "cv_pct": 36.18, + "stability": "unstable", + "runs": [ + 25.6, + 25.6, + 25.6, + 55.4, + 25.6, + 29.9, + 25.6, + 51.2, + 25.6, + 29.9, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 25.6, + 55.5, + 25.6, + 29.9, + 51.1, + 25.7, + 25.6, + 29.9, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6 + ] + } }, 
"online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json index cb6e2f90..ae235b6a 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 32.3, "throttle_ratio": 0.461, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 2424.7 + "ttft_p99_drift_ms": 2424.7, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 32.3, + "std": 11.7, + "cv_pct": 36.18, + "stability": "unstable", + "runs": [ + 25.6, + 25.6, + 25.6, + 55.4, + 25.6, + 29.9, + 25.6, + 51.2, + 25.6, + 29.9, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 25.6, + 55.5, + 25.6, + 29.9, + 51.1, + 25.7, + 25.6, + 29.9, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json index 502115f5..b4d5719f 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 2895.0, "throttle_ratio": 0.773, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -18.1 + "ttft_p99_drift_ms": -18.1, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2895.0, + "std": 197.6, + "cv_pct": 6.83, + "stability": "unstable", + "runs": [ + 3526.2, + 3053.6, + 2843.1, + 2823.8, + 2814.7, + 2834.9, + 2805.7, + 2814.8, + 2790.3, + 2824.4, + 
2862.5, + 2960.7, + 2848.3, + 2726.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json index e5d7bf0c..3e00a02f 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 2895.0, "throttle_ratio": 0.773, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -18.1 + "ttft_p99_drift_ms": -18.1, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2895.0, + "std": 197.6, + "cv_pct": 6.83, + "stability": "unstable", + "runs": [ + 3526.2, + 3053.6, + 2843.1, + 2823.8, + 2814.7, + 2834.9, + 2805.7, + 2814.8, + 2790.3, + 2824.4, + 2862.5, + 2960.7, + 2848.3, + 2726.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/result.json b/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/result.json index 726f575d..0cb9ba46 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/result.json @@ -533,7 +533,44 @@ "sustained_throughput_tokens_per_sec": 265.3, "throttle_ratio": 0.877, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -18.4 + "ttft_p99_drift_ms": -18.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 265.3, + "std": 8.6, + "cv_pct": 3.25, + "stability": "noisy", + "runs": [ + 259.8, + 264.2, + 272.7, + 273.7, + 262.9, + 270.4, + 247.2, + 262.5, + 273.3, + 262.8, + 272.8, + 270.2, + 252.1, + 266.8, + 263.8, + 272.9, + 258.3, + 278.3, + 270.2, + 248.1, + 262.5, + 281.9, + 254.3, + 272.9, + 
270.5, + 260.2, + 259.2, + 264.1 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/sustained/result.json index b55019af..5c4fda4e 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/sustained/result.json @@ -432,7 +432,44 @@ "sustained_throughput_tokens_per_sec": 265.3, "throttle_ratio": 0.877, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -18.4 + "ttft_p99_drift_ms": -18.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 265.3, + "std": 8.6, + "cv_pct": 3.25, + "stability": "noisy", + "runs": [ + 259.8, + 264.2, + 272.7, + 273.7, + 262.9, + 270.4, + 247.2, + 262.5, + 273.3, + 262.8, + 272.8, + 270.2, + 252.1, + 266.8, + 263.8, + 272.9, + 258.3, + 278.3, + 270.2, + 248.1, + 262.5, + 281.9, + 254.3, + 272.9, + 270.5, + 260.2, + 259.2, + 264.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/result.json index b108afd2..db10bf97 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 272.0, "throttle_ratio": 0.874, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -232.8 + "ttft_p99_drift_ms": -232.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 272.0, + "std": 11.4, + "cv_pct": 4.19, + "stability": "noisy", + "runs": [ + 253.7, + 275.2, + 290.4, + 285.6, + 280.0, + 282.0, + 285.6, + 271.1, + 256.0, + 265.8, + 266.7, + 262.5, + 266.7, + 266.6 
+ ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/sustained/result.json index 06112e5a..01b6eee9 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 272.0, "throttle_ratio": 0.874, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -232.8 + "ttft_p99_drift_ms": -232.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 272.0, + "std": 11.4, + "cv_pct": 4.19, + "stability": "noisy", + "runs": [ + 253.7, + 275.2, + 290.4, + 285.6, + 280.0, + 282.0, + 285.6, + 271.1, + 256.0, + 265.8, + 266.7, + 262.5, + 266.7, + 266.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json index 9aa21947..55b7c653 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 435.5, "throttle_ratio": 0.891, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -477.8 + "ttft_p99_drift_ms": -477.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 435.5, + "std": 11.5, + "cv_pct": 2.65, + "stability": "noisy", + "runs": [ + 407.9, + 438.6, + 442.3, + 439.1, + 432.0, + 441.9, + 428.9, + 438.9, + 457.6, + 425.9, + 442.3, + 430.0, + 443.6, + 427.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json 
b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json index 65eabbca..dcc4044d 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 435.5, "throttle_ratio": 0.891, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -477.8 + "ttft_p99_drift_ms": -477.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 435.5, + "std": 11.5, + "cv_pct": 2.65, + "stability": "noisy", + "runs": [ + 407.9, + 438.6, + 442.3, + 439.1, + 432.0, + 441.9, + 428.9, + 438.9, + 457.6, + 425.9, + 442.3, + 430.0, + 443.6, + 427.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json index 305ae8bf..d7acc44f 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 541.0, "throttle_ratio": 0.824, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": -318.2 + "ttft_p99_drift_ms": -318.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 541.0, + "std": 40.7, + "cv_pct": 7.53, + "stability": "unstable", + "runs": [ + 568.6, + 600.0, + 588.4, + 608.5, + 591.8, + 535.1, + 501.3, + 513.5, + 507.0, + 515.9, + 508.6, + 521.6, + 502.9, + 510.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json index 0281b3d5..186d212d 100644 --- 
a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 541.0, "throttle_ratio": 0.824, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": -318.2 + "ttft_p99_drift_ms": -318.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 541.0, + "std": 40.7, + "cv_pct": 7.53, + "stability": "unstable", + "runs": [ + 568.6, + 600.0, + 588.4, + 608.5, + 591.8, + 535.1, + 501.3, + 513.5, + 507.0, + 515.9, + 508.6, + 521.6, + 502.9, + 510.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json index 73ae3e59..7e942d09 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 433.8, "throttle_ratio": 0.91, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -471.4 + "ttft_p99_drift_ms": -471.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 433.8, + "std": 9.6, + "cv_pct": 2.22, + "stability": "noisy", + "runs": [ + 409.8, + 439.3, + 434.7, + 433.3, + 441.7, + 440.2, + 450.3, + 424.3, + 441.8, + 433.8, + 436.5, + 429.7, + 430.7, + 427.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json index da8913c7..99819324 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json +++ 
b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 433.8, "throttle_ratio": 0.91, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -471.4 + "ttft_p99_drift_ms": -471.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 433.8, + "std": 9.6, + "cv_pct": 2.22, + "stability": "noisy", + "runs": [ + 409.8, + 439.3, + 434.7, + 433.3, + 441.7, + 440.2, + 450.3, + 424.3, + 441.8, + 433.8, + 436.5, + 429.7, + 430.7, + 427.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/result.json index e7a07246..ec510205 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 419.5, "throttle_ratio": 0.862, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -177.6 + "ttft_p99_drift_ms": -177.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 419.5, + "std": 16.4, + "cv_pct": 3.9, + "stability": "noisy", + "runs": [ + 384.3, + 426.5, + 445.8, + 403.7, + 422.8, + 435.2, + 396.5, + 437.6, + 420.4, + 418.3, + 426.9, + 417.6, + 412.9, + 424.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/sustained/result.json index b6428506..e18178cf 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 
419.5, "throttle_ratio": 0.862, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -177.6 + "ttft_p99_drift_ms": -177.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 419.5, + "std": 16.4, + "cv_pct": 3.9, + "stability": "noisy", + "runs": [ + 384.3, + 426.5, + 445.8, + 403.7, + 422.8, + 435.2, + 396.5, + 437.6, + 420.4, + 418.3, + 426.9, + 417.6, + 412.9, + 424.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json index f6f98fbe..36449e18 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json @@ -476,7 +476,44 @@ "sustained_throughput_tokens_per_sec": 30.5, "throttle_ratio": 0.461, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -34.2 + "ttft_p99_drift_ms": -34.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 30.5, + "std": 10.5, + "cv_pct": 34.45, + "stability": "unstable", + "runs": [ + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.4, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json index 42a7881e..68bdb8eb 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json @@ -432,7 +432,44 @@ "sustained_throughput_tokens_per_sec": 30.5, "throttle_ratio": 0.461, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -34.2 + 
"ttft_p99_drift_ms": -34.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 30.5, + "std": 10.5, + "cv_pct": 34.45, + "stability": "unstable", + "runs": [ + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.4, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json index 7d98f82d..7ec3f32e 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json @@ -369,7 +369,30 @@ "sustained_throughput_tokens_per_sec": 1917.3, "throttle_ratio": 0.728, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -21.5 + "ttft_p99_drift_ms": -21.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1917.3, + "std": 286.8, + "cv_pct": 14.96, + "stability": "unstable", + "runs": [ + 2197.7, + 2238.6, + 2282.9, + 2213.9, + 2247.4, + 2232.2, + 1661.7, + 1675.0, + 1663.0, + 1705.4, + 1668.5, + 1678.5, + 1709.0, + 1668.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json index 9d66800d..1c8b81b0 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 1917.3, "throttle_ratio": 0.728, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -21.5 + "ttft_p99_drift_ms": -21.5, + "throughput_post_warmup_reliability": { + "n": 14, + 
"mean": 1917.3, + "std": 286.8, + "cv_pct": 14.96, + "stability": "unstable", + "runs": [ + 2197.7, + 2238.6, + 2282.9, + 2213.9, + 2247.4, + 2232.2, + 1661.7, + 1675.0, + 1663.0, + 1705.4, + 1668.5, + 1678.5, + 1709.0, + 1668.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json index a31b870e..615b2be1 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json @@ -574,7 +574,44 @@ "sustained_throughput_tokens_per_sec": 105.3, "throttle_ratio": 0.764, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -30.2 + "ttft_p99_drift_ms": -30.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 105.3, + "std": 6.8, + "cv_pct": 6.47, + "stability": "unstable", + "runs": [ + 108.7, + 106.6, + 102.3, + 97.9, + 111.4, + 102.8, + 101.8, + 105.3, + 113.3, + 110.1, + 100.9, + 96.0, + 108.5, + 116.2, + 88.8, + 110.6, + 111.3, + 104.4, + 107.8, + 93.2, + 109.5, + 110.7, + 101.6, + 106.6, + 103.9, + 112.4, + 93.3, + 112.3 + ] + } }, "interactive": { "ttft_ms_p50": 157.98, diff --git a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json index d271ba19..27ee58fc 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 105.3, "throttle_ratio": 0.764, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -30.2 + "ttft_p99_drift_ms": -30.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 105.3, + "std": 6.8, + "cv_pct": 
6.47, + "stability": "unstable", + "runs": [ + 108.7, + 106.6, + 102.3, + 97.9, + 111.4, + 102.8, + 101.8, + 105.3, + 113.3, + 110.1, + 100.9, + 96.0, + 108.5, + 116.2, + 88.8, + 110.6, + 111.3, + 104.4, + 107.8, + 93.2, + 109.5, + 110.7, + 101.6, + 106.6, + 103.9, + 112.4, + 93.3, + 112.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/result.json b/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/result.json index ba4de6df..70f04083 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 343.0, "throttle_ratio": 0.893, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -70.2 + "ttft_p99_drift_ms": -70.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 343.0, + "std": 10.9, + "cv_pct": 3.17, + "stability": "noisy", + "runs": [ + 341.7, + 342.6, + 326.1, + 337.2, + 353.4, + 349.3, + 330.5, + 364.8, + 341.4, + 339.1, + 352.7, + 340.2, + 328.8, + 350.5, + 342.9, + 340.7, + 332.5, + 336.4, + 357.0, + 331.2, + 364.2, + 331.5, + 349.5, + 338.7, + 350.9, + 325.7, + 356.4, + 349.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/sustained/result.json b/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/sustained/result.json index cd2c17af..53672ae3 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 343.0, "throttle_ratio": 0.893, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -70.2 + "ttft_p99_drift_ms": -70.2, + "throughput_post_warmup_reliability": { + "n": 28, + 
"mean": 343.0, + "std": 10.9, + "cv_pct": 3.17, + "stability": "noisy", + "runs": [ + 341.7, + 342.6, + 326.1, + 337.2, + 353.4, + 349.3, + 330.5, + 364.8, + 341.4, + 339.1, + 352.7, + 340.2, + 328.8, + 350.5, + 342.9, + 340.7, + 332.5, + 336.4, + 357.0, + 331.2, + 364.2, + 331.5, + 349.5, + 338.7, + 350.9, + 325.7, + 356.4, + 349.1 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/result.json b/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/result.json index 06c3675f..9360106c 100644 --- a/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/result.json +++ b/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 2006.9, "throttle_ratio": 0.982, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -156.7 + "ttft_p99_drift_ms": -156.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2006.9, + "std": 10.1, + "cv_pct": 0.5, + "stability": "stable", + "runs": [ + 1999.4, + 2004.0, + 1998.1, + 2014.0, + 2013.6, + 2003.3, + 2015.3, + 2006.1, + 1997.0, + 1997.9, + 2013.0, + 1999.4, + 2033.6, + 2002.5 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/sustained/result.json b/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/sustained/result.json index 8f3b9484..9c1d9ecc 100644 --- a/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/sustained/result.json +++ b/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 2006.9, "throttle_ratio": 0.982, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -156.7 + "ttft_p99_drift_ms": -156.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2006.9, + "std": 10.1, + "cv_pct": 0.5, + "stability": "stable", + "runs": [ + 1999.4, + 2004.0, + 1998.1, + 2014.0, 
+ 2013.6, + 2003.3, + 2015.3, + 2006.1, + 1997.0, + 1997.9, + 2013.0, + 1999.4, + 2033.6, + 2002.5 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/result.json index da09653a..769b621e 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/result.json @@ -500,7 +500,44 @@ "sustained_throughput_tokens_per_sec": 268.3, "throttle_ratio": 0.853, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -12.5 + "ttft_p99_drift_ms": -12.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 268.3, + "std": 11.1, + "cv_pct": 4.12, + "stability": "noisy", + "runs": [ + 265.0, + 294.4, + 276.4, + 269.7, + 295.7, + 281.2, + 281.4, + 271.3, + 263.5, + 265.3, + 255.6, + 262.4, + 276.5, + 255.3, + 263.9, + 254.9, + 282.2, + 262.4, + 271.5, + 252.3, + 262.6, + 268.9, + 261.8, + 269.2, + 260.4, + 262.4, + 269.1, + 257.7 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/sustained/result.json index adbaa643..2060ee84 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 268.3, "throttle_ratio": 0.853, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -12.5 + "ttft_p99_drift_ms": -12.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 268.3, + "std": 11.1, + "cv_pct": 4.12, + "stability": "noisy", + "runs": [ + 265.0, + 294.4, + 276.4, + 269.7, + 
295.7, + 281.2, + 281.4, + 271.3, + 263.5, + 265.3, + 255.6, + 262.4, + 276.5, + 255.3, + 263.9, + 254.9, + 282.2, + 262.4, + 271.5, + 252.3, + 262.6, + 268.9, + 261.8, + 269.2, + 260.4, + 262.4, + 269.1, + 257.7 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/result.json index b1111115..b0451f0e 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 265.9, "throttle_ratio": 0.864, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -315.2 + "ttft_p99_drift_ms": -315.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 265.9, + "std": 11.7, + "cv_pct": 4.41, + "stability": "noisy", + "runs": [ + 253.9, + 268.8, + 273.6, + 288.3, + 272.3, + 261.3, + 277.1, + 252.4, + 268.1, + 249.0, + 278.5, + 253.5, + 269.7, + 256.7 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/sustained/result.json index dd11245f..22e3245f 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 265.9, "throttle_ratio": 0.864, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -315.2 + "ttft_p99_drift_ms": -315.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 265.9, + "std": 11.7, + "cv_pct": 4.41, + "stability": "noisy", + "runs": [ + 253.9, + 268.8, + 273.6, + 288.3, + 
272.3, + 261.3, + 277.1, + 252.4, + 268.1, + 249.0, + 278.5, + 253.5, + 269.7, + 256.7 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json index 7fd24ec5..37fc0648 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 416.4, "throttle_ratio": 0.915, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -335.7 + "ttft_p99_drift_ms": -335.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 416.4, + "std": 10.9, + "cv_pct": 2.62, + "stability": "noisy", + "runs": [ + 400.3, + 404.0, + 414.5, + 428.4, + 407.3, + 435.2, + 418.8, + 418.2, + 419.1, + 413.0, + 426.8, + 422.6, + 423.5, + 398.3 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json index 003f7bcc..f21b9a41 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 416.4, "throttle_ratio": 0.915, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -335.7 + "ttft_p99_drift_ms": -335.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 416.4, + "std": 10.9, + "cv_pct": 2.62, + "stability": "noisy", + "runs": [ + 400.3, + 404.0, + 414.5, + 428.4, + 407.3, + 435.2, + 418.8, + 418.2, + 419.1, + 413.0, + 426.8, + 422.6, + 423.5, + 398.3 + ] + } } }, "accuracy": { 
diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json index 17b35079..02547706 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json @@ -444,7 +444,44 @@ "sustained_throughput_tokens_per_sec": 14.9, "throttle_ratio": 0.399, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 34216.0 + "ttft_p99_drift_ms": 34216.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 14.9, + "std": 4.4, + "cv_pct": 29.72, + "stability": "unstable", + "runs": [ + 17.1, + 17.1, + 8.5, + 17.1, + 17.1, + 8.5, + 17.1, + 21.3, + 8.5, + 17.1, + 17.1, + 8.5, + 21.3, + 17.1, + 8.5, + 17.1, + 17.1, + 17.1, + 8.5, + 17.1, + 17.1, + 8.5, + 21.3, + 17.1, + 8.5, + 17.1, + 12.8, + 17.1 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json index 07f35034..eac8dc1e 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 14.9, "throttle_ratio": 0.399, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 34216.0 + "ttft_p99_drift_ms": 34216.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 14.9, + "std": 4.4, + "cv_pct": 29.72, + "stability": "unstable", + "runs": [ + 17.1, + 17.1, + 8.5, + 17.1, + 17.1, + 8.5, + 17.1, + 21.3, + 8.5, + 17.1, + 17.1, + 8.5, + 21.3, + 17.1, + 8.5, + 17.1, + 17.1, + 17.1, + 8.5, + 17.1, + 17.1, + 8.5, + 21.3, + 17.1, + 8.5, + 17.1, + 
12.8, + 17.1 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/result.json index 890bb38f..08b68576 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/result.json @@ -336,7 +336,30 @@ "sustained_throughput_tokens_per_sec": 2789.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -83.9 + "ttft_p99_drift_ms": -83.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2789.7, + "std": 50.6, + "cv_pct": 1.81, + "stability": "stable", + "runs": [ + 2797.6, + 2649.7, + 2789.6, + 2783.6, + 2742.5, + 2781.3, + 2791.3, + 2763.1, + 2817.4, + 2817.8, + 2842.1, + 2857.4, + 2830.4, + 2792.3 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/sustained/result.json index 2091a26b..39e3777b 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 2789.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -83.9 + "ttft_p99_drift_ms": -83.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2789.7, + "std": 50.6, + "cv_pct": 1.81, + "stability": "stable", + "runs": [ + 2797.6, + 2649.7, + 2789.6, + 2783.6, + 2742.5, + 2781.3, + 2791.3, + 2763.1, + 2817.4, + 2817.8, + 2842.1, + 2857.4, + 2830.4, + 2792.3 + ] + } } }, "accuracy": { diff --git 
a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json index 6536a548..96ee6b89 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 92.8, "throttle_ratio": 0.749, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 34.6 + "ttft_p99_drift_ms": 34.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 92.8, + "std": 8.0, + "cv_pct": 8.63, + "stability": "unstable", + "runs": [ + 86.6, + 95.9, + 92.6, + 103.3, + 80.9, + 93.4, + 92.7, + 84.7, + 102.0, + 89.7, + 88.6, + 94.3, + 92.8, + 106.5, + 81.9, + 92.7, + 100.4, + 83.5, + 99.3, + 86.1, + 97.7, + 82.1, + 97.0, + 102.8, + 79.8, + 106.2, + 84.7, + 99.2 + ] + } }, "interactive": { "ttft_ms_p50": 225.66, diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json index 23d04c69..ba0678ca 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 92.8, "throttle_ratio": 0.749, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 34.6 + "ttft_p99_drift_ms": 34.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 92.8, + "std": 8.0, + "cv_pct": 8.63, + "stability": "unstable", + "runs": [ + 86.6, + 95.9, + 92.6, + 103.3, + 80.9, + 93.4, + 92.7, + 84.7, + 102.0, + 89.7, + 88.6, + 94.3, + 92.8, + 106.5, + 81.9, + 92.7, + 100.4, + 83.5, + 99.3, + 86.1, + 97.7, + 82.1, + 97.0, + 102.8, + 79.8, + 
106.2, + 84.7, + 99.2 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json index 4c93f34e..22487a5c 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 293.9, "throttle_ratio": 0.811, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": 22.3 + "ttft_p99_drift_ms": 22.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 293.9, + "std": 15.7, + "cv_pct": 5.35, + "stability": "unstable", + "runs": [ + 311.7, + 301.1, + 301.5, + 293.5, + 300.5, + 320.6, + 285.0, + 308.8, + 297.6, + 290.8, + 304.3, + 316.8, + 293.8, + 287.4, + 316.6, + 305.1, + 296.0, + 298.7, + 260.1, + 298.2, + 284.7, + 274.4, + 293.2, + 267.2, + 271.3, + 297.0, + 262.9, + 291.0 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json index 1804e6c1..893bd657 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 293.9, "throttle_ratio": 0.811, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": 22.3 + "ttft_p99_drift_ms": 22.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 293.9, + "std": 15.7, + "cv_pct": 5.35, + "stability": "unstable", + "runs": [ + 311.7, + 301.1, + 301.5, + 293.5, + 300.5, + 320.6, + 285.0, + 308.8, + 297.6, + 290.8, + 304.3, + 316.8, + 293.8, + 287.4, + 316.6, + 305.1, 
+ 296.0, + 298.7, + 260.1, + 298.2, + 284.7, + 274.4, + 293.2, + 267.2, + 271.3, + 297.0, + 262.9, + 291.0 + ] + } } }, "accuracy": { From c3ea4db4a19960a9417462bee25b3a42b6d932da Mon Sep 17 00:00:00 2001 From: Liang Juhao Date: Tue, 19 May 2026 18:29:52 +0800 Subject: [PATCH 4/5] =?UTF-8?q?chore:=20tune=20reliability=20thresholds=20?= =?UTF-8?q?+=20rename=20unstable=20=E2=86=92=20high-variance?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tightening the labels after looking at real backfilled data. The initial ≤2%/≤5%/>5% thresholds labelled the literal median submission "noisy" and slapped ~30% of submissions with an "unstable ✗" badge, which read as a verdict on the submitter rather than an informational note about the hardware × workload pair. Empirical distribution from the May-2026 backfill (255 sustained CVs): median = 3.10 %, p90 = 13.07 %, max = 36.18 % What changed - Thresholds: ≤3% stable / ≤8% noisy / >8% high-variance (loadgen/loadgen.py `_STABILITY_THRESHOLD_*`). - Renamed the third tier "unstable" → "high-variance" everywhere (label string, modal pill class, docs). High CV does not mean the measurement is wrong — it means the hardware × workload combo has irreducible jitter (consumer-card thermal throttle, HCCL noise on ×16 Ascend topologies, speculative-decoding acceptance-rate jitter). - Dropped the ✗ glyph for the high-variance tier; only stable / noisy retain ✓ / ⚠. The CSS pill uses an orange tone, never pure red, so readers read "look closer" rather than "this is broken". - DEVELOPMENT.md explains the rebrand: high-variance submitters do not need to re-run; the badge sizes safety margins for downstream hardware shoppers. 
Resulting distribution (new thresholds): stable : 47.8% (n=122) noisy : 35.3% (n=90) high-variance : 16.9% (n=43) Tail check — every chip in the 13 worst-CV submissions is a legitimate flag: RTX 5090 / A6000 / RTX 6000 Ada / V100s (consumer/workstation cards lacking datacenter cooling), Ascend ×16 / ×8 distributed (real HCCL jitter), and the H20-3e (lower thermal headroom variant). Data update - Re-labelled 136 of the 255 backfilled sustained reliability blocks in place. Only the `stability` string moved; `cv_pct`, `mean`, `std`, and `runs[]` arrays are byte-identical, so the diff per file is a single line. Tests - loadgen/tests/test_reliability.py: updated boundary expectations and membership sets to track the new labels. 21/21 still pass. Co-authored-by: Cursor --- DEVELOPMENT.md | 19 +++++++- leaderboard/site/assets/css/modal.css | 13 ++++-- leaderboard/site/assets/js/modal.js | 44 ++++++++++++++----- loadgen/loadgen.py | 27 ++++++++---- loadgen/tests/test_reliability.py | 14 +++--- .../result.json | 2 +- .../result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../bf16/result.json | 2 +- .../bf16/sustained/result.json | 2 +- .../w4a16/result.json | 2 +- .../w4a16/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../bf16/result.json | 2 +- .../bf16/sustained/result.json | 2 +- .../w4a16/result.json | 2 +- .../w4a16/sustained/result.json | 2 +- .../w8a16/result.json | 2 +- .../w8a16/sustained/result.json | 2 +- .../fp8/result.json | 2 +- .../fp8/sustained/result.json | 2 +- .../w8a16/result.json | 2 +- .../w8a16/sustained/result.json | 2 +- .../w8a8/result.json | 2 +- 
.../w8a8/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../bf16/result.json | 2 +- .../bf16/sustained/result.json | 2 +- .../w4a16/result.json | 2 +- .../w4a16/sustained/result.json | 2 +- .../w8a16/result.json | 2 +- .../w8a16/sustained/result.json | 2 +- .../w8a8/result.json | 2 +- .../w8a8/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../fp8/result.json | 2 +- .../fp8/sustained/result.json | 2 +- .../w8a16/result.json | 2 +- .../w8a16/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../fp8/result.json | 2 +- .../fp8/sustained/result.json | 2 +- .../w4a16/result.json | 2 +- .../w4a16/sustained/result.json | 2 +- .../w8a16/result.json | 2 +- .../w8a16/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../fp8/result.json | 2 +- .../fp8/sustained/result.json | 2 +- .../w4a16/result.json | 2 +- .../w4a16/sustained/result.json | 2 +- .../w8a8/result.json | 2 +- .../w8a8/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../bf16/result.json | 2 +- .../bf16/sustained/result.json | 2 +- .../w4a16/result.json | 2 +- .../w4a16/sustained/result.json | 2 +- .../w8a16/result.json | 2 +- .../w8a16/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../fp8/result.json | 2 +- .../fp8/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- 
.../fp8/result.json | 2 +- .../fp8/sustained/result.json | 2 +- .../w8a16/result.json | 2 +- .../w8a16/sustained/result.json | 2 +- .../w8a8/result.json | 2 +- .../w8a8/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../bf16/result.json | 2 +- .../bf16/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../fp8/result.json | 2 +- .../fp8/sustained/result.json | 2 +- .../w4a16/result.json | 2 +- .../w4a16/sustained/result.json | 2 +- .../w8a16/result.json | 2 +- .../w8a16/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../w4a16/result.json | 2 +- .../w4a16/sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- .../result.json | 2 +- .../sustained/result.json | 2 +- 141 files changed, 223 insertions(+), 166 deletions(-) diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 49d443f9..d57f243d 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -945,8 +945,23 @@ shipping `samples.jsonl`. Shape: } ``` -`stability` thresholds (tunable): `cv_pct ≤ 2 → stable ✓`, -`≤ 5 → noisy ⚠`, otherwise `unstable ✗`. +`stability` thresholds: `cv_pct ≤ 3 → stable ✓`, `≤ 8 → noisy ⚠`, +otherwise `high-variance`. Calibrated from the May-2026 backfill — see +the comment above `_STABILITY_THRESHOLD_*` in `loadgen/loadgen.py` for the +empirical distribution that informed the choice. Tunable centrally there. 
+ +**`high-variance` is informational, not a verdict.** High CV means the +hardware × workload combo carries irreducible jitter (thermal throttle on +consumer cards, HCCL noise on 16-chip Ascend topologies, acceptance-rate +fluctuation on speculative decoding) — it is **not** a sign the +submission is broken. The frontend reflects this: high-variance pills +use an orange tone with no error glyph, while only stable / noisy carry +✓ / ⚠ icons. + +If you submit a result that lands as high-variance, you do not need to +re-run. The badge is for downstream readers picking hardware for +latency-sensitive workloads — they can use the CV % to size their +safety margins, while peak-throughput shoppers can largely ignore it. | Scenario | Field path | Reliability source | |---|---|---| diff --git a/leaderboard/site/assets/css/modal.css b/leaderboard/site/assets/css/modal.css index 0e76c4ba..aec54145 100644 --- a/leaderboard/site/assets/css/modal.css +++ b/leaderboard/site/assets/css/modal.css @@ -142,10 +142,15 @@ border: 1px solid color-mix(in srgb, currentColor 35%, transparent); background: color-mix(in srgb, currentColor 10%, transparent); } -.modal-reliab-pill.stable { color: var(--good, #2da44e); } -.modal-reliab-pill.noisy { color: var(--warn, #d29922); } -.modal-reliab-pill.unstable { color: var(--bad, #cf222e); } -.modal-reliab-pill.unknown { color: var(--fg-faint); } +.modal-reliab-pill.stable { color: var(--good, #2da44e); } +.modal-reliab-pill.noisy { color: var(--warn, #d29922); } +/* High-variance uses an orange tone (not pure red) on purpose: red would + * read as an error glyph, but a high-CV submission may be a perfectly + * correct measurement of a legitimately jittery hardware × workload pair + * (e.g. consumer cards under sustained load). Orange says "look at this" + * without saying "this is broken". 
*/ +.modal-reliab-pill.high_variance { color: #d97706; } +.modal-reliab-pill.unknown { color: var(--fg-faint); } .modal-close { background: transparent; diff --git a/leaderboard/site/assets/js/modal.js b/leaderboard/site/assets/js/modal.js index f99d53e2..ee94d615 100644 --- a/leaderboard/site/assets/js/modal.js +++ b/leaderboard/site/assets/js/modal.js @@ -398,12 +398,23 @@ function _detailSection(title, rows) { // // Reliability blocks are emitted by loadgen.py for each scenario starting in // the "feat: emit reliability stats" series. Each block has shape: -// { n, mean, std, cv_pct, stability: "stable" | "noisy" | "unstable", runs: [...] } +// { n, mean, std, cv_pct, stability: "stable"|"noisy"|"high-variance", runs:[…] } // Older results (pre-feature) carry an empty {} which we skip silently — the // section header is suppressed if no scenario reported a block, so old runs // look the same as before. - -const _STABILITY_ICON = { stable: "✓", noisy: "⚠", unstable: "✗" }; +// +// Design note on iconography: +// stable → ✓ Green — the headline number is the property of the chip. +// noisy → ⚠ Amber — modest jitter (≤8 % CV). Common, not a problem. +// high-variance → No glyph, orange-tinted text — informational, not a verdict. +// We intentionally do NOT use an ✗ for high-variance. High CV means "this +// hardware × workload combo carries irreducible variability the reader +// should be aware of", not "this measurement is wrong". An ✗ would +// implicitly indict submissions whose hardware simply lacks proper cooling, +// or scale-out runs whose network jitter is genuine. The colour cue +// communicates "look closer" without the verdict. + +const _STABILITY_ICON = { stable: "✓", noisy: "⚠", "high-variance": "" }; function _hasReliability(block) { return block && typeof block === "object" && block.cv_pct != null; @@ -442,16 +453,24 @@ function _pickWorstReliability(row) { } // Build the small `cv X.X% ✓` pill that appears in the modal subtitle row. 
+// Tooltip explains the thresholds and what they mean, since `high-variance` +// is a description rather than a verdict — readers should see "natural +// variability" not "broken measurement". function _reliabilityPill(row) { const w = _pickWorstReliability(row); if (!w) return ""; - const icon = _STABILITY_ICON[w.stability] || "·"; - const cls = "modal-reliab-pill " + (w.stability || "unknown"); + const icon = _STABILITY_ICON[w.stability] || ""; + // Map the label to the class name modal.css defines: "-" → "_" (high-variance → high_variance). + const stabilitySlug = (w.stability || "unknown").replace(/-/g, "_"); + const cls = "modal-reliab-pill " + stabilitySlug; const title = `Worst inter-run CV across scenarios: ${w.cv_pct}% ` + `(${w.scenario} ${w.metric}). ` + - `≤2% stable, ≤5% noisy, >5% unstable.`; + `≤3% stable, ≤8% noisy, >8% high-variance. ` + + `High-variance is informational — it means this hardware × workload ` + + `combo has irreducible jitter, not that the measurement is wrong.`; + const head = icon ? `${icon} cv ${w.cv_pct}%` : `cv ${w.cv_pct}%`; return `` + - `reliability ${icon} cv ${esc(String(w.cv_pct))}%`; + `reliability ${esc(head)}`; } // Render one row per scenario in the Details tab. Skipped if a scenario has @@ -460,9 +479,14 @@ function _reliabilityRows(row) { const viz = row.viz || {}; const rows = []; - const fmtBlock = (b) => - `${b.cv_pct}% · ${_STABILITY_ICON[b.stability] || ""} ` + - `${esc(b.stability || "")} (n=${b.n})`; + // Format: "4.19% · ⚠ noisy (n=14)" or "12.4% high-variance (n=14)". + // Icon is omitted (not just blank) for high-variance so we don't render + // a dangling "·" — the label and colour carry the meaning instead. + const fmtBlock = (b) => { + const icon = _STABILITY_ICON[b.stability] || ""; + const head = icon ? `${b.cv_pct}% · ${icon}` : `${b.cv_pct}%`; + return `${head} ${esc(b.stability || "")} (n=${b.n})`; + }; // offline — show the worst (largest CV) of all client_concurrency rows. 
// That's the limiting concurrency for stability claims. diff --git a/loadgen/loadgen.py b/loadgen/loadgen.py index d4e2e0bd..7a917108 100644 --- a/loadgen/loadgen.py +++ b/loadgen/loadgen.py @@ -78,12 +78,23 @@ def _percentile(data: list, p: float): # is non-positive, in which case the frontend hides the badge entirely so # users do not see a meaningless "stable ✓" on a single-run measurement. -# Stability thresholds. These are intentionally permissive on first launch — -# real-world hardware noise (especially memory thrashing on first cycle) -# regularly crosses 2 % even on healthy systems. Tune in the schema after we -# observe the first wave of submissions. -_STABILITY_THRESHOLD_STABLE_PCT = 2.0 -_STABILITY_THRESHOLD_NOISY_PCT = 5.0 +# Stability thresholds. Calibrated from the initial 255-result sustained +# backfill (May 2026), which had a CV median of 3.1 % and p90 of 13.1 %. +# Tighter thresholds (e.g. ≤ 2 % / ≤ 5 %) labelled the literal median run +# "noisy" and ~30 % of submissions "unstable" — those labels were too +# pejorative for what is really normal hardware jitter. The buckets here +# split the empirical distribution into ~48 % stable / ~35 % noisy / +# ~17 % high-variance, with the high-variance bucket dominated by chips +# we expect to genuinely throttle (RTX 5090, A6000, V100s) or scale-out +# topologies with real network jitter (Ascend ×16). +# +# Important wording choice: the third tier is named "high-variance", not +# "unstable", because high CV does not mean the measurement is wrong — it +# means the headline number carries irreducible variability the reader +# should be aware of. The frontend reflects this with a colour cue and +# no error glyph; "high-variance" is a description, not a verdict. 
+_STABILITY_THRESHOLD_STABLE_PCT = 3.0 +_STABILITY_THRESHOLD_NOISY_PCT = 8.0 def _cv_pct(values: list) -> Optional[float]: @@ -102,14 +113,14 @@ def _cv_pct(values: list) -> Optional[float]: def _stability_label(cv_pct: Optional[float]) -> Optional[str]: - """Map a CV percentage to a stable/noisy/unstable label, or None.""" + """Map a CV percentage to a stable / noisy / high-variance label, or None.""" if cv_pct is None: return None if cv_pct <= _STABILITY_THRESHOLD_STABLE_PCT: return "stable" if cv_pct <= _STABILITY_THRESHOLD_NOISY_PCT: return "noisy" - return "unstable" + return "high-variance" def _reliability_block(values: list, *, decimals: int = 2) -> dict: diff --git a/loadgen/tests/test_reliability.py b/loadgen/tests/test_reliability.py index 6cdc047d..c2d4ea1b 100644 --- a/loadgen/tests/test_reliability.py +++ b/loadgen/tests/test_reliability.py @@ -45,11 +45,13 @@ def test_cv_pct_returns_none_for_small_or_invalid_input(): def test_stability_labels(): + # Boundaries: ≤3% stable, ≤8% noisy, >8% high-variance. 
assert _stability_label(0.5) == "stable" - assert _stability_label(2.0) == "stable" # inclusive boundary - assert _stability_label(3.0) == "noisy" - assert _stability_label(5.0) == "noisy" # inclusive boundary - assert _stability_label(7.0) == "unstable" + assert _stability_label(3.0) == "stable" # inclusive boundary + assert _stability_label(3.01) == "noisy" + assert _stability_label(8.0) == "noisy" # inclusive boundary + assert _stability_label(8.01) == "high-variance" + assert _stability_label(20.0) == "high-variance" assert _stability_label(None) is None @@ -150,7 +152,7 @@ def test_offline_emits_throughput_reliability(tmp_path): assert rel, "offline scenario did not emit reliability block" assert rel["n"] == 3 assert rel["cv_pct"] is not None - assert rel["stability"] in {"stable", "noisy", "unstable"} + assert rel["stability"] in {"stable", "noisy", "high-variance"} assert len(rel["runs"]) == 3 @@ -214,7 +216,7 @@ def test_sustained_emits_throughput_post_warmup_reliability(tmp_path): # cv_pct may be None if not enough post-warmup samples landed; we only # require the field exists. When n >= 2 the stability must be set. 
if rel.get("n", 0) >= 2: - assert rel["stability"] in {"stable", "noisy", "unstable"} + assert rel["stability"] in {"stable", "noisy", "high-variance"} def test_burst_emits_recovery_time_seconds(tmp_path): diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json index 126a4230..e110ab18 100644 --- a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json +++ b/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json @@ -442,7 +442,7 @@ "mean": 54.9, "std": 4.9, "cv_pct": 8.95, - "stability": "unstable", + "stability": "high-variance", "runs": [ 56.2, 56.3, diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json index 11df3df3..54c9f403 100644 --- a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json +++ b/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json @@ -333,7 +333,7 @@ "mean": 7095.4, "std": 147.1, "cv_pct": 2.07, - "stability": "noisy", + "stability": "stable", "runs": [ 6616.4, 7181.7, diff --git a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json index 4e41b5a9..442ac69f 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json @@ -523,7 +523,7 @@ "mean": 53.2, "std": 5.6, "cv_pct": 10.6, - "stability": "unstable", + "stability": "high-variance", "runs": [ 46.9, 59.7, diff --git 
a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json index 7d4de213..c4af0d0a 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json @@ -481,7 +481,7 @@ "mean": 53.2, "std": 5.6, "cv_pct": 10.6, - "stability": "unstable", + "stability": "high-variance", "runs": [ 46.9, 59.7, diff --git a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json index fa6e477b..1c40ff18 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json @@ -580,7 +580,7 @@ "mean": 53.2, "std": 7.0, "cv_pct": 13.23, - "stability": "unstable", + "stability": "high-variance", "runs": [ 56.7, 55.0, diff --git a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json index 9574beb6..1511541c 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json @@ -481,7 +481,7 @@ "mean": 53.2, "std": 7.0, "cv_pct": 13.23, - "stability": "unstable", + "stability": "high-variance", "runs": [ 56.7, 55.0, diff --git a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json 
b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json index 4db5920a..eb69665d 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json @@ -577,7 +577,7 @@ "mean": 226.6, "std": 11.3, "cv_pct": 5.01, - "stability": "unstable", + "stability": "noisy", "runs": [ 217.7, 215.0, diff --git a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json index d64f4c41..9c23b4c0 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json @@ -481,7 +481,7 @@ "mean": 226.6, "std": 11.3, "cv_pct": 5.01, - "stability": "unstable", + "stability": "noisy", "runs": [ 217.7, 215.0, diff --git a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json index ed241d8c..2b038c2a 100644 --- a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json @@ -627,7 +627,7 @@ "mean": 53.5, "std": 7.9, "cv_pct": 14.84, - "stability": "unstable", + "stability": "high-variance", "runs": [ 67.8, 42.6, diff --git a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json index 7940fe3b..f87c4b33 100644 --- 
a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json @@ -528,7 +528,7 @@ "mean": 53.5, "std": 7.9, "cv_pct": 14.84, - "stability": "unstable", + "stability": "high-variance", "runs": [ 67.8, 42.6, diff --git a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json index be7c0d9d..7ee86f6b 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json @@ -570,7 +570,7 @@ "mean": 54.2, "std": 3.8, "cv_pct": 7.06, - "stability": "unstable", + "stability": "noisy", "runs": [ 46.9, 55.4, diff --git a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json index c230659d..125e8f48 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json @@ -528,7 +528,7 @@ "mean": 54.2, "std": 3.8, "cv_pct": 7.06, - "stability": "unstable", + "stability": "noisy", "runs": [ 46.9, 55.4, diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json index cdd18aab..649a377c 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json +++ 
b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json @@ -362,7 +362,7 @@ "mean": 491.9, "std": 14.0, "cv_pct": 2.85, - "stability": "noisy", + "stability": "stable", "runs": [ 459.7, 507.7, diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json index efdcdade..1cab7952 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json @@ -253,7 +253,7 @@ "mean": 491.9, "std": 14.0, "cv_pct": 2.85, - "stability": "noisy", + "stability": "stable", "runs": [ 459.7, 507.7, diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json index d0b750f9..20645b1e 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json @@ -362,7 +362,7 @@ "mean": 813.5, "std": 17.7, "cv_pct": 2.17, - "stability": "noisy", + "stability": "stable", "runs": [ 778.9, 820.5, diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json index a02ef774..69a88e9e 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json @@ -253,7 +253,7 @@ "mean": 813.5, "std": 17.7, "cv_pct": 2.17, - 
"stability": "noisy", + "stability": "stable", "runs": [ 778.9, 820.5, diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json index 72e20277..e444f214 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json @@ -445,7 +445,7 @@ "mean": 57.0, "std": 7.4, "cv_pct": 13.07, - "stability": "unstable", + "stability": "high-variance", "runs": [ 51.2, 55.5, diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json index e17dfb22..75aad313 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json @@ -403,7 +403,7 @@ "mean": 57.0, "std": 7.4, "cv_pct": 13.07, - "stability": "unstable", + "stability": "high-variance", "runs": [ 51.2, 55.5, diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json index 6da85580..65a2c792 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json @@ -580,7 +580,7 @@ "mean": 164.3, "std": 8.5, "cv_pct": 5.2, - "stability": "unstable", + "stability": "noisy", "runs": [ 167.7, 172.9, diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json 
b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json index 1ee4778e..33933391 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json @@ -478,7 +478,7 @@ "mean": 164.3, "std": 8.5, "cv_pct": 5.2, - "stability": "unstable", + "stability": "noisy", "runs": [ 167.7, 172.9, diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json index da9b0f7c..fe47c5a0 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json @@ -577,7 +577,7 @@ "mean": 472.7, "std": 11.2, "cv_pct": 2.38, - "stability": "noisy", + "stability": "stable", "runs": [ 482.4, 470.0, diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json index b5aef3f7..c6564030 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json @@ -478,7 +478,7 @@ "mean": 472.7, "std": 11.2, "cv_pct": 2.38, - "stability": "noisy", + "stability": "stable", "runs": [ 482.4, 470.0, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json index 92d37745..eefbf857 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json +++ 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json @@ -363,7 +363,7 @@ "mean": 706.9, "std": 19.3, "cv_pct": 2.73, - "stability": "noisy", + "stability": "stable", "runs": [ 655.0, 711.0, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json index 967d4064..1a000d46 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json @@ -250,7 +250,7 @@ "mean": 706.9, "std": 19.3, "cv_pct": 2.73, - "stability": "noisy", + "stability": "stable", "runs": [ 655.0, 711.0, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json index e9eb5a19..65f49b8f 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json @@ -368,7 +368,7 @@ "mean": 437.3, "std": 11.6, "cv_pct": 2.66, - "stability": "noisy", + "stability": "stable", "runs": [ 409.4, 431.5, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json index ed999f4c..ab83ad8d 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json @@ -255,7 +255,7 @@ "mean": 437.3, 
"std": 11.6, "cv_pct": 2.66, - "stability": "noisy", + "stability": "stable", "runs": [ 409.4, 431.5, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json index 16534425..feea95d5 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json @@ -368,7 +368,7 @@ "mean": 494.1, "std": 12.0, "cv_pct": 2.42, - "stability": "noisy", + "stability": "stable", "runs": [ 456.8, 504.0, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json index 12cd1900..f1ef7eaa 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json @@ -255,7 +255,7 @@ "mean": 494.1, "std": 12.0, "cv_pct": 2.42, - "stability": "noisy", + "stability": "stable", "runs": [ 456.8, 504.0, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json index 7e9b7e10..592ad309 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json @@ -367,7 +367,7 @@ "mean": 709.7, "std": 15.4, "cv_pct": 2.18, - "stability": "noisy", + "stability": "stable", "runs": [ 741.9, 718.6, diff --git 
a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json index 29aab15e..0a71eb6e 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json @@ -258,7 +258,7 @@ "mean": 709.7, "std": 15.4, "cv_pct": 2.18, - "stability": "noisy", + "stability": "stable", "runs": [ 741.9, 718.6, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json index aba7eea1..de77ce59 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json @@ -367,7 +367,7 @@ "mean": 694.8, "std": 17.4, "cv_pct": 2.51, - "stability": "noisy", + "stability": "stable", "runs": [ 728.5, 716.1, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json index 2a52a892..1710fbb0 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json @@ -258,7 +258,7 @@ "mean": 694.8, "std": 17.4, "cv_pct": 2.51, - "stability": "noisy", + "stability": "stable", "runs": [ 728.5, 716.1, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json index c4a7d19c..4e391997 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json @@ -367,7 +367,7 @@ "mean": 643.9, "std": 13.7, "cv_pct": 2.12, - "stability": "noisy", + "stability": "stable", "runs": [ 655.9, 662.5, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json index e77a2cc5..5685fadc 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json @@ -258,7 +258,7 @@ "mean": 643.9, "std": 13.7, "cv_pct": 2.12, - "stability": "noisy", + "stability": "stable", "runs": [ 655.9, 662.5, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json index c84caf5f..fa0ebd2e 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json @@ -444,7 +444,7 @@ "mean": 58.7, "std": 3.2, "cv_pct": 5.46, - "stability": "unstable", + "stability": "noisy", "runs": [ 55.4, 59.7, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json index a311a3ac..23ccbdfa 100644 --- 
a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json @@ -400,7 +400,7 @@ "mean": 58.7, "std": 3.2, "cv_pct": 5.46, - "stability": "unstable", + "stability": "noisy", "runs": [ 55.4, 59.7, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json index f17a3a85..b72baa3a 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json @@ -450,7 +450,7 @@ "mean": 67.1, "std": 6.4, "cv_pct": 9.6, - "stability": "unstable", + "stability": "high-variance", "runs": [ 68.2, 68.3, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json index e6a370cc..aa42e23f 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json @@ -408,7 +408,7 @@ "mean": 67.1, "std": 6.4, "cv_pct": 9.6, - "stability": "unstable", + "stability": "high-variance", "runs": [ 68.2, 68.3, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json index 6fd4617e..c9818acf 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json @@ -342,7 +342,7 @@ "mean": 2386.8, "std": 225.6, 
"cv_pct": 9.45, - "stability": "unstable", + "stability": "high-variance", "runs": [ 2796.4, 2979.6, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json index 367eaf0c..27d853c5 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json @@ -258,7 +258,7 @@ "mean": 2386.8, "std": 225.6, "cv_pct": 9.45, - "stability": "unstable", + "stability": "high-variance", "runs": [ 2796.4, 2979.6, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json index 41c11393..8d42221b 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json @@ -367,7 +367,7 @@ "mean": 534.5, "std": 12.1, "cv_pct": 2.26, - "stability": "noisy", + "stability": "stable", "runs": [ 559.4, 525.8, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json index 26bdb582..36af61f6 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json @@ -258,7 +258,7 @@ "mean": 534.5, "std": 12.1, "cv_pct": 2.26, - "stability": "noisy", + "stability": "stable", "runs": [ 559.4, 525.8, diff --git 
a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json index 23828891..68f0f389 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json @@ -367,7 +367,7 @@ "mean": 829.8, "std": 56.4, "cv_pct": 6.8, - "stability": "unstable", + "stability": "noisy", "runs": [ 836.7, 902.0, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json index 3319b45c..0bd06286 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json @@ -258,7 +258,7 @@ "mean": 829.8, "std": 56.4, "cv_pct": 6.8, - "stability": "unstable", + "stability": "noisy", "runs": [ 836.7, 902.0, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json index 810fda95..ac12dd14 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json @@ -367,7 +367,7 @@ "mean": 745.0, "std": 40.9, "cv_pct": 5.49, - "stability": "unstable", + "stability": "noisy", "runs": [ 738.9, 809.0, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json 
b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json index d1b31b83..ff160bbc 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json @@ -258,7 +258,7 @@ "mean": 745.0, "std": 40.9, "cv_pct": 5.49, - "stability": "unstable", + "stability": "noisy", "runs": [ 738.9, 809.0, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json index eacbbf9f..57802246 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json @@ -367,7 +367,7 @@ "mean": 715.7, "std": 43.7, "cv_pct": 6.1, - "stability": "unstable", + "stability": "noisy", "runs": [ 717.7, 704.7, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json index f9f0da01..9ae0eda1 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json @@ -258,7 +258,7 @@ "mean": 715.7, "std": 43.7, "cv_pct": 6.1, - "stability": "unstable", + "stability": "noisy", "runs": [ 717.7, 704.7, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json index d6d62308..07871c94 100644 --- 
a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json @@ -450,7 +450,7 @@ "mean": 67.0, "std": 6.4, "cv_pct": 9.57, - "stability": "unstable", + "stability": "high-variance", "runs": [ 68.2, 68.3, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json index 4e95085a..bbb4b142 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json @@ -408,7 +408,7 @@ "mean": 67.0, "std": 6.4, "cv_pct": 9.57, - "stability": "unstable", + "stability": "high-variance", "runs": [ 68.2, 68.3, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json index 2193513b..d52a285a 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json @@ -342,7 +342,7 @@ "mean": 2804.8, "std": 516.6, "cv_pct": 18.42, - "stability": "unstable", + "stability": "high-variance", "runs": [ 3817.9, 4192.3, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json index 9c8f54a6..38392d34 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json @@ -258,7 +258,7 @@ 
"mean": 2804.8, "std": 516.6, "cv_pct": 18.42, - "stability": "unstable", + "stability": "high-variance", "runs": [ 3817.9, 4192.3, diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json index ae0b7af5..42bded81 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json @@ -580,7 +580,7 @@ "mean": 184.0, "std": 9.5, "cv_pct": 5.15, - "stability": "unstable", + "stability": "noisy", "runs": [ 180.1, 187.5, diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json index 3330dbb3..491c6168 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json @@ -478,7 +478,7 @@ "mean": 184.0, "std": 9.5, "cv_pct": 5.15, - "stability": "unstable", + "stability": "noisy", "runs": [ 180.1, 187.5, diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json index cec7cb53..b9121bec 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json @@ -372,7 +372,7 @@ "mean": 472.4, "std": 11.9, "cv_pct": 2.51, - "stability": "noisy", + "stability": "stable", "runs": [ 455.4, 475.3, diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json 
b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json index dce17a6b..687beb9b 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json @@ -263,7 +263,7 @@ "mean": 472.4, "std": 11.9, "cv_pct": 2.51, - "stability": "noisy", + "stability": "stable", "runs": [ 455.4, 475.3, diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json index a31946b8..8a210904 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json @@ -372,7 +372,7 @@ "mean": 475.6, "std": 12.6, "cv_pct": 2.64, - "stability": "noisy", + "stability": "stable", "runs": [ 449.0, 494.9, diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json index f10b3791..0c81366f 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json @@ -263,7 +263,7 @@ "mean": 475.6, "std": 12.6, "cv_pct": 2.64, - "stability": "noisy", + "stability": "stable", "runs": [ 449.0, 494.9, diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json index 3de48f66..728692e0 100644 --- 
a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json @@ -347,7 +347,7 @@ "mean": 2693.3, "std": 140.0, "cv_pct": 5.2, - "stability": "unstable", + "stability": "noisy", "runs": [ 3023.8, 2938.2, diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json index eb6e46df..9b8e14c5 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json @@ -263,7 +263,7 @@ "mean": 2693.3, "std": 140.0, "cv_pct": 5.2, - "stability": "unstable", + "stability": "noisy", "runs": [ 3023.8, 2938.2, diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json index 1cc10d7f..cac12c65 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json @@ -376,7 +376,7 @@ "mean": 640.7, "std": 13.7, "cv_pct": 2.14, - "stability": "noisy", + "stability": "stable", "runs": [ 603.2, 639.6, diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json index 473a811c..4983b045 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json +++ 
b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json @@ -267,7 +267,7 @@ "mean": 640.7, "std": 13.7, "cv_pct": 2.14, - "stability": "noisy", + "stability": "stable", "runs": [ 603.2, 639.6, diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json index bd184764..8a8b9f06 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json @@ -376,7 +376,7 @@ "mean": 854.9, "std": 19.1, "cv_pct": 2.23, - "stability": "noisy", + "stability": "stable", "runs": [ 814.1, 856.6, diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json index b6150f92..97de99c0 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json @@ -267,7 +267,7 @@ "mean": 854.9, "std": 19.1, "cv_pct": 2.23, - "stability": "noisy", + "stability": "stable", "runs": [ 814.1, 856.6, diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json index e8657353..95316948 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json @@ -376,7 +376,7 @@ "mean": 628.8, "std": 16.5, 
"cv_pct": 2.63, - "stability": "noisy", + "stability": "stable", "runs": [ 593.7, 648.2, diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json index 3aa11072..708866f5 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json @@ -267,7 +267,7 @@ "mean": 628.8, "std": 16.5, "cv_pct": 2.63, - "stability": "noisy", + "stability": "stable", "runs": [ 593.7, 648.2, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json index 4399b583..16fdc40b 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json @@ -517,7 +517,7 @@ "mean": 339.8, "std": 8.8, "cv_pct": 2.58, - "stability": "noisy", + "stability": "stable", "runs": [ 334.4, 352.3, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json index 21a9742a..c96c9708 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json @@ -417,7 +417,7 @@ "mean": 339.8, "std": 8.8, "cv_pct": 2.58, - "stability": "noisy", + "stability": "stable", "runs": [ 334.4, 352.3, diff --git 
a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json index 07afef8c..c619c421 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json @@ -380,7 +380,7 @@ "mean": 472.1, "std": 11.5, "cv_pct": 2.44, - "stability": "noisy", + "stability": "stable", "runs": [ 445.8, 458.1, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json index 13a289a2..8f68cc7a 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json @@ -267,7 +267,7 @@ "mean": 472.1, "std": 11.5, "cv_pct": 2.44, - "stability": "noisy", + "stability": "stable", "runs": [ 445.8, 458.1, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json index b8cc75c0..99318739 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json @@ -380,7 +380,7 @@ "mean": 606.2, "std": 15.7, "cv_pct": 2.59, - "stability": "noisy", + "stability": "stable", "runs": [ 584.6, 597.4, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json 
b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json index e394a5a0..a18a7e21 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json @@ -267,7 +267,7 @@ "mean": 606.2, "std": 15.7, "cv_pct": 2.59, - "stability": "noisy", + "stability": "stable", "runs": [ 584.6, 597.4, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json index 1c05e652..128bca26 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json @@ -380,7 +380,7 @@ "mean": 438.9, "std": 10.7, "cv_pct": 2.43, - "stability": "noisy", + "stability": "stable", "runs": [ 419.8, 434.5, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json index a6d71b51..8f9aa498 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json @@ -267,7 +267,7 @@ "mean": 438.9, "std": 10.7, "cv_pct": 2.43, - "stability": "noisy", + "stability": "stable", "runs": [ 419.8, 434.5, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json index cdcfdb99..bcc41c80 100644 --- 
a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json @@ -354,7 +354,7 @@ "mean": 1698.1, "std": 39.2, "cv_pct": 2.31, - "stability": "noisy", + "stability": "stable", "runs": [ 1715.3, 1751.9, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json index c78793fb..d1779d30 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json @@ -267,7 +267,7 @@ "mean": 1698.1, "std": 39.2, "cv_pct": 2.31, - "stability": "noisy", + "stability": "stable", "runs": [ 1715.3, 1751.9, diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json index 3e34cc78..1c0fe3d4 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json @@ -559,7 +559,7 @@ "mean": 104.5, "std": 7.2, "cv_pct": 6.86, - "stability": "unstable", + "stability": "noisy", "runs": [ 96.8, 106.6, diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json index a6287796..249553cd 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json @@ -457,7 
+457,7 @@ "mean": 104.5, "std": 7.2, "cv_pct": 6.86, - "stability": "unstable", + "stability": "noisy", "runs": [ 96.8, 106.6, diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json index 67f09696..678c24fd 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json @@ -556,7 +556,7 @@ "mean": 325.5, "std": 9.7, "cv_pct": 2.99, - "stability": "noisy", + "stability": "stable", "runs": [ 348.2, 317.8, diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json index 76ba3ee7..405ca5df 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json @@ -457,7 +457,7 @@ "mean": 325.5, "std": 9.7, "cv_pct": 2.99, - "stability": "noisy", + "stability": "stable", "runs": [ 348.2, 317.8, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json index 262863b1..cc31153f 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json @@ -431,7 +431,7 @@ "mean": 676.2, "std": 105.7, "cv_pct": 15.64, - "stability": "unstable", + "stability": "high-variance", "runs": [ 310.3, 701.9, diff --git 
a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json index b7c57364..7a3c03f4 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json @@ -318,7 +318,7 @@ "mean": 676.2, "std": 105.7, "cv_pct": 15.64, - "stability": "unstable", + "stability": "high-variance", "runs": [ 310.3, 701.9, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json index b783b3b3..3cb556fc 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json @@ -431,7 +431,7 @@ "mean": 1381.4, "std": 213.7, "cv_pct": 15.47, - "stability": "unstable", + "stability": "high-variance", "runs": [ 641.1, 1449.1, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json index b765c762..ca9ac78b 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json @@ -318,7 +318,7 @@ "mean": 1381.4, "std": 213.7, "cv_pct": 15.47, - "stability": "unstable", + "stability": "high-variance", "runs": [ 641.1, 1449.1, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json 
b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json index 3ef8e5df..770f027f 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json @@ -431,7 +431,7 @@ "mean": 1148.7, "std": 204.2, "cv_pct": 17.78, - "stability": "unstable", + "stability": "high-variance", "runs": [ 440.3, 1192.8, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json index 2d1560bf..15ef4dbb 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json @@ -318,7 +318,7 @@ "mean": 1148.7, "std": 204.2, "cv_pct": 17.78, - "stability": "unstable", + "stability": "high-variance", "runs": [ 440.3, 1192.8, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json index 8219aee4..1cf0d911 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json @@ -365,7 +365,7 @@ "mean": 3941.2, "std": 974.0, "cv_pct": 24.71, - "stability": "unstable", + "stability": "high-variance", "runs": [ 683.0, 4100.0, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json index 57d94568..45594d91 100644 --- 
a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json @@ -278,7 +278,7 @@ "mean": 3941.2, "std": 974.0, "cv_pct": 24.71, - "stability": "unstable", + "stability": "high-variance", "runs": [ 683.0, 4100.0, diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json index d6d790d1..9e207d46 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json @@ -427,7 +427,7 @@ "mean": 142.6, "std": 13.3, "cv_pct": 9.33, - "stability": "unstable", + "stability": "high-variance", "runs": [ 136.5, 136.6, diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json index e50109d1..5ebc9840 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json @@ -383,7 +383,7 @@ "mean": 142.6, "std": 13.3, "cv_pct": 9.33, - "stability": "unstable", + "stability": "high-variance", "runs": [ 136.5, 136.6, diff --git a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json index 8b542132..6a5294d5 100644 --- a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json +++ b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json @@ -532,7 +532,7 @@ "mean": 709.2, "std": 40.9, "cv_pct": 5.77, - 
"stability": "unstable", + "stability": "noisy", "runs": [ 851.5, 842.9, diff --git a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json index 70d91c0f..bd40f8e2 100644 --- a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json @@ -434,7 +434,7 @@ "mean": 709.2, "std": 40.9, "cv_pct": 5.77, - "stability": "unstable", + "stability": "noisy", "runs": [ 851.5, 842.9, diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json index 5bacdc39..3784d212 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json @@ -463,7 +463,7 @@ "mean": 713.4, "std": 21.2, "cv_pct": 2.97, - "stability": "noisy", + "stability": "stable", "runs": [ 736.7, 762.2, diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json index a1017f7e..785ddd38 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json @@ -354,7 +354,7 @@ "mean": 713.4, "std": 21.2, "cv_pct": 2.97, - "stability": "noisy", + "stability": "stable", "runs": [ 736.7, 762.2, diff --git a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json index c5b80fc8..023009e0 100644 --- 
a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json +++ b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json @@ -477,7 +477,7 @@ "mean": 132.9, "std": 14.2, "cv_pct": 10.69, - "stability": "unstable", + "stability": "high-variance", "runs": [ 136.5, 136.6, diff --git a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json index e9e264b1..b27c0571 100644 --- a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json @@ -434,7 +434,7 @@ "mean": 132.9, "std": 14.2, "cv_pct": 10.69, - "stability": "unstable", + "stability": "high-variance", "runs": [ 136.5, 136.6, diff --git a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json index 60bc2c9d..0519bb09 100644 --- a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json +++ b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json @@ -602,7 +602,7 @@ "mean": 241.2, "std": 12.7, "cv_pct": 5.28, - "stability": "unstable", + "stability": "noisy", "runs": [ 283.6, 281.0, diff --git a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json index 4d23c22e..ee0e251c 100644 --- a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json +++ b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json @@ -503,7 +503,7 @@ "mean": 241.2, "std": 12.7, "cv_pct": 5.28, - "stability": "unstable", + "stability": "noisy", "runs": [ 283.6, 281.0, diff --git 
a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json index 5e8e3865..a44887c8 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json @@ -387,7 +387,7 @@ "mean": 494.9, "std": 12.6, "cv_pct": 2.55, - "stability": "noisy", + "stability": "stable", "runs": [ 492.6, 482.1, diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json index e50503cb..489ee85f 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json @@ -278,7 +278,7 @@ "mean": 494.9, "std": 12.6, "cv_pct": 2.55, - "stability": "noisy", + "stability": "stable", "runs": [ 492.6, 482.1, diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json index 40be3d5f..f00600e8 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json @@ -387,7 +387,7 @@ "mean": 645.2, "std": 18.2, "cv_pct": 2.82, - "stability": "noisy", + "stability": "stable", "runs": [ 593.7, 667.9, diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json index 9da3e271..e8396426 100644 --- 
a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json @@ -278,7 +278,7 @@ "mean": 645.2, "std": 18.2, "cv_pct": 2.82, - "stability": "noisy", + "stability": "stable", "runs": [ 593.7, 667.9, diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json index dad65d4b..c897b869 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json @@ -387,7 +387,7 @@ "mean": 533.8, "std": 12.3, "cv_pct": 2.3, - "stability": "noisy", + "stability": "stable", "runs": [ 501.4, 534.3, diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json index c3467ba5..9627dbbe 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json @@ -278,7 +278,7 @@ "mean": 533.8, "std": 12.3, "cv_pct": 2.3, - "stability": "noisy", + "stability": "stable", "runs": [ 501.4, 534.3, diff --git a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json index 601fa1f0..3b28d14f 100644 --- a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json @@ -470,7 +470,7 @@ "mean": 41.4, "std": 14.3, "cv_pct": 34.47, - "stability": "unstable", + "stability": "high-variance", "runs": 
[ 34.1, 34.2, diff --git a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json index 92c49d4f..839bccf7 100644 --- a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json @@ -428,7 +428,7 @@ "mean": 41.4, "std": 14.3, "cv_pct": 34.47, - "stability": "unstable", + "stability": "high-variance", "runs": [ 34.1, 34.2, diff --git a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json index a6161fb2..2d9a7a00 100644 --- a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json +++ b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json @@ -483,7 +483,7 @@ "mean": 116.6, "std": 10.5, "cv_pct": 8.99, - "stability": "unstable", + "stability": "high-variance", "runs": [ 98.7, 132.0, diff --git a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json index d2032ab4..49880a7c 100644 --- a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json +++ b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json @@ -383,7 +383,7 @@ "mean": 116.6, "std": 10.5, "cv_pct": 8.99, - "stability": "unstable", + "stability": "high-variance", "runs": [ 98.7, 132.0, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json index 2ef7b6b9..9eee1e93 100644 --- 
a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json @@ -484,7 +484,7 @@ "mean": 376.2, "std": 8.1, "cv_pct": 2.15, - "stability": "noisy", + "stability": "stable", "runs": [ 375.9, 376.2, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json index fadd0afd..ad67a5c4 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json @@ -383,7 +383,7 @@ "mean": 376.2, "std": 8.1, "cv_pct": 2.15, - "stability": "noisy", + "stability": "stable", "runs": [ 375.9, 376.2, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json index 0f5a1978..f8c9b082 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json @@ -346,7 +346,7 @@ "mean": 371.9, "std": 10.0, "cv_pct": 2.69, - "stability": "noisy", + "stability": "stable", "runs": [ 348.3, 386.1, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json index 2247a062..001df410 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json +++ 
b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json @@ -233,7 +233,7 @@ "mean": 371.9, "std": 10.0, "cv_pct": 2.69, - "stability": "noisy", + "stability": "stable", "runs": [ 348.3, 386.1, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json index 89695625..bab319b7 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json @@ -427,7 +427,7 @@ "mean": 32.3, "std": 11.7, "cv_pct": 36.18, - "stability": "unstable", + "stability": "high-variance", "runs": [ 25.6, 25.6, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json index ae235b6a..0667e527 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json @@ -383,7 +383,7 @@ "mean": 32.3, "std": 11.7, "cv_pct": 36.18, - "stability": "unstable", + "stability": "high-variance", "runs": [ 25.6, 25.6, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json index b4d5719f..61f97879 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json @@ -320,7 +320,7 @@ "mean": 2895.0, 
"std": 197.6, "cv_pct": 6.83, - "stability": "unstable", + "stability": "noisy", "runs": [ 3526.2, 3053.6, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json index 3e00a02f..97bc5729 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json @@ -233,7 +233,7 @@ "mean": 2895.0, "std": 197.6, "cv_pct": 6.83, - "stability": "unstable", + "stability": "noisy", "runs": [ 3526.2, 3053.6, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json index 55b7c653..083af211 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json @@ -397,7 +397,7 @@ "mean": 435.5, "std": 11.5, "cv_pct": 2.65, - "stability": "noisy", + "stability": "stable", "runs": [ 407.9, 438.6, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json index dcc4044d..8fc25e19 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json @@ -288,7 +288,7 @@ "mean": 435.5, "std": 11.5, "cv_pct": 2.65, - "stability": "noisy", + "stability": "stable", "runs": [ 407.9, 438.6, diff --git 
a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json index d7acc44f..70a58cb5 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json @@ -397,7 +397,7 @@ "mean": 541.0, "std": 40.7, "cv_pct": 7.53, - "stability": "unstable", + "stability": "noisy", "runs": [ 568.6, 600.0, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json index 186d212d..c9789133 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json @@ -288,7 +288,7 @@ "mean": 541.0, "std": 40.7, "cv_pct": 7.53, - "stability": "unstable", + "stability": "noisy", "runs": [ 568.6, 600.0, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json index 7e942d09..958e98f2 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json @@ -397,7 +397,7 @@ "mean": 433.8, "std": 9.6, "cv_pct": 2.22, - "stability": "noisy", + "stability": "stable", "runs": [ 409.8, 439.3, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json index 99819324..464d45b6 100644 --- 
a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json @@ -288,7 +288,7 @@ "mean": 433.8, "std": 9.6, "cv_pct": 2.22, - "stability": "noisy", + "stability": "stable", "runs": [ 409.8, 439.3, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json index 36449e18..70e12fab 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json @@ -482,7 +482,7 @@ "mean": 30.5, "std": 10.5, "cv_pct": 34.45, - "stability": "unstable", + "stability": "high-variance", "runs": [ 25.6, 25.6, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json index 68bdb8eb..1695366c 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json @@ -438,7 +438,7 @@ "mean": 30.5, "std": 10.5, "cv_pct": 34.45, - "stability": "unstable", + "stability": "high-variance", "runs": [ 25.6, 25.6, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json index 7ec3f32e..16faa2cf 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json @@ -375,7 +375,7 @@ "mean": 1917.3, "std": 286.8, "cv_pct": 14.96, - "stability": "unstable", + "stability": 
"high-variance", "runs": [ 2197.7, 2238.6, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json index 1c8b81b0..96ccbaf5 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json @@ -288,7 +288,7 @@ "mean": 1917.3, "std": 286.8, "cv_pct": 14.96, - "stability": "unstable", + "stability": "high-variance", "runs": [ 2197.7, 2238.6, diff --git a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json index 615b2be1..5e4a0c73 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json @@ -580,7 +580,7 @@ "mean": 105.3, "std": 6.8, "cv_pct": 6.47, - "stability": "unstable", + "stability": "noisy", "runs": [ 108.7, 106.6, diff --git a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json index 27ee58fc..6b364b39 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json @@ -478,7 +478,7 @@ "mean": 105.3, "std": 6.8, "cv_pct": 6.47, - "stability": "unstable", + "stability": "noisy", "runs": [ 108.7, 106.6, diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json index 37fc0648..b93e3207 
100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json @@ -367,7 +367,7 @@ "mean": 416.4, "std": 10.9, "cv_pct": 2.62, - "stability": "noisy", + "stability": "stable", "runs": [ 400.3, 404.0, diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json index f21b9a41..8f855f82 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json @@ -258,7 +258,7 @@ "mean": 416.4, "std": 10.9, "cv_pct": 2.62, - "stability": "noisy", + "stability": "stable", "runs": [ 400.3, 404.0, diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json index 02547706..5e3dda03 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json @@ -450,7 +450,7 @@ "mean": 14.9, "std": 4.4, "cv_pct": 29.72, - "stability": "unstable", + "stability": "high-variance", "runs": [ 17.1, 17.1, diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json index eac8dc1e..7862c4c8 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json +++ 
b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json @@ -408,7 +408,7 @@ "mean": 14.9, "std": 4.4, "cv_pct": 29.72, - "stability": "unstable", + "stability": "high-variance", "runs": [ 17.1, 17.1, diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json index 96ee6b89..27dd8c63 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json @@ -577,7 +577,7 @@ "mean": 92.8, "std": 8.0, "cv_pct": 8.63, - "stability": "unstable", + "stability": "high-variance", "runs": [ 86.6, 95.9, diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json index ba0678ca..e34441a4 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json @@ -478,7 +478,7 @@ "mean": 92.8, "std": 8.0, "cv_pct": 8.63, - "stability": "unstable", + "stability": "high-variance", "runs": [ 86.6, 95.9, diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json index 22487a5c..3d656bed 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json @@ -577,7 +577,7 @@ "mean": 293.9, "std": 15.7, "cv_pct": 5.35, - "stability": "unstable", + "stability": "noisy", "runs": [ 311.7, 301.1, diff --git 
a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json index 893bd657..ab2680be 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json @@ -478,7 +478,7 @@ "mean": 293.9, "std": 15.7, "cv_pct": 5.35, - "stability": "unstable", + "stability": "noisy", "runs": [ 311.7, 301.1, From ecae9258cd00ddb145cd56da403270f426ba46ff Mon Sep 17 00:00:00 2001 From: Liang Juhao Date: Tue, 19 May 2026 18:39:02 +0800 Subject: [PATCH 5/5] feat(ui): explain reliability pill on hover + jump to details MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reliability badge in the modal subtitle showed only `cv X.X%` with a plain `title` tooltip, which most readers never noticed and could not decode (what does "cv" mean? what's a good number? is high-variance bad?). Three layered fixes give every user type a path to the answer: 1. The pill is now a `; } // Render one row per scenario in the Details tab. Skipped if a scenario has @@ -648,7 +702,20 @@ function _renderDetails(row, panel) { d.run_pp != null ? _detailRow("Pipeline parallel size", d.run_pp) : null, d.run_dp != null ? _detailRow("Data parallel size", d.run_dp) : null, ]), - _detailSection("Reliability", _reliabilityRows(row)), + _detailSection("Reliability", _reliabilityRows(row), { + anchor: "reliability", + // Plain HTML (not auto-escaped) so we can highlight the threshold + // pills. Keep the wording short — readers shouldn't need to read a + // paragraph to decode a single percentage in the table below. + caption: + "Inter-run coefficient of variation " + + "(CV = std / mean × 100%) across the runs that produced this " + + "submission — lower is more reproducible. 
" + + "✓ stable ≤ 3%  ·  " + + "⚠ noisy ≤ 8%  ·  " + + "high-variance > 8% " + + "(informational — natural jitter, not a measurement error).", + }), _detailSection("Vendor-specific environment", _vendorDetailRows(row)), _detailSection("Accuracy", [ _detailRow("Subset score", d.acc_score, {