diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 990f73bc..d57f243d 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -520,7 +520,8 @@ Copy the closest existing suite and modify. Required fields: "online_sla_ttft_ms": 500, "num_runs": 3, "warmup_runs": 1, - "online_warmup_runs": 0, + "online_warmup_requests": 10, + "burst_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 200, @@ -911,6 +912,71 @@ class InferenceResult: | speculative | Offline throughput with draft model (same path as offline, engine uses speculative decoding) | `throughput_tokens_per_sec`; optional `task.runtime_metrics.acceptance_rate` if runner overrides `get_runtime_metrics()` | | burst | Two-state bursty load: alternates steady QPS and burst QPS windows | `burst_degradation_ratio` (burst_ttft_p99 / steady_ttft_p99); `sla_met_during_burst` | +### Warmup contract + +Cold engines inflate the first few timed requests by hundreds of ms (JIT +compile, CUDA-graph allocation, KV cache priming). Each scenario discards +a configurable prefix: + +| Scenario | Suite key | Default | Unit | +|---|---|---|---| +| offline / speculative / interactive | `warmup_runs` / `interactive_warmup_runs` | `1` / `0` | full passes | +| online | `online_warmup_requests` | `10` | dummy requests fired before QPS sweep | +| burst | `burst_warmup_requests` | `10` | dummy requests fired before first cycle | +| sustained | `warmup_minutes` | `2` | minutes of samples excluded from analysis | + +Warmup-time exceptions are logged and swallowed — they never abort the +timed phase. + +### Reliability metrics + +Each scenario emits an inter-run reliability block alongside its primary +metrics so submitters can prove their results are reproducible without +shipping `samples.jsonl`. 
Shape: + +```json +{ + "n": 3, + "mean": 1234.5, + "std": 21.3, + "cv_pct": 1.7, + "stability": "stable", + "runs": [1230.1, 1255.2, 1218.2] +} +``` + +`stability` thresholds: `cv_pct ≤ 3 → stable ✓`, `≤ 8 → noisy ⚠`, +otherwise `high-variance`. Calibrated from the May-2026 backfill — see +the comment above `_STABILITY_THRESHOLD_*` in `loadgen/loadgen.py` for the +empirical distribution that informed the choice. Tunable centrally there. + +**`high-variance` is informational, not a verdict.** High CV means the +hardware × workload combo carries irreducible jitter (thermal throttle on +consumer cards, HCCL noise on 16-chip Ascend topologies, acceptance-rate +fluctuation on speculative decoding) — it is **not** a sign the +submission is broken. The frontend reflects this: high-variance pills +use an orange tone with no error glyph, while only stable / noisy carry +✓ / ⚠ icons. + +If you submit a result that lands as high-variance, you do not need to +re-run. The badge is for downstream readers picking hardware for +latency-sensitive workloads — they can use the CV % to size their +safety margins, while peak-throughput shoppers can largely ignore it. 
+ +| Scenario | Field path | Reliability source | +|---|---|---| +| offline | `metrics.offline.results_by_concurrency[i].throughput_tokens_per_sec_reliability` | per-run throughput across `num_runs` | +| online | `metrics.online.results_by_qps[i].ttft_ms_p99_reliability` | per-run TTFT p99 across `num_runs` | +| interactive | `metrics.interactive.ttft_ms_p99_reliability` | per-run TTFT p99 across `num_runs` | +| sustained | `metrics.sustained.throughput_post_warmup_reliability` | per-interval throughput (post-warmup) | +| burst | `metrics.burst.recovery_time_seconds` (+ `metrics.burst.recovery_time_seconds_per_cycle`) | median seconds into the post-burst steady window before rolling TTFT p99 drops below 1.5× the steady baseline | + +Backfilling these for existing results is done by +`tools/backfill_distribution_stats.py`, which reads each result's local +`samples.jsonl` and writes the summary stats in place. Offline reliability +cannot be backfilled because per-run throughput was never recorded in +`samples.jsonl` historically — it stays `{}` for old offline results. + --- ## Schema and Validation diff --git a/leaderboard/generate.py b/leaderboard/generate.py index bf35de21..ead41d62 100644 --- a/leaderboard/generate.py +++ b/leaderboard/generate.py @@ -6,6 +6,8 @@ python leaderboard/generate.py """ +from __future__ import annotations + import hashlib import json import re @@ -238,6 +240,11 @@ def extract_detail(result: dict) -> dict: "meta_model_load_sec": meta.get("model_load_seconds"), "meta_start_time": meta.get("benchmark_start_time"), "meta_notes": meta.get("notes"), + # Vendor-specific environment fields collected by platforms/*.py + # (e.g. ROCm-SMI link health, NVML clock telemetry). The modal flattens + # this dict and shows only non-null entries — different vendors record + # different keys by design and no UI tries to unify them. 
+ "env_vendor_details": env.get("vendor_details") or {}, } @@ -297,6 +304,9 @@ def _concurrency_labels(rows): def _online_block(): online = metrics.get("online", {}) qps_rows = online.get("results_by_qps", []) + # Per-QPS reliability blocks. Emitted as a parallel array so the + # frontend can render a badge next to each QPS row without joining + # by index from a separate object. return { "labels": [str(r.get("target_qps", "")) for r in qps_rows], "ttft_p50": [r.get("ttft_ms_p50") for r in qps_rows], @@ -304,6 +314,8 @@ def _online_block(): "tpot_p50": [r.get("tpot_ms_p50") for r in qps_rows], "sla_met": [r.get("sla_met") for r in qps_rows], "max_valid_qps": online.get("max_valid_qps"), + "ttft_ms_p99_reliability": + [r.get("ttft_ms_p99_reliability") or {} for r in qps_rows], } def _interactive_block(): @@ -315,6 +327,7 @@ def _interactive_block(): "tpot_p50": iv.get("tpot_ms_p50"), "tpot_p90": iv.get("tpot_ms_p90"), "tpot_p99": iv.get("tpot_ms_p99"), + "ttft_ms_p99_reliability": iv.get("ttft_ms_p99_reliability") or {}, } def _sustained_block(): @@ -334,6 +347,8 @@ def _sustained_block(): "throttle_ratio": s.get("throttle_ratio"), "throttle_onset_minute": s.get("throttle_onset_minute"), "ttft_p99_drift_ms": s.get("ttft_p99_drift_ms"), + "throughput_post_warmup_reliability": + s.get("throughput_post_warmup_reliability") or {}, "samples": samples, } @@ -352,6 +367,9 @@ def _burst_block(): "burst_requests_total": b.get("burst_requests_total"), "sla_met_during_burst": b.get("sla_met_during_burst"), "burst_degradation_ratio": b.get("burst_degradation_ratio"), + "recovery_time_seconds": b.get("recovery_time_seconds"), + "recovery_time_seconds_per_cycle": + b.get("recovery_time_seconds_per_cycle"), # None when the field is absent (older results); [] is reserved for "burst ran, never recovered" "results_by_cycle": b.get("results_by_cycle"), } @@ -370,6 +388,11 @@ def _speculative_block(): "mean_accepted_tokens": rm.get("mean_accepted_tokens"), } + # Per-concurrency-level offline reliability blocks. 
Parallel array to + # `throughput` and `memory_gb` so the frontend can join by row index. + def _offline_reliability(rows): + return [r.get("throughput_tokens_per_sec_reliability") or {} for r in rows] + if suite == "suite_A": rows = _offline_rows() return { @@ -378,6 +401,7 @@ def _speculative_block(): "labels": _concurrency_labels(rows), "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "online": _online_block(), "interactive": _interactive_block(), @@ -395,6 +419,7 @@ def _speculative_block(): "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "throughput_per_chip": [r.get("throughput_tokens_per_sec_per_chip") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "online": _online_block(), "sustained": _sustained_block(), @@ -409,6 +434,7 @@ def _speculative_block(): "labels": _concurrency_labels(rows), "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "interactive": _interactive_block(), "sustained": _sustained_block(), @@ -514,6 +540,7 @@ def _speculative_block(): "labels": _concurrency_labels(rows), "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "online": _online_block(), "interactive": _interactive_block(), @@ -530,6 +557,7 @@ def _speculative_block(): "labels": _concurrency_labels(rows), "throughput": [r.get("throughput_tokens_per_sec") for r in rows], "memory_gb": [r.get("peak_memory_gb") for r in rows], + "throughput_reliability": _offline_reliability(rows), }, "online": _online_block(), "interactive": _interactive_block(), diff --git a/leaderboard/site/assets/css/modal.css 
b/leaderboard/site/assets/css/modal.css index 00e8e21a..7b456224 100644 --- a/leaderboard/site/assets/css/modal.css +++ b/leaderboard/site/assets/css/modal.css @@ -127,6 +127,79 @@ margin-right: 0.3rem; } +/* Inter-run reliability pill that lives in the modal subtitle. Colours + * track --good / --warn / --bad so the existing palette controls dark-mode + * behaviour. We intentionally tone down opacity so the badge does not + * compete with the primary metric callouts above. */ +.modal-reliab-pill { + display: inline-flex; + align-items: center; + gap: 0.25rem; + padding: 0.1rem 0.5rem; + border-radius: 999px; + font-size: 0.7rem; + font-weight: 600; + letter-spacing: 0.01em; + border: 1px solid color-mix(in srgb, currentColor 35%, transparent); + background: color-mix(in srgb, currentColor 10%, transparent); + /* Pill is rendered as a `; +} + +// Render one row per scenario in the Details tab. Skipped if a scenario has +// no block (older results). Burst gets recovery_time_seconds appended. +function _reliabilityRows(row) { + const viz = row.viz || {}; + const rows = []; + + // Format: "4.19% ⚠ noisy (n=14)" or "12.4% high-variance (n=14)". + // Icon is omitted (not just blank) for high-variance so we don't render + // a dangling "·" — the label and colour carry the meaning instead. + const fmtBlock = (b) => { + const icon = _STABILITY_ICON[b.stability] || ""; + const head = icon ? `${b.cv_pct}% · ${icon}` : `${b.cv_pct}%`; + return `${head} ${esc(b.stability || "")} (n=${b.n})`; + }; + + // offline — show the worst (largest CV) of all client_concurrency rows. + // That's the limiting concurrency for stability claims. 
+ if (viz.offline && Array.isArray(viz.offline.throughput_reliability)) { + const labels = viz.offline.labels || []; + const blocks = viz.offline.throughput_reliability; + const indexed = blocks + .map((b, i) => ({ b, label: labels[i] || `cc=${i}` })) + .filter((x) => _hasReliability(x.b)); + if (indexed.length) { + indexed.sort((a, b) => b.b.cv_pct - a.b.cv_pct); + const w = indexed[0]; + rows.push(_detailRow( + `Offline throughput (cc=${w.label})`, + fmtBlock(w.b), + { html: true }, + )); + } + } + + if (viz.online && Array.isArray(viz.online.ttft_ms_p99_reliability)) { + const labels = viz.online.labels || []; + const blocks = viz.online.ttft_ms_p99_reliability; + const indexed = blocks + .map((b, i) => ({ b, label: labels[i] || `qps=${i}` })) + .filter((x) => _hasReliability(x.b)); + if (indexed.length) { + indexed.sort((a, b) => b.b.cv_pct - a.b.cv_pct); + const w = indexed[0]; + rows.push(_detailRow( + `Online TTFT p99 (qps=${w.label})`, + fmtBlock(w.b), + { html: true }, + )); + } + } + + if (viz.interactive && _hasReliability(viz.interactive.ttft_ms_p99_reliability)) { + rows.push(_detailRow( + "Interactive TTFT p99", + fmtBlock(viz.interactive.ttft_ms_p99_reliability), + { html: true }, + )); + } + + if (viz.sustained && _hasReliability(viz.sustained.throughput_post_warmup_reliability)) { + rows.push(_detailRow( + "Sustained throughput (post-warmup)", + fmtBlock(viz.sustained.throughput_post_warmup_reliability), + { html: true }, + )); + } + + // Burst — non-CV stability metric: time-to-recover after a peak window. 
+ if (viz.burst && viz.burst.recovery_time_seconds != null) { + const sec = Number(viz.burst.recovery_time_seconds); + rows.push(_detailRow( + "Burst recovery time", + `${sec.toFixed(2)} s ` + + `median per-cycle, threshold 1.5× steady p99`, + { html: true }, + )); + } else if (viz.burst && Array.isArray(viz.burst.recovery_time_seconds_per_cycle) + && viz.burst.recovery_time_seconds_per_cycle.length === 0) { + // Burst ran but never recovered within any cycle's post-burst window. + rows.push(_detailRow( + "Burst recovery time", + `not measurable (never returned to baseline within steady window)`, + { html: true }, + )); + } + + return rows; +} + +// Flatten env_info.vendor_details into rows. Keys that are objects/arrays are +// JSON-stringified for display; null/empty values are dropped. We intentionally +// do not try to humanise key names — vendors disagree on terminology and the +// keys themselves are the documented contract. +function _vendorDetailRows(row) { + const obj = (row.detail || {}).env_vendor_details; + if (!obj || typeof obj !== "object") return []; + const rows = []; + for (const k of Object.keys(obj).sort()) { + const v = obj[k]; + if (v === null || v === undefined || v === "") continue; + if (Array.isArray(v) && v.length === 0) continue; + if (typeof v === "object" && !Array.isArray(v) && Object.keys(v).length === 0) continue; + const display = (typeof v === "object") + ? JSON.stringify(v) + : String(v); + rows.push(_detailRow(k, display, { mono: true })); + } + return rows; +} + function _renderDetails(row, panel) { const d = row.detail || {}; @@ -457,6 +702,21 @@ function _renderDetails(row, panel) { d.run_pp != null ? _detailRow("Pipeline parallel size", d.run_pp) : null, d.run_dp != null ? _detailRow("Data parallel size", d.run_dp) : null, ]), + _detailSection("Reliability", _reliabilityRows(row), { + anchor: "reliability", + // Plain HTML (not auto-escaped) so we can highlight the threshold + // pills. 
Keep the wording short — readers shouldn't need to read a + // paragraph to decode a single percentage in the table below. + caption: + "Inter-run coefficient of variation " + + "(CV = std / mean × 100%) across the runs that produced this " + + "submission — lower is more reproducible. " + + "✓ stable ≤ 3%  ·  " + + "⚠ noisy ≤ 8%  ·  " + + "high-variance > 8% " + + "(informational — natural jitter, not a measurement error).", + }), + _detailSection("Vendor-specific environment", _vendorDetailRows(row)), _detailSection("Accuracy", [ _detailRow("Subset score", d.acc_score, { format: (v) => Number(v).toFixed(2), diff --git a/loadgen/loadgen.py b/loadgen/loadgen.py index 758abc41..7a917108 100644 --- a/loadgen/loadgen.py +++ b/loadgen/loadgen.py @@ -66,6 +66,123 @@ def _percentile(data: list, p: float): return sorted_data[lo] + (sorted_data[hi] - sorted_data[lo]) * (idx - lo) +# ── Reliability helpers ────────────────────────────────────────────────────── +# +# These produce the inter-run variability metrics consumed by the leaderboard +# UI's "Reliability" panel. They live here (not in types.py) because they are +# pure functions over already-collected per-run lists and are easier to +# regression-test alongside the scenario implementations. +# +# Coefficient of Variation (CV) = std / mean × 100 %, computed with ddof=1 +# (sample std) when n ≥ 2. Returns None when input is too small or the mean +# is non-positive, in which case the frontend hides the badge entirely so +# users do not see a meaningless "stable ✓" on a single-run measurement. + +# Stability thresholds. Calibrated from the initial 255-result sustained +# backfill (May 2026), which had a CV median of 3.1 % and p90 of 13.1 %. +# Tighter thresholds (e.g. ≤ 2 % / ≤ 5 %) labelled the literal median run +# "noisy" and ~30 % of submissions "unstable" — those labels were too +# pejorative for what is really normal hardware jitter. 
The buckets here +# split the empirical distribution into ~48 % stable / ~35 % noisy / +# ~17 % high-variance, with the high-variance bucket dominated by chips +# we expect to genuinely throttle (RTX 5090, A6000, V100s) or scale-out +# topologies with real network jitter (Ascend ×16). +# +# Important wording choice: the third tier is named "high-variance", not +# "unstable", because high CV does not mean the measurement is wrong — it +# means the headline number carries irreducible variability the reader +# should be aware of. The frontend reflects this with a colour cue and +# no error glyph; "high-variance" is a description, not a verdict. +_STABILITY_THRESHOLD_STABLE_PCT = 3.0 +_STABILITY_THRESHOLD_NOISY_PCT = 8.0 + + +def _cv_pct(values: list) -> Optional[float]: + """Coefficient of variation as a percentage. None if too small / undefined.""" + if not values or len(values) < 2: + return None + arr = np.asarray(values, dtype=float) + arr = arr[np.isfinite(arr)] + if len(arr) < 2: + return None + mean = float(arr.mean()) + if mean <= 0: + return None + std = float(arr.std(ddof=1)) + return round(std / mean * 100.0, 2) + + +def _stability_label(cv_pct: Optional[float]) -> Optional[str]: + """Map a CV percentage to a stable / noisy / high-variance label, or None.""" + if cv_pct is None: + return None + if cv_pct <= _STABILITY_THRESHOLD_STABLE_PCT: + return "stable" + if cv_pct <= _STABILITY_THRESHOLD_NOISY_PCT: + return "noisy" + return "high-variance" + + +def _reliability_block(values: list, *, decimals: int = 2) -> dict: + """ + Build the standard {n, mean, std, cv_pct, stability, runs} block emitted + per metric. Returns an empty dict (not None) so the result schema retains + a consistent shape — frontend gates on `cv_pct` being numeric. 
+ """ + if not values: + return {} + arr = np.asarray(values, dtype=float) + arr = arr[np.isfinite(arr)] + if len(arr) == 0: + return {} + mean = float(arr.mean()) + std = float(arr.std(ddof=1)) if len(arr) >= 2 else 0.0 + cv = _cv_pct(arr.tolist()) + return { + "n": int(len(arr)), + "mean": round(mean, decimals), + "std": round(std, decimals), + "cv_pct": cv, + "stability": _stability_label(cv), + "runs": [round(float(v), decimals) for v in arr.tolist()], + } + + +def _compute_recovery_time( + arrivals: list, + ttfts: list, + *, + threshold_ms: float, + window_s: float = 3.0, + min_samples: int = 5, +) -> Optional[float]: + """ + Find the elapsed time (seconds, relative to the start of the post-burst + steady window) at which a rolling-window p99 of TTFT first falls below + `threshold_ms`. Returns None if it never recovers within the window or + if there are too few samples to compute a stable percentile. + + `arrivals` and `ttfts` are parallel arrays — arrivals must be relative + times in seconds from the start of the measurement window. 
+ """ + if not arrivals or len(arrivals) < min_samples: + return None + pairs = sorted(zip(arrivals, ttfts)) + a = [p[0] for p in pairs] + t = [p[1] for p in pairs] + n = len(a) + j = 0 + for i in range(n): + while j < i and a[j] < a[i] - window_s: + j += 1 + if i - j + 1 < min_samples: + continue + window = t[j:i + 1] + if float(np.percentile(window, 99)) < threshold_ms: + return round(float(a[i]), 2) + return None + + class AccelMarkLoadGen: def __init__( @@ -95,19 +212,35 @@ def __init__( # Use different request counts per scenario # offline: use request_count (default 200, fast) # online/interactive: use online_request_count if set, else all requests + # + # Warmup semantics differ per scenario: + # offline / interactive : `warmup_runs` = number of full passes to discard + # (interactive_warmup_runs may override for interactive) + # sustained : `warmup_minutes` = time window discarded + # online / burst : `online_warmup_requests` / `burst_warmup_requests` + # = number of dummy requests fired sequentially before + # the timed phase, used to JIT-compile kernels, allocate + # CUDA graphs, prime the KV cache, etc. Results are + # never recorded. Without this warmup, the first few + # requests of the first QPS level inflate p99 by + # hundreds of ms on cold engines. 
+ self.online_warmup_requests = 0 + self.burst_warmup_requests = 0 if scenario == "offline": count = suite.get("request_count") self.warmup_runs = suite.get("warmup_runs", 1) elif scenario == "online": # online and interactive need more requests for reliable p99 count = suite.get("online_request_count", suite.get("request_count")) - self.warmup_runs = suite.get("online_warmup_runs", 0) + self.warmup_runs = 0 # online doesn't use full-pass warmup + self.online_warmup_requests = suite.get("online_warmup_requests", 10) elif scenario == "interactive": count = suite.get("interactive_request_count", suite.get("request_count")) self.warmup_runs = suite.get("interactive_warmup_runs", 0) elif scenario == "burst": count = suite.get("online_request_count", suite.get("request_count")) - self.warmup_runs = suite.get("online_warmup_runs", 0) + self.warmup_runs = 0 # burst doesn't use full-pass warmup + self.burst_warmup_requests = suite.get("burst_warmup_requests", 10) elif scenario == "speculative": count = suite.get("request_count") self.warmup_runs = suite.get("warmup_runs", 1) @@ -138,8 +271,7 @@ def run(self, inference_fn: Callable) -> dict: "_run_interactive requires an async inference_fn(request: InferenceRequest) -> InferenceResult. " "Pass an async coroutine (inference_fn_streaming)." ) - loop = asyncio.get_event_loop() - return loop.run_until_complete(self._run_interactive_async(inference_fn)) + return asyncio.run(self._run_interactive_async(inference_fn)) elif self.scenario == "training": return self._run_training(inference_fn) elif self.scenario == "multiturn": @@ -324,6 +456,12 @@ def _fire_request() -> None: if len(ttft_p99s) >= 2: ttft_p99_drift_ms = round(ttft_p99s[-1] - ttft_p99s[0], 1) + # Inter-sample throughput stability across the post-warmup window. + # This is conceptually distinct from `throttle_ratio` (min/max): CV + # measures dispersion around the mean and is a better signal for + # "the chip throttles intermittently" vs "the chip is degrading". 
+ throughput_cv_block = _reliability_block(throughputs, decimals=1) + return { "sustained": { "sustained_concurrency": sustained_concurrency, @@ -335,6 +473,7 @@ def _fire_request() -> None: "throttle_ratio": throttle_ratio, "throttle_onset_minute": throttle_onset_minute, "ttft_p99_drift_ms": ttft_p99_drift_ms, + "throughput_post_warmup_reliability": throughput_cv_block, } } @@ -502,6 +641,12 @@ def _run_offline(self, inference_fn: Callable) -> dict: "power_watts_avg": None, "power_watts_peak": None, "oom": False, + # Per-run throughput reliability: lets the UI show "stable ✓ / + # noisy ⚠ / high-variance" without forcing the user to download + # samples.jsonl. `runs` preserves the underlying values so + # future stability rules can be recomputed without a re-run. + "throughput_tokens_per_sec_reliability": + _reliability_block(run_throughputs, decimals=2), "_throughput_note": "output_only", "_concurrency_note": ( "client_concurrency is the number of requests sent simultaneously. " @@ -532,14 +677,44 @@ def _run_online(self, inference_fn: Callable) -> dict: "Pass an async coroutine (inference_fn_streaming), " "not a sync wrapper." ) - loop = asyncio.get_event_loop() - return loop.run_until_complete(self._run_online_async(inference_fn)) + return asyncio.run(self._run_online_async(inference_fn)) + + async def _warmup_requests(self, async_inference_fn, count: int, label: str) -> None: + """ + Fire `count` dummy requests sequentially before timed measurement. + + Cycles through self.requests if count > len(requests). All results are + discarded — purpose is to JIT-compile kernels, allocate CUDA graphs, + prime the KV cache, and let the engine reach steady-state schedules + before the timed phase. Without this, the first few timed requests on + cold engines inflate p99 by hundreds of milliseconds. + + Exceptions during warmup are logged and swallowed; warmup failures + must never abort the timed run. 
+ """ + if count <= 0 or not self.requests: + return + tqdm.write( + f"[{label} warmup] firing {count} dummy requests " + "(results discarded — engine JIT/cache warm-up)" + ) + for i in range(count): + req = self.requests[i % len(self.requests)] + try: + await async_inference_fn(req) + except Exception as e: + tqdm.write(f"[{label} warmup] request {i} failed (ignored): {e}") async def _run_online_async(self, async_inference_fn) -> dict: """ Async implementation of the online scenario. Generates Poisson arrival times upfront, then fires all requests concurrently via asyncio.gather so the engine sees real concurrent load. + + A warmup phase fires `online_warmup_requests` dummy requests + sequentially before the QPS sweep. Their latencies are not recorded + in `results_by_qps`. This prevents cold-engine TTFT spikes from + inflating p99 at the first QPS level. """ loop = asyncio.get_event_loop() sla_ms = self.suite["online_sla_ttft_ms"] @@ -547,6 +722,10 @@ async def _run_online_async(self, async_inference_fn) -> dict: all_samples: list[SampleRecord] = [] max_valid_qps = 0.0 + await self._warmup_requests( + async_inference_fn, self.online_warmup_requests, "online" + ) + for target_qps in self.suite["online_qps_levels"]: print(f"[online] target_qps={target_qps}") run_ttfts: list[list[float]] = [] @@ -607,6 +786,15 @@ async def send_request(req: InferenceRequest, t_arrival: float) -> InferenceResu tpot_p90 = float(np.percentile(all_tpots, 90)) if all_tpots else 0 tpot_p99 = float(np.percentile(all_tpots, 99)) if all_tpots else 0 + # Per-run p99s, used to surface inter-run TTFT variability. + # We compute each run's p99 independently; the scenario's overall + # `ttft_ms_p99` (above) is computed by pooling all per-request + # TTFTs, which is the headline number, while this CV captures + # whether that number is reproducible across `num_runs`. 
+ ttft_p99_per_run = [ + float(np.percentile(run, 99)) for run in run_ttfts if run + ] + sla_met = ttft_p99 < sla_ms if sla_met: max_valid_qps = target_qps @@ -626,6 +814,8 @@ async def send_request(req: InferenceRequest, t_arrival: float) -> InferenceResu "tpot_ms_p99": round(tpot_p99, 2), "elapsed_seconds_median": round(float(np.median(run_elapsed_times)), 1), "sla_met": sla_met, + "ttft_ms_p99_reliability": + _reliability_block(ttft_p99_per_run, decimals=2), }) self._write_samples(all_samples) @@ -665,6 +855,11 @@ async def _run_burst_async(self, async_inference_fn) -> dict: sla_met_during_burst — bool: p99 TTFT during burst < online_sla_ttft_ms burst_degradation_ratio — burst_ttft_p99 / steady_ttft_p99 (higher = worse) results_by_cycle — per-cycle breakdown + + A warmup phase fires `burst_warmup_requests` dummy requests + sequentially before the first cycle. Their latencies are excluded + from steady/burst windows so the first cycle's steady-state + measurement is not contaminated by cold-engine TTFT spikes. """ loop = asyncio.get_event_loop() sla_ms = self.suite["online_sla_ttft_ms"] @@ -674,6 +869,10 @@ async def _run_burst_async(self, async_inference_fn) -> dict: steady_dur = self.suite["burst_interval_seconds"] num_runs = self.suite.get("num_runs", 3) + await self._warmup_requests( + async_inference_fn, self.burst_warmup_requests, "burst" + ) + all_steady_ttfts: list[float] = [] all_burst_ttfts: list[float] = [] results_by_cycle = [] @@ -681,7 +880,16 @@ async def _run_burst_async(self, async_inference_fn) -> dict: all_samples: list[SampleRecord] = [] async def fire_window(qps: float, duration_secs: float, label: str): - """Fire requests at Poisson QPS for duration_secs. Returns list of InferenceResult.""" + """ + Fire requests at Poisson QPS for duration_secs. 
+ + Returns + results : list[InferenceResult] in arrival order + elapsed : wall-clock seconds the window took + arrival_times : list[float] — each request's intended arrival + relative to window start (parallel to results). + Used to compute post-burst recovery_time_seconds. + """ n_expected = max(1, int(qps * duration_secs * 1.5)) requests_pool = (self.requests * ((n_expected // len(self.requests)) + 2))[:n_expected] @@ -690,7 +898,7 @@ async def fire_window(qps: float, duration_secs: float, label: str): pairs = [(req, t) for req, t in zip(requests_pool, arrival_times) if t < duration_secs] if not pairs: - return [], 0.0 + return [], 0.0, [] t_start = loop.time() @@ -702,20 +910,38 @@ async def send(req, t_arrival): results = list(await asyncio.gather(*[send(req, t) for req, t in pairs])) elapsed = loop.time() - t_start - return results, elapsed + window_arrivals = [t for (_, t) in pairs] + return results, elapsed, window_arrivals + + # Each cycle's per-request data, captured so we can compute + # recovery_time_seconds in a single post-processing pass after + # all cycles complete. 
+ cycle_data: list[dict] = [] for cycle_idx in range(num_runs): tqdm.write(f"[burst] cycle {cycle_idx + 1}/{num_runs} — steady({steady_qps} qps)...") - steady_results, steady_elapsed = await fire_window(steady_qps, steady_dur, "steady") - steady_ttfts = [r.first_token_time_ms for r in steady_results - if r.success and r.first_token_time_ms is not None] + steady_results, steady_elapsed, steady_arrivals = await fire_window( + steady_qps, steady_dur, "steady" + ) + steady_ttfts_pairs = [ + (a, r.first_token_time_ms) + for r, a in zip(steady_results, steady_arrivals) + if r.success and r.first_token_time_ms is not None + ] + steady_ttfts = [v for _, v in steady_ttfts_pairs] tqdm.write(f"[burst] cycle {cycle_idx + 1}/{num_runs} — burst({burst_qps} qps)...") - burst_results, burst_elapsed = await fire_window(burst_qps, burst_dur, "burst") - burst_ttfts = [r.first_token_time_ms for r in burst_results - if r.success and r.first_token_time_ms is not None] + burst_results, burst_elapsed, burst_arrivals = await fire_window( + burst_qps, burst_dur, "burst" + ) + burst_ttfts_pairs = [ + (a, r.first_token_time_ms) + for r, a in zip(burst_results, burst_arrivals) + if r.success and r.first_token_time_ms is not None + ] + burst_ttfts = [v for _, v in burst_ttfts_pairs] all_steady_ttfts.extend(steady_ttfts) all_burst_ttfts.extend(burst_ttfts) @@ -723,6 +949,11 @@ async def send(req, t_arrival): cycle_steady_p99 = float(np.percentile(steady_ttfts, 99)) if steady_ttfts else None cycle_burst_p99 = float(np.percentile(burst_ttfts, 99)) if burst_ttfts else None + cycle_data.append({ + "steady_pairs": steady_ttfts_pairs, + "burst_pairs": burst_ttfts_pairs, + }) + results_by_cycle.append({ "cycle": cycle_idx + 1, "steady_requests": len(steady_ttfts), @@ -748,6 +979,39 @@ async def send(req, t_arrival): sla_met_during_burst = (burst_p99 < sla_ms) if burst_p99 is not None else False degradation = round(burst_p99 / steady_p99, 3) if (burst_p99 and steady_p99) else None + # ── Recovery time 
after burst ───────────────────────────────────────── + # Definition: seconds elapsed within a post-burst steady window before + # the rolling p99 TTFT drops below 1.5× the long-term steady baseline. + # + # Implementation: the loop above runs `steady → burst` per cycle, so + # cycle (i+1)'s steady window is the post-burst recovery window for + # cycle i's burst. We compute one recovery time per cycle that has a + # successor steady window, then emit the median (more robust than + # mean to a single outlier cycle). + recovery_baseline_p99 = steady_p99 # long-term, post-warmup baseline + cycle_recovery_times: list[float] = [] + if recovery_baseline_p99 and recovery_baseline_p99 > 0: + threshold = 1.5 * recovery_baseline_p99 + for i in range(len(cycle_data) - 1): + post = cycle_data[i + 1]["steady_pairs"] + if not post: + continue + arrivals = [a for a, _ in post] + ttfts = [t for _, t in post] + rec = _compute_recovery_time( + arrivals, ttfts, + threshold_ms=threshold, + window_s=min(3.0, steady_dur / 2), + min_samples=5, + ) + if rec is not None: + cycle_recovery_times.append(rec) + + recovery_time_seconds = ( + round(float(np.median(cycle_recovery_times)), 2) + if cycle_recovery_times else None + ) + sla_icon = "✓" if sla_met_during_burst else "✗" chip_str = f" ({self.chip_count} chips)" if self.chip_count > 1 else "" tqdm.write( @@ -772,6 +1036,15 @@ async def send(req, t_arrival): "burst_ttft_p99_ms": round(burst_p99, 2) if burst_p99 else None, "sla_met_during_burst": sla_met_during_burst, "burst_degradation_ratio": degradation, + "recovery_time_seconds": recovery_time_seconds, + "recovery_time_seconds_per_cycle": [ + round(v, 2) for v in cycle_recovery_times + ] if cycle_recovery_times else [], + "_recovery_definition": ( + "Median seconds within the post-burst steady window before " + "rolling TTFT p99 drops below 1.5x the long-term steady baseline. " + "Lower is better; None means it never recovered within the window." 
+ ), "results_by_cycle": results_by_cycle, }} @@ -781,8 +1054,7 @@ def _run_burst(self, inference_fn: Callable) -> dict: raise TypeError( "_run_burst requires an async inference_fn(request: InferenceRequest) -> InferenceResult." ) - loop = asyncio.get_event_loop() - return loop.run_until_complete(self._run_burst_async(inference_fn)) + return asyncio.run(self._run_burst_async(inference_fn)) # ------------------------------------------------------------------ # Interactive scenario @@ -793,11 +1065,15 @@ async def _run_interactive_async(self, async_inference_fn) -> dict: Send one request at a time, waiting for completion before sending the next. Measures single-request latency in isolation (no queueing pressure). Uses the same async engine as online to ensure consistent TTFT measurement. + + Per-run TTFT p99s are captured so the result emits an inter-run + reliability block alongside the pooled metrics. """ all_ttfts: list[float] = [] all_tpots: list[float] = [] all_samples: list[SampleRecord] = [] run_elapsed_times: list[float] = [] + ttft_p99_per_run: list[float] = [] total_runs = self.warmup_runs + self.suite["num_runs"] @@ -848,6 +1124,8 @@ async def _run_interactive_async(self, async_inference_fn) -> dict: all_ttfts.extend(run_ttfts) all_tpots.extend(run_tpots) run_elapsed_times.append(run_elapsed) + if run_ttfts: + ttft_p99_per_run.append(float(np.percentile(run_ttfts, 99))) if run_ttfts: tqdm.write( @@ -869,6 +1147,8 @@ async def _run_interactive_async(self, async_inference_fn) -> dict: "tpot_ms_p99": round(float(np.percentile(all_tpots, 99)), 2) if all_tpots else None, "peak_memory_gb": None, "elapsed_seconds_median": round(float(np.median(run_elapsed_times)), 1) if run_elapsed_times else None, + "ttft_ms_p99_reliability": + _reliability_block(ttft_p99_per_run, decimals=2), }} # ------------------------------------------------------------------ diff --git a/loadgen/tests/__init__.py b/loadgen/tests/__init__.py new file mode 100644 index 00000000..e69de29b 
diff --git a/loadgen/tests/test_reliability.py b/loadgen/tests/test_reliability.py new file mode 100644 index 00000000..c2d4ea1b --- /dev/null +++ b/loadgen/tests/test_reliability.py @@ -0,0 +1,243 @@ +""" +Tests for the reliability blocks emitted by each loadgen scenario. + +Locks down: +- `_cv_pct` / `_stability_label` helpers +- `_reliability_block` shape contract +- `_compute_recovery_time` rolling-window logic +- offline / online / interactive / sustained / burst each emit the new + fields with the expected types and a non-None CV when n >= 2 + +These tests use the same async-closure pattern as test_warmup.py — a +real `async def` closure rather than a callable class, since loadgen detects +coroutines via `asyncio.iscoroutinefunction()`. +""" + +from __future__ import annotations + +import asyncio + +import pytest + +from loadgen.loadgen import ( + AccelMarkLoadGen, + _compute_recovery_time, + _cv_pct, + _reliability_block, + _stability_label, +) +from loadgen.types import InferenceResult + + +# ── Pure helper tests ───────────────────────────────────────────────────────── + +def test_cv_pct_basic(): + assert _cv_pct([100.0, 100.0, 100.0]) == 0.0 + cv = _cv_pct([90.0, 100.0, 110.0]) + assert cv is not None + assert 9.0 < cv < 11.0, f"expected CV near 10%, got {cv}" + + +def test_cv_pct_returns_none_for_small_or_invalid_input(): + assert _cv_pct([]) is None + assert _cv_pct([42.0]) is None + assert _cv_pct([0.0, 0.0, 0.0]) is None # mean=0, undefined CV + + +def test_stability_labels(): + # Boundaries: ≤3% stable, ≤8% noisy, >8% high-variance.
+ assert _stability_label(0.5) == "stable" + assert _stability_label(3.0) == "stable" # inclusive boundary + assert _stability_label(3.01) == "noisy" + assert _stability_label(8.0) == "noisy" # inclusive boundary + assert _stability_label(8.01) == "high-variance" + assert _stability_label(20.0) == "high-variance" + assert _stability_label(None) is None + + +def test_reliability_block_shape(): + block = _reliability_block([100.0, 102.0, 98.0], decimals=1) + assert set(block.keys()) == {"n", "mean", "std", "cv_pct", "stability", "runs"} + assert block["n"] == 3 + assert block["mean"] == 100.0 + assert block["runs"] == [100.0, 102.0, 98.0] + assert block["stability"] == "stable" + + +def test_reliability_block_empty_input_returns_empty_dict(): + """Frontend gates on the block being non-empty; never None.""" + assert _reliability_block([]) == {} + + +# ── Recovery-time tests ────────────────────────────────────────────────────── + +def test_recovery_time_finds_the_first_clean_window(): + """Build a synthetic post-burst window where the first 5 seconds are + elevated and everything after is clean. Recovery must land around 5s.""" + arrivals = [i * 0.5 for i in range(40)] # 20 seconds of arrivals at 2 Hz + # Elevated TTFTs first 5 s, then drop to clean values. 
+ ttfts = [1500.0 if a < 5.0 else 200.0 for a in arrivals] + rec = _compute_recovery_time(arrivals, ttfts, threshold_ms=500.0, window_s=2.0, min_samples=4) + assert rec is not None, "expected recovery, got None" + assert 4.5 <= rec <= 8.0, f"recovery expected ≈5–8s, got {rec}" + + +def test_recovery_time_returns_none_when_never_recovers(): + arrivals = [i * 0.5 for i in range(20)] + ttfts = [2000.0] * 20 # always above any sane threshold + assert _compute_recovery_time(arrivals, ttfts, threshold_ms=500.0) is None + + +def test_recovery_time_returns_none_when_too_few_samples(): + assert _compute_recovery_time([], [], threshold_ms=500.0) is None + assert _compute_recovery_time([1.0, 2.0], [100.0, 100.0], + threshold_ms=500.0, min_samples=5) is None + + +# ── Scenario integration tests ─────────────────────────────────────────────── + +def _make_requests(n: int): + from loadgen.loadgen import InferenceRequest + return [ + InferenceRequest(prompt=f"p{i}", request_id=i, input_tokens=10, max_tokens=20) + for i in range(n) + ] + + +def _async_fn(ttft_ms: float = 100.0): + """Build a real `async def` returning a constant InferenceResult.""" + async def fn(request) -> InferenceResult: + await asyncio.sleep(0) + return InferenceResult( + first_token_time_ms=ttft_ms, + total_time_ms=ttft_ms * 2, + output_tokens=20, + input_tokens=10, + success=True, + ) + return fn + + +def _sync_offline_fn(ttft_ms: float = 100.0): + """Sync inference_fn used for offline scenario — receives list of requests.""" + def fn(reqs): + return [ + InferenceResult( + first_token_time_ms=ttft_ms, + total_time_ms=ttft_ms * 2, + output_tokens=20, + input_tokens=10, + success=True, + ) + for _ in reqs + ] + return fn + + +def test_offline_emits_throughput_reliability(tmp_path): + suite = { + "concurrency_levels": [4], + "num_runs": 3, + "warmup_runs": 0, + "request_count": 8, + "input_tokens": 10, + } + requests = _make_requests(8) + gen = AccelMarkLoadGen(suite, requests, "offline", str(tmp_path)) + 
result = gen.run(_sync_offline_fn()) + + cc_results = result["offline"]["results_by_concurrency"] + assert cc_results, "offline scenario produced no results" + rel = cc_results[0].get("throughput_tokens_per_sec_reliability") + assert rel, "offline scenario did not emit reliability block" + assert rel["n"] == 3 + assert rel["cv_pct"] is not None + assert rel["stability"] in {"stable", "noisy", "high-variance"} + assert len(rel["runs"]) == 3 + + +def test_online_emits_ttft_p99_reliability(tmp_path): + suite = { + "num_runs": 2, + "online_qps_levels": [2.0], + "online_sla_ttft_ms": 1000, + "online_request_count": 6, + "online_warmup_requests": 0, + "input_tokens": 10, + } + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + result = gen.run(_async_fn(ttft_ms=100.0)) + + qps_results = result["online"]["results_by_qps"] + assert qps_results, "online scenario produced no results" + rel = qps_results[0].get("ttft_ms_p99_reliability") + assert rel, "online scenario did not emit reliability block" + assert rel["n"] == 2 + # With constant TTFT the CV should be exactly 0. 
+ assert rel["cv_pct"] == 0.0 + assert rel["stability"] == "stable" + + +def test_interactive_emits_ttft_p99_reliability(tmp_path): + suite = { + "num_runs": 2, + "interactive_warmup_runs": 0, + "interactive_request_count": 4, + "input_tokens": 10, + } + requests = _make_requests(4) + gen = AccelMarkLoadGen(suite, requests, "interactive", str(tmp_path)) + result = gen.run(_async_fn(ttft_ms=120.0)) + + inter = result["interactive"] + rel = inter.get("ttft_ms_p99_reliability") + assert rel, "interactive scenario did not emit reliability block" + assert rel["n"] == 2 + assert rel["stability"] == "stable" + + +def test_sustained_emits_throughput_post_warmup_reliability(tmp_path): + """Run a tiny sustained scenario — long enough to produce ≥2 sample + intervals so CV is computable.""" + suite = { + "sustained_concurrency": 2, + "duration_minutes": 4 / 60, # 4 seconds total + "sample_interval_seconds": 1.0, + "warmup_minutes": 1 / 60, # 1-second warmup + "input_tokens": 10, + } + requests = _make_requests(20) + gen = AccelMarkLoadGen(suite, requests, "sustained", str(tmp_path)) + result = gen.run(_async_fn(ttft_ms=30.0)) + + rel = result["sustained"].get("throughput_post_warmup_reliability") + assert isinstance(rel, dict), "sustained scenario did not emit reliability block" + # cv_pct may be None if not enough post-warmup samples landed; we only + # require the field exists. When n >= 2 the stability must be set. 
+ if rel.get("n", 0) >= 2: + assert rel["stability"] in {"stable", "noisy", "high-variance"} + + +def test_burst_emits_recovery_time_seconds(tmp_path): + """Burst with constant low TTFT must emit `recovery_time_seconds` + (value may be None) and a list (possibly empty) per-cycle field.""" + suite = { + "num_runs": 2, + "online_sla_ttft_ms": 1000, + "online_request_count": 6, + "burst_warmup_requests": 0, + "burst_steady_qps": 2.0, + "burst_peak_qps": 4.0, + "burst_duration_seconds": 0.5, + "burst_interval_seconds": 0.5, + "input_tokens": 10, + } + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "burst", str(tmp_path)) + result = gen.run(_async_fn(ttft_ms=100.0)) + + burst = result["burst"] + assert "recovery_time_seconds" in burst + assert "recovery_time_seconds_per_cycle" in burst + assert isinstance(burst["recovery_time_seconds_per_cycle"], list) diff --git a/loadgen/tests/test_warmup.py b/loadgen/tests/test_warmup.py new file mode 100644 index 00000000..ea625986 --- /dev/null +++ b/loadgen/tests/test_warmup.py @@ -0,0 +1,254 @@ +""" +Tests for the warmup phase in online and burst scenarios. + +These scenarios used to read `online_warmup_runs` from suite.json but +silently ignored the value — every reported p99 was contaminated by +cold-engine TTFT spikes. This test suite locks down the fix so future +refactors can't reintroduce the bug.
+ +What is verified: +- Warmup requests are fired in `online` and `burst` BEFORE the timed phase +- Warmup latencies are NOT counted in the returned distribution +- A counter on the mock inference_fn confirms the exact request budget +- Warmup is a no-op when the parameter is 0 (back-compat) +- An exception during warmup does not abort the timed phase +""" + +from __future__ import annotations + +import asyncio +from typing import Optional + +import pytest + +from loadgen.loadgen import AccelMarkLoadGen +from loadgen.types import InferenceResult + + +# ── Fixtures ────────────────────────────────────────────────────────────────── + +def _make_requests(n: int): + """Build n minimal InferenceRequest-like objects using the same shim + loadgen.py falls back to when benchmark_runner is not importable.""" + from loadgen.loadgen import InferenceRequest + return [ + InferenceRequest( + prompt=f"prompt {i}", + request_id=i, + input_tokens=10, + max_tokens=20, + ) + for i in range(n) + ] + + +def _online_suite(qps_levels=(2.0,), warmup_requests: int = 5, num_runs: int = 1): + return { + "num_runs": num_runs, + "online_qps_levels": list(qps_levels), + "online_sla_ttft_ms": 1000, + "online_request_count": 8, + "online_warmup_requests": warmup_requests, + "input_tokens": 10, + } + + +def _burst_suite(warmup_requests: int = 5): + return { + "num_runs": 1, + "online_sla_ttft_ms": 1000, + "online_request_count": 6, + "burst_steady_qps": 2.0, + "burst_peak_qps": 4.0, + "burst_duration_seconds": 0.3, + "burst_interval_seconds": 0.3, + "burst_warmup_requests": warmup_requests, + "input_tokens": 10, + } + + +class MockInferenceFn: + """Counts every call and exposes an async callable as `.fn` for loadgen. + + The fast warmup latency vs slow timed latency makes it trivial to + assert that warmup requests are excluded from the distribution: if + warmup latencies leaked into results, p50/p99 would collapse to + the fast value. 
+ + Note: loadgen uses `asyncio.iscoroutinefunction()` to detect async + inference_fn, which returns False for a class with `async __call__`. + So we expose `self.fn` as a real `async def` closure bound to this + instance's state. + """ + + def __init__(self, *, warmup_ttft_ms: float = 1.0, timed_ttft_ms: float = 100.0, + fail_first_n: int = 0): + self.call_count = 0 + self.warmup_ttft_ms = warmup_ttft_ms + self.timed_ttft_ms = timed_ttft_ms + self.fail_first_n = fail_first_n + self.warmup_budget: Optional[int] = None + + state = self # closure capture + + async def _fn(request) -> InferenceResult: + idx = state.call_count + state.call_count += 1 + if idx < state.fail_first_n: + raise RuntimeError(f"simulated failure on warmup request {idx}") + ttft = ( + state.warmup_ttft_ms + if state.warmup_budget is not None and idx < state.warmup_budget + else state.timed_ttft_ms + ) + await asyncio.sleep(0) # yield control + return InferenceResult( + first_token_time_ms=ttft, + total_time_ms=ttft * 2, + output_tokens=20, + input_tokens=10, + success=True, + ) + + self.fn = _fn + + def set_warmup_budget(self, n: int) -> None: + """Tell the mock how many of the next calls count as warmup.""" + self.warmup_budget = n + + +# ── online warmup ───────────────────────────────────────────────────────────── + +def test_online_warmup_fires_configured_count(tmp_path): + """Online scenario must fire exactly `online_warmup_requests` warmup calls.""" + suite = _online_suite(qps_levels=(2.0,), warmup_requests=5) + requests = _make_requests(8) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + + fn = MockInferenceFn() + fn.set_warmup_budget(5) + gen.run(fn.fn) + + # warmup (5) + 8 requests × 1 QPS × 1 run = 13 calls minimum + assert fn.call_count >= 13, ( + f"expected at least 13 inference_fn calls (5 warmup + 8 timed), " + f"got {fn.call_count}" + ) + + +def test_online_warmup_latencies_excluded_from_p99(tmp_path): + """If warmup latencies leaked into the recorded 
distribution, p99 would + collapse to the fast warmup value. Verify it stays at the timed value.""" + suite = _online_suite(qps_levels=(2.0,), warmup_requests=5) + requests = _make_requests(8) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + + fn = MockInferenceFn(warmup_ttft_ms=1.0, timed_ttft_ms=100.0) + fn.set_warmup_budget(5) + result = gen.run(fn.fn) + + qps_results = result["online"]["results_by_qps"] + assert qps_results, "expected at least one QPS level result" + p50 = qps_results[0]["ttft_ms_p50"] + p99 = qps_results[0]["ttft_ms_p99"] + + # If warmup leaked in, p50 would be near 1.0 ms. With warmup excluded, + # every recorded request returns 100 ms, so all percentiles snap there. + assert abs(p50 - 100.0) < 0.5, f"p50 contaminated by warmup: {p50}" + assert abs(p99 - 100.0) < 0.5, f"p99 contaminated by warmup: {p99}" + + +def test_online_warmup_zero_is_noop(tmp_path): + """Backward compat: setting online_warmup_requests=0 must skip warmup.""" + suite = _online_suite(qps_levels=(2.0,), warmup_requests=0) + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + + fn = MockInferenceFn() + gen.run(fn.fn) + + # No warmup means exactly 6 calls (1 QPS × 6 requests × 1 run). + assert fn.call_count == 6, ( + f"warmup=0 should fire only timed requests; got {fn.call_count} calls" + ) + + +def test_online_warmup_failure_does_not_abort_run(tmp_path): + """A failing warmup request must be logged and ignored — the timed phase + must still execute. Otherwise a flaky engine could prevent any submission.""" + suite = _online_suite(qps_levels=(2.0,), warmup_requests=3) + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + + # Fail the first 2 warmup requests; the 3rd warmup + all timed must run. 
+ fn = MockInferenceFn(fail_first_n=2) + fn.set_warmup_budget(3) + result = gen.run(fn.fn) + + qps_results = result["online"]["results_by_qps"] + assert qps_results, "timed phase did not run despite warmup failures" + assert fn.call_count >= 3 + 6 # 3 warmup attempts + 6 timed + + +# ── burst warmup ────────────────────────────────────────────────────────────── + +def test_burst_warmup_fires_configured_count(tmp_path): + suite = _burst_suite(warmup_requests=4) + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "burst", str(tmp_path)) + + fn = MockInferenceFn() + fn.set_warmup_budget(4) + gen.run(fn.fn) + + # At least 4 warmup calls must have fired before the timed cycles. + assert fn.call_count >= 4, ( + f"burst warmup did not fire enough requests: {fn.call_count}" + ) + + +def test_burst_warmup_zero_is_noop(tmp_path): + """Suites that omit burst_warmup_requests entirely default to 10; setting + it to 0 must skip warmup.""" + suite = _burst_suite(warmup_requests=0) + requests = _make_requests(6) + gen = AccelMarkLoadGen(suite, requests, "burst", str(tmp_path)) + + fn = MockInferenceFn() + n_before = fn.call_count + gen.run(fn.fn) + # No assertion on exact count (timed cycles depend on Poisson timing), + # but we can assert the mock saw at least 1 timed call. 
+ assert fn.call_count > n_before + + +# ── default values ──────────────────────────────────────────────────────────── + +def test_online_warmup_default_is_ten(tmp_path): + """Suite without online_warmup_requests should get a sensible default.""" + suite = { + "num_runs": 1, + "online_qps_levels": [2.0], + "online_sla_ttft_ms": 1000, + "online_request_count": 4, + "input_tokens": 10, + } + requests = _make_requests(4) + gen = AccelMarkLoadGen(suite, requests, "online", str(tmp_path)) + assert gen.online_warmup_requests == 10 + + +def test_burst_warmup_default_is_ten(tmp_path): + suite = { + "num_runs": 1, + "online_sla_ttft_ms": 1000, + "online_request_count": 4, + "burst_steady_qps": 2.0, + "burst_peak_qps": 4.0, + "burst_duration_seconds": 0.3, + "burst_interval_seconds": 0.3, + "input_tokens": 10, + } + requests = _make_requests(4) + gen = AccelMarkLoadGen(suite, requests, "burst", str(tmp_path)) + assert gen.burst_warmup_requests == 10 diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json index cf46b029..adcbafe3 100644 --- a/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json +++ b/results/community/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_sglang_c43a8309_958afbbd/result.json @@ -496,7 +496,44 @@ "sustained_throughput_tokens_per_sec": 562.5, "throttle_ratio": 0.966, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -0.3 + "ttft_p99_drift_ms": -0.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 562.5, + "std": 4.9, + "cv_pct": 0.86, + "stability": "stable", + "runs": [ + 566.2, + 560.8, + 565.3, + 561.4, + 561.9, + 570.0, + 558.3, + 563.3, + 552.7, + 569.3, + 558.9, + 568.4, + 557.2, + 565.5, + 554.1, + 563.7, + 563.3, + 565.0, + 563.3, + 564.7, + 564.3, + 562.3, + 569.5, + 550.8, + 562.5, + 561.7, + 566.5, + 558.2 + ] + } }, "speculative": { 
"results_by_concurrency": [ diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json index fb4ac5fd..e110ab18 100644 --- a/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json +++ b/results/community/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_sglang_c43a8309_99c43b97/result.json @@ -436,7 +436,44 @@ "sustained_throughput_tokens_per_sec": 54.9, "throttle_ratio": 0.666, "throttle_onset_minute": 11, - "ttft_p99_drift_ms": 66 + "ttft_p99_drift_ms": 66, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 54.9, + "std": 4.9, + "cv_pct": 8.95, + "stability": "high-variance", + "runs": [ + 56.2, + 56.3, + 56.2, + 56.3, + 56.3, + 56.2, + 56.2, + 56.2, + 56.3, + 37.5, + 56.3, + 56.2, + 56.3, + 56.2, + 56.3, + 56.2, + 56.3, + 56.2, + 56.3, + 56.2, + 37.5, + 56.3, + 56.2, + 56.2, + 56.3, + 56.3, + 56.2, + 56.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json b/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json index 29bb5796..54c9f403 100644 --- a/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json +++ b/results/community/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_sglang_c43a8309_435424a8/result.json @@ -327,7 +327,30 @@ "sustained_throughput_tokens_per_sec": 7095.4, "throttle_ratio": 0.92, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -4707.7 + "ttft_p99_drift_ms": -4707.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 7095.4, + "std": 147.1, + "cv_pct": 2.07, + "stability": "stable", + "runs": [ + 6616.4, + 7181.7, + 7188.9, + 7110.2, + 7106.5, + 7144.4, + 7158.3, + 7020.0, + 7160.7, + 7183.9, + 7180.8, + 7127.9, + 7111.1, + 7044.6 + ] + } } }, "accuracy": { diff --git 
a/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/result.json b/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/result.json index 1652c2b9..cfe652b4 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/result.json @@ -573,7 +573,44 @@ "sustained_throughput_tokens_per_sec": 268.0, "throttle_ratio": 0.868, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -21.4 + "ttft_p99_drift_ms": -21.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 268.0, + "std": 8.9, + "cv_pct": 3.3, + "stability": "noisy", + "runs": [ + 251.1, + 271.0, + 265.9, + 264.0, + 276.5, + 269.5, + 247.8, + 277.0, + 283.2, + 262.3, + 271.0, + 266.3, + 268.9, + 252.2, + 267.2, + 263.6, + 271.2, + 285.4, + 270.5, + 266.6, + 270.5, + 277.3, + 266.5, + 259.0, + 279.0, + 268.9, + 271.4, + 260.1 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/sustained/result.json b/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/sustained/result.json index 6aa62562..8ca9962f 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_A_ascend_vllm_ascend_d4aa9fda_a2777c30/sustained/result.json @@ -475,7 +475,44 @@ "sustained_throughput_tokens_per_sec": 268.0, "throttle_ratio": 0.868, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -21.4 + "ttft_p99_drift_ms": -21.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 268.0, + "std": 8.9, + "cv_pct": 3.3, + "stability": "noisy", + "runs": [ + 251.1, + 271.0, + 265.9, + 264.0, + 276.5, + 269.5, + 247.8, + 277.0, + 283.2, + 262.3, + 271.0, + 266.3, + 268.9, + 252.2, + 267.2, + 
263.6, + 271.2, + 285.4, + 270.5, + 266.6, + 270.5, + 277.3, + 266.5, + 259.0, + 279.0, + 268.9, + 271.4, + 260.1 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json index 3eef1882..442ac69f 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/result.json @@ -517,7 +517,44 @@ "sustained_throughput_tokens_per_sec": 53.2, "throttle_ratio": 0.733, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -20614.1 + "ttft_p99_drift_ms": -20614.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.2, + "std": 5.6, + "cv_pct": 10.6, + "stability": "high-variance", + "runs": [ + 46.9, + 59.7, + 51.2, + 51.2, + 55.5, + 46.9, + 64.0, + 46.9, + 59.7, + 46.9, + 55.5, + 55.5, + 46.9, + 59.8, + 46.9, + 59.7, + 46.9, + 59.7, + 51.2, + 51.2, + 55.4, + 46.9, + 64.0, + 46.9, + 55.5, + 51.2, + 55.5, + 51.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json index f8184d71..c4af0d0a 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_D_ascend_vllm_ascend_d4aa9fda_a3547ba9/sustained/result.json @@ -475,7 +475,44 @@ "sustained_throughput_tokens_per_sec": 53.2, "throttle_ratio": 0.733, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -20614.1 + "ttft_p99_drift_ms": -20614.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.2, + "std": 5.6, + "cv_pct": 10.6, + "stability": "high-variance", + "runs": [ + 
46.9, + 59.7, + 51.2, + 51.2, + 55.5, + 46.9, + 64.0, + 46.9, + 59.7, + 46.9, + 55.5, + 55.5, + 46.9, + 59.8, + 46.9, + 59.7, + 46.9, + 59.7, + 51.2, + 51.2, + 55.4, + 46.9, + 64.0, + 46.9, + 55.5, + 51.2, + 55.5, + 51.2 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/result.json b/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/result.json index 67225a10..9e65b7bf 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/result.json @@ -409,7 +409,30 @@ "sustained_throughput_tokens_per_sec": 1238.9, "throttle_ratio": 0.883, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": -302.5 + "ttft_p99_drift_ms": -302.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1238.9, + "std": 47.1, + "cv_pct": 3.8, + "stability": "noisy", + "runs": [ + 1230.3, + 1302.7, + 1288.1, + 1349.2, + 1277.4, + 1204.2, + 1213.4, + 1211.4, + 1191.5, + 1223.0, + 1239.3, + 1204.1, + 1199.3, + 1210.9 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/sustained/result.json b/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/sustained/result.json index 0d8c6baf..113b11e6 100644 --- a/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x1_suite_F_ascend_vllm_ascend_d4aa9fda_bd7d8f87/sustained/result.json @@ -325,7 +325,30 @@ "sustained_throughput_tokens_per_sec": 1238.9, "throttle_ratio": 0.883, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": -302.5 + "ttft_p99_drift_ms": -302.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1238.9, + "std": 47.1, + "cv_pct": 3.8, + "stability": "noisy", + "runs": [ + 1230.3, + 
1302.7, + 1288.1, + 1349.2, + 1277.4, + 1204.2, + 1213.4, + 1211.4, + 1191.5, + 1223.0, + 1239.3, + 1204.1, + 1199.3, + 1210.9 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json index 3061b81d..1c40ff18 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/result.json @@ -574,7 +574,44 @@ "sustained_throughput_tokens_per_sec": 53.2, "throttle_ratio": 0.616, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -127.8 + "ttft_p99_drift_ms": -127.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.2, + "std": 7.0, + "cv_pct": 13.23, + "stability": "high-variance", + "runs": [ + 56.7, + 55.0, + 54.4, + 54.2, + 52.3, + 40.7, + 62.1, + 54.1, + 52.1, + 63.0, + 40.6, + 65.9, + 41.9, + 44.9, + 65.4, + 50.3, + 58.2, + 52.0, + 49.0, + 55.4, + 49.7, + 51.5, + 54.8, + 55.4, + 63.5, + 45.1, + 55.0, + 45.3 + ] + } }, "interactive": { "ttft_ms_p50": 151.0, diff --git a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json index 9521952a..1511541c 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_B_ascend_vllm_ascend_d4aa9fda_fcb9725c/sustained/result.json @@ -475,7 +475,44 @@ "sustained_throughput_tokens_per_sec": 53.2, "throttle_ratio": 0.616, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -127.8 + "ttft_p99_drift_ms": -127.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.2, + "std": 7.0, + "cv_pct": 13.23, + "stability": "high-variance", + 
"runs": [ + 56.7, + 55.0, + 54.4, + 54.2, + 52.3, + 40.7, + 62.1, + 54.1, + 52.1, + 63.0, + 40.6, + 65.9, + 41.9, + 44.9, + 65.4, + 50.3, + 58.2, + 52.0, + 49.0, + 55.4, + 49.7, + 51.5, + 54.8, + 55.4, + 63.5, + 45.1, + 55.0, + 45.3 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json index 1941f8ac..eb69665d 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 226.6, "throttle_ratio": 0.821, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -109.1 + "ttft_p99_drift_ms": -109.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 226.6, + "std": 11.3, + "cv_pct": 5.01, + "stability": "noisy", + "runs": [ + 217.7, + 215.0, + 227.9, + 236.8, + 220.7, + 231.6, + 213.2, + 230.9, + 230.7, + 225.5, + 221.6, + 244.7, + 201.0, + 231.4, + 239.6, + 239.7, + 220.8, + 225.3, + 238.2, + 201.8, + 239.8, + 226.4, + 222.9, + 219.6, + 241.1, + 236.3, + 213.8, + 231.0 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json index abb80572..9c23b4c0 100644 --- a/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json +++ b/results/verified/huawei_ascend_910b2x8_suite_G_ascend_vllm_ascend_d4aa9fda_d726144e/sustained/result.json @@ -475,7 +475,44 @@ "sustained_throughput_tokens_per_sec": 226.6, "throttle_ratio": 0.821, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -109.1 + "ttft_p99_drift_ms": -109.1, + 
"throughput_post_warmup_reliability": { + "n": 28, + "mean": 226.6, + "std": 11.3, + "cv_pct": 5.01, + "stability": "noisy", + "runs": [ + 217.7, + 215.0, + 227.9, + 236.8, + 220.7, + 231.6, + 213.2, + 230.9, + 230.7, + 225.5, + 221.6, + 244.7, + 201.0, + 231.4, + 239.6, + 239.7, + 220.8, + 225.3, + 238.2, + 201.8, + 239.8, + 226.4, + 222.9, + 219.6, + 241.1, + 236.3, + 213.8, + 231.0 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json index 7b12b9eb..2b038c2a 100644 --- a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/result.json @@ -621,7 +621,44 @@ "sustained_throughput_tokens_per_sec": 53.5, "throttle_ratio": 0.603, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -14.8 + "ttft_p99_drift_ms": -14.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.5, + "std": 7.9, + "cv_pct": 14.84, + "stability": "high-variance", + "runs": [ + 67.8, + 42.6, + 54.7, + 54.2, + 54.7, + 64.5, + 52.3, + 41.8, + 48.5, + 63.0, + 47.9, + 60.4, + 59.1, + 54.8, + 48.3, + 50.3, + 50.4, + 54.4, + 63.6, + 48.6, + 56.1, + 46.2, + 60.1, + 40.9, + 67.1, + 42.5, + 60.5, + 43.5 + ] + } }, "interactive": { "ttft_ms_p50": 152.7, diff --git a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json index 6bd84a6a..f87c4b33 100644 --- a/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_B_ascend_vllm_ascend_d4aa9fda_635ecf42/sustained/result.json @@ -522,7 +522,44 
@@ "sustained_throughput_tokens_per_sec": 53.5, "throttle_ratio": 0.603, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -14.8 + "ttft_p99_drift_ms": -14.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 53.5, + "std": 7.9, + "cv_pct": 14.84, + "stability": "high-variance", + "runs": [ + 67.8, + 42.6, + 54.7, + 54.2, + 54.7, + 64.5, + 52.3, + 41.8, + 48.5, + 63.0, + 47.9, + 60.4, + 59.1, + 54.8, + 48.3, + 50.3, + 50.4, + 54.4, + 63.6, + 48.6, + 56.1, + 46.2, + 60.1, + 40.9, + 67.1, + 42.5, + 60.5, + 43.5 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/result.json b/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/result.json index 6bbc8bb0..284b5456 100644 --- a/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/result.json @@ -618,7 +618,44 @@ "sustained_throughput_tokens_per_sec": 262.2, "throttle_ratio": 0.861, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -18.4 + "ttft_p99_drift_ms": -18.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 262.2, + "std": 9.5, + "cv_pct": 3.62, + "stability": "noisy", + "runs": [ + 262.3, + 254.2, + 254.5, + 278.0, + 260.2, + 267.7, + 253.4, + 262.9, + 259.5, + 276.3, + 252.2, + 242.8, + 280.9, + 268.2, + 253.6, + 260.4, + 264.4, + 273.4, + 259.1, + 264.3, + 258.9, + 253.2, + 263.8, + 265.3, + 263.8, + 250.9, + 256.8, + 282.0 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/sustained/result.json b/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/sustained/result.json index 19390809..a8b21777 100644 --- 
a/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x16_suite_G_ascend_vllm_ascend_d4aa9fda_329a2b9e/sustained/result.json @@ -522,7 +522,44 @@ "sustained_throughput_tokens_per_sec": 262.2, "throttle_ratio": 0.861, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -18.4 + "ttft_p99_drift_ms": -18.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 262.2, + "std": 9.5, + "cv_pct": 3.62, + "stability": "noisy", + "runs": [ + 262.3, + 254.2, + 254.5, + 278.0, + 260.2, + 267.7, + 253.4, + 262.9, + 259.5, + 276.3, + 252.2, + 242.8, + 280.9, + 268.2, + 253.6, + 260.4, + 264.4, + 273.4, + 259.1, + 264.3, + 258.9, + 253.2, + 263.8, + 265.3, + 263.8, + 250.9, + 256.8, + 282.0 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/result.json b/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/result.json index 9d0b9088..aae2043c 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/result.json @@ -620,7 +620,44 @@ "sustained_throughput_tokens_per_sec": 376.6, "throttle_ratio": 0.893, "throttle_onset_minute": 21.0, - "ttft_p99_drift_ms": -8.0 + "ttft_p99_drift_ms": -8.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 376.6, + "std": 11.4, + "cv_pct": 3.03, + "stability": "noisy", + "runs": [ + 368.1, + 384.3, + 387.7, + 375.4, + 366.8, + 376.1, + 382.0, + 372.0, + 377.9, + 372.0, + 369.6, + 377.6, + 376.8, + 385.8, + 362.3, + 392.8, + 363.8, + 393.9, + 373.8, + 352.6, + 394.1, + 360.1, + 390.1, + 373.1, + 388.4, + 370.3, + 394.7, + 363.7 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git 
a/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/sustained/result.json b/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/sustained/result.json index 0a93919d..f2d38380 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_A_ascend_vllm_ascend_d4aa9fda_74d19743/sustained/result.json @@ -522,7 +522,44 @@ "sustained_throughput_tokens_per_sec": 376.6, "throttle_ratio": 0.893, "throttle_onset_minute": 21.0, - "ttft_p99_drift_ms": -8.0 + "ttft_p99_drift_ms": -8.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 376.6, + "std": 11.4, + "cv_pct": 3.03, + "stability": "noisy", + "runs": [ + 368.1, + 384.3, + 387.7, + 375.4, + 366.8, + 376.1, + 382.0, + 372.0, + 377.9, + 372.0, + 369.6, + 377.6, + 376.8, + 385.8, + 362.3, + 392.8, + 363.8, + 393.9, + 373.8, + 352.6, + 394.1, + 360.1, + 390.1, + 373.1, + 388.4, + 370.3, + 394.7, + 363.7 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json index bc4cf352..7ee86f6b 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/result.json @@ -564,7 +564,44 @@ "sustained_throughput_tokens_per_sec": 54.2, "throttle_ratio": 0.784, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -321.6 + "ttft_p99_drift_ms": -321.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 54.2, + "std": 3.8, + "cv_pct": 7.06, + "stability": "noisy", + "runs": [ + 46.9, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.5, + 55.5, + 51.2, + 59.7, + 51.2, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.4, 
+ 55.5, + 51.2, + 59.7, + 51.2, + 55.5, + 55.5, + 46.9, + 59.8, + 51.2, + 59.8, + 51.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json index 03f09823..125e8f48 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_D_ascend_vllm_ascend_d4aa9fda_6c1e7ffe/sustained/result.json @@ -522,7 +522,44 @@ "sustained_throughput_tokens_per_sec": 54.2, "throttle_ratio": 0.784, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -321.6 + "ttft_p99_drift_ms": -321.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 54.2, + "std": 3.8, + "cv_pct": 7.06, + "stability": "noisy", + "runs": [ + 46.9, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.5, + 55.5, + 51.2, + 59.7, + 51.2, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.4, + 55.5, + 51.2, + 59.7, + 51.2, + 55.5, + 55.5, + 46.9, + 59.8, + 51.2, + 59.8, + 51.2 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/result.json b/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/result.json index 6f66bb12..0d5e67ec 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/result.json @@ -456,7 +456,30 @@ "sustained_throughput_tokens_per_sec": 2217.9, "throttle_ratio": 0.94, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -217.5 + "ttft_p99_drift_ms": -217.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2217.9, + "std": 34.3, + "cv_pct": 1.54, + "stability": "stable", + "runs": [ + 2121.0, + 
2228.7, + 2201.3, + 2198.4, + 2215.7, + 2232.6, + 2225.4, + 2220.6, + 2187.0, + 2241.5, + 2245.4, + 2250.2, + 2255.4, + 2227.6 + ] + } } }, "accuracy": { diff --git a/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/sustained/result.json b/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/sustained/result.json index 2d1b61e9..bcb9d743 100644 --- a/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/sustained/result.json +++ b/results/verified/huawei_ascend_ascend910x1_suite_F_ascend_vllm_ascend_d4aa9fda_8826a63d/sustained/result.json @@ -372,7 +372,30 @@ "sustained_throughput_tokens_per_sec": 2217.9, "throttle_ratio": 0.94, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -217.5 + "ttft_p99_drift_ms": -217.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2217.9, + "std": 34.3, + "cv_pct": 1.54, + "stability": "stable", + "runs": [ + 2121.0, + 2228.7, + 2201.3, + 2198.4, + 2215.7, + 2232.6, + 2225.4, + 2220.6, + 2187.0, + 2241.5, + 2245.4, + 2250.2, + 2255.4, + 2227.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/result.json index 653ebddc..70f42b4b 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/result.json @@ -495,7 +495,44 @@ "sustained_throughput_tokens_per_sec": 484.0, "throttle_ratio": 0.892, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -13.4 + "ttft_p99_drift_ms": -13.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 484.0, + "std": 15.3, + "cv_pct": 3.16, + "stability": "noisy", + "runs": [ + 489.2, + 480.6, + 476.6, + 474.6, + 484.4, + 490.8, + 462.1, + 485.0, + 453.9, + 462.0, + 486.8, + 454.3, + 
483.2, + 467.2, + 483.4, + 480.4, + 468.5, + 501.4, + 507.5, + 482.9, + 503.3, + 498.0, + 496.9, + 494.1, + 496.2, + 479.2, + 508.6, + 501.7 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/sustained/result.json index d553f1f1..7075969f 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_A_nvidia_vllm_47f5d58e_b4a92b30/sustained/result.json @@ -397,7 +397,44 @@ "sustained_throughput_tokens_per_sec": 484.0, "throttle_ratio": 0.892, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -13.4 + "ttft_p99_drift_ms": -13.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 484.0, + "std": 15.3, + "cv_pct": 3.16, + "stability": "noisy", + "runs": [ + 489.2, + 480.6, + 476.6, + 474.6, + 484.4, + 490.8, + 462.1, + 485.0, + 453.9, + 462.0, + 486.8, + 454.3, + 483.2, + 467.2, + 483.4, + 480.4, + 468.5, + 501.4, + 507.5, + 482.9, + 503.3, + 498.0, + 496.9, + 494.1, + 496.2, + 479.2, + 508.6, + 501.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json index fbc82d38..649a377c 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/result.json @@ -356,7 +356,30 @@ "sustained_throughput_tokens_per_sec": 491.9, "throttle_ratio": 0.898, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -144.4 + "ttft_p99_drift_ms": -144.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 491.9, + "std": 14.0, + "cv_pct": 2.85, + 
"stability": "stable", + "runs": [ + 459.7, + 507.7, + 490.3, + 491.6, + 491.2, + 504.1, + 484.6, + 487.8, + 504.7, + 481.5, + 511.8, + 475.1, + 500.9, + 496.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json index b9c8c6bf..1cab7952 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/bf16/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 491.9, "throttle_ratio": 0.898, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -144.4 + "ttft_p99_drift_ms": -144.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 491.9, + "std": 14.0, + "cv_pct": 2.85, + "stability": "stable", + "runs": [ + 459.7, + 507.7, + 490.3, + 491.6, + 491.2, + 504.1, + 484.6, + 487.8, + 504.7, + 481.5, + 511.8, + 475.1, + 500.9, + 496.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/result.json index b98c6b18..7b184c01 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/result.json @@ -356,7 +356,30 @@ "sustained_throughput_tokens_per_sec": 709.1, "throttle_ratio": 0.935, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -196.6 + "ttft_p99_drift_ms": -196.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.1, + "std": 12.2, + "cv_pct": 1.72, + "stability": "stable", + "runs": [ + 674.8, + 719.2, + 722.0, + 704.1, + 703.0, + 722.0, + 703.0, + 708.7, + 717.0, + 717.1, + 
706.9, + 716.7, + 702.6, + 710.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/sustained/result.json index 554f0644..006c7372 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/fp8/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 709.1, "throttle_ratio": 0.935, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -196.6 + "ttft_p99_drift_ms": -196.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.1, + "std": 12.2, + "cv_pct": 1.72, + "stability": "stable", + "runs": [ + 674.8, + 719.2, + 722.0, + 704.1, + 703.0, + 722.0, + 703.0, + 708.7, + 717.0, + 717.1, + 706.9, + 716.7, + 702.6, + 710.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json index 67954308..20645b1e 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/result.json @@ -356,7 +356,30 @@ "sustained_throughput_tokens_per_sec": 813.5, "throttle_ratio": 0.926, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -204.3 + "ttft_p99_drift_ms": -204.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 813.5, + "std": 17.7, + "cv_pct": 2.17, + "stability": "stable", + "runs": [ + 778.9, + 820.5, + 809.4, + 823.7, + 823.7, + 793.7, + 828.6, + 841.5, + 797.3, + 818.5, + 799.6, + 829.7, + 826.3, + 797.0 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json index 36bb8403..69a88e9e 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w4a16/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 813.5, "throttle_ratio": 0.926, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -204.3 + "ttft_p99_drift_ms": -204.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 813.5, + "std": 17.7, + "cv_pct": 2.17, + "stability": "stable", + "runs": [ + 778.9, + 820.5, + 809.4, + 823.7, + 823.7, + 793.7, + 828.6, + 841.5, + 797.3, + 818.5, + 799.6, + 829.7, + 826.3, + 797.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/result.json index ca1752d1..a9bc15bf 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/result.json @@ -356,7 +356,30 @@ "sustained_throughput_tokens_per_sec": 700.0, "throttle_ratio": 0.945, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -208.8 + "ttft_p99_drift_ms": -208.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 700.0, + "std": 10.5, + "cv_pct": 1.51, + "stability": "stable", + "runs": [ + 676.5, + 702.7, + 701.1, + 708.1, + 693.3, + 702.6, + 708.2, + 696.5, + 697.5, + 713.5, + 685.0, + 716.0, + 702.4, + 696.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/sustained/result.json 
b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/sustained/result.json index 08e47dc3..e5d34321 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a16/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 700.0, "throttle_ratio": 0.945, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -208.8 + "ttft_p99_drift_ms": -208.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 700.0, + "std": 10.5, + "cv_pct": 1.51, + "stability": "stable", + "runs": [ + 676.5, + 702.7, + 701.1, + 708.1, + 693.3, + 702.6, + 708.2, + 696.5, + 697.5, + 713.5, + 685.0, + 716.0, + 702.4, + 696.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/result.json index 040a4132..9f88f9ba 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/result.json @@ -356,7 +356,30 @@ "sustained_throughput_tokens_per_sec": 657.7, "throttle_ratio": 0.922, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -106.5 + "ttft_p99_drift_ms": -106.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 657.7, + "std": 11.4, + "cv_pct": 1.73, + "stability": "stable", + "runs": [ + 632.3, + 685.6, + 656.9, + 651.9, + 656.6, + 660.3, + 662.7, + 648.2, + 660.2, + 654.3, + 661.2, + 664.8, + 654.3, + 659.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/sustained/result.json index 
74ebc6a6..15e689f1 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_C_nvidia_vllm_47f5d58e_57cc3fdf/w8a8/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 657.7, "throttle_ratio": 0.922, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -106.5 + "ttft_p99_drift_ms": -106.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 657.7, + "std": 11.4, + "cv_pct": 1.73, + "stability": "stable", + "runs": [ + 632.3, + 685.6, + 656.9, + 651.9, + 656.6, + 660.3, + 662.7, + 648.2, + 660.2, + 654.3, + 661.2, + 664.8, + 654.3, + 659.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json index 0003f085..e444f214 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/result.json @@ -439,7 +439,44 @@ "sustained_throughput_tokens_per_sec": 57.0, "throttle_ratio": 0.705, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -451.2 + "ttft_p99_drift_ms": -451.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 57.0, + "std": 7.4, + "cv_pct": 13.07, + "stability": "high-variance", + "runs": [ + 51.2, + 55.5, + 68.2, + 51.2, + 51.2, + 55.4, + 72.5, + 51.2, + 55.5, + 51.2, + 55.5, + 68.2, + 55.5, + 55.5, + 51.1, + 68.4, + 55.4, + 55.5, + 51.2, + 51.2, + 72.5, + 55.5, + 51.2, + 55.5, + 51.2, + 72.5, + 51.2, + 55.5 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json index d4e23453..75aad313 100644 --- 
a/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_D_nvidia_vllm_47f5d58e_8e114cbe/sustained/result.json @@ -397,7 +397,44 @@ "sustained_throughput_tokens_per_sec": 57.0, "throttle_ratio": 0.705, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -451.2 + "ttft_p99_drift_ms": -451.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 57.0, + "std": 7.4, + "cv_pct": 13.07, + "stability": "high-variance", + "runs": [ + 51.2, + 55.5, + 68.2, + 51.2, + 51.2, + 55.4, + 72.5, + 51.2, + 55.5, + 51.2, + 55.5, + 68.2, + 55.5, + 55.5, + 51.1, + 68.4, + 55.4, + 55.5, + 51.2, + 51.2, + 72.5, + 55.5, + 51.2, + 55.5, + 51.2, + 72.5, + 51.2, + 55.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/result.json index 23b30f8b..5e8ac339 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/result.json @@ -331,7 +331,30 @@ "sustained_throughput_tokens_per_sec": 3972.5, "throttle_ratio": 0.963, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -114.8 + "ttft_p99_drift_ms": -114.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3972.5, + "std": 41.8, + "cv_pct": 1.05, + "stability": "stable", + "runs": [ + 3951.2, + 3922.5, + 4011.2, + 4072.5, + 3992.6, + 3964.1, + 4012.5, + 3989.7, + 3938.4, + 3932.9, + 3939.0, + 3938.5, + 3995.8, + 3954.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/sustained/result.json index 445a5911..be0a3b44 100644 --- 
a/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx1_suite_F_nvidia_vllm_47f5d58e_fe3156b5/sustained/result.json @@ -247,7 +247,30 @@ "sustained_throughput_tokens_per_sec": 3972.5, "throttle_ratio": 0.963, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -114.8 + "ttft_p99_drift_ms": -114.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3972.5, + "std": 41.8, + "cv_pct": 1.05, + "stability": "stable", + "runs": [ + 3951.2, + 3922.5, + 4011.2, + 4072.5, + 3992.6, + 3964.1, + 4012.5, + 3989.7, + 3938.4, + 3932.9, + 3939.0, + 3938.5, + 3995.8, + 3954.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json index 7b24e63d..65a2c792 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/result.json @@ -574,7 +574,44 @@ "sustained_throughput_tokens_per_sec": 164.3, "throttle_ratio": 0.806, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": 17.4 + "ttft_p99_drift_ms": 17.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 164.3, + "std": 8.5, + "cv_pct": 5.2, + "stability": "noisy", + "runs": [ + 167.7, + 172.9, + 181.0, + 164.4, + 162.7, + 175.0, + 174.0, + 173.0, + 174.6, + 164.4, + 171.3, + 166.6, + 157.8, + 160.2, + 155.9, + 166.3, + 149.8, + 161.3, + 157.5, + 171.4, + 150.8, + 162.6, + 157.2, + 160.2, + 156.9, + 168.6, + 145.8, + 170.8 + ] + } }, "interactive": { "ttft_ms_p50": 81.65, diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json index 94afb943..33933391 100644 --- 
a/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_B_nvidia_vllm_47f5d58e_14410aea/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 164.3, "throttle_ratio": 0.806, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": 17.4 + "ttft_p99_drift_ms": 17.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 164.3, + "std": 8.5, + "cv_pct": 5.2, + "stability": "noisy", + "runs": [ + 167.7, + 172.9, + 181.0, + 164.4, + 162.7, + 175.0, + 174.0, + 173.0, + 174.6, + 164.4, + 171.3, + 166.6, + 157.8, + 160.2, + 155.9, + 166.3, + 149.8, + 161.3, + 157.5, + 171.4, + 150.8, + 162.6, + 157.2, + 160.2, + 156.9, + 168.6, + 145.8, + 170.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json index 993f6d90..fe47c5a0 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 472.7, "throttle_ratio": 0.902, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -2.3 + "ttft_p99_drift_ms": -2.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 472.7, + "std": 11.2, + "cv_pct": 2.38, + "stability": "stable", + "runs": [ + 482.4, + 470.0, + 478.0, + 464.6, + 475.5, + 474.1, + 475.7, + 467.2, + 478.8, + 476.8, + 443.2, + 477.3, + 480.2, + 459.0, + 465.2, + 486.8, + 468.0, + 466.5, + 484.3, + 454.4, + 491.3, + 483.4, + 482.3, + 454.0, + 481.7, + 470.6, + 462.2, + 482.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json 
b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json index ddf5c312..c6564030 100644 --- a/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_40gbx8_suite_G_nvidia_vllm_47f5d58e_08de2dc2/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 472.7, "throttle_ratio": 0.902, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -2.3 + "ttft_p99_drift_ms": -2.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 472.7, + "std": 11.2, + "cv_pct": 2.38, + "stability": "stable", + "runs": [ + 482.4, + 470.0, + 478.0, + 464.6, + 475.5, + 474.1, + 475.7, + 467.2, + 478.8, + 476.8, + 443.2, + 477.3, + 480.2, + 459.0, + 465.2, + 486.8, + 468.0, + 466.5, + 484.3, + 454.4, + 491.3, + 483.4, + 482.3, + 454.0, + 481.7, + 470.6, + 462.2, + 482.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json index ca26d93b..1dec8d50 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json @@ -494,7 +494,44 @@ "sustained_throughput_tokens_per_sec": 712.3, "throttle_ratio": 0.947, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -0.2 + "ttft_p99_drift_ms": -0.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 712.3, + "std": 9.4, + "cv_pct": 1.32, + "stability": "stable", + "runs": [ + 707.4, + 710.2, + 728.3, + 707.4, + 711.2, + 708.9, + 701.6, + 723.0, + 702.0, + 716.4, + 718.5, + 716.5, + 708.7, + 711.1, + 715.6, + 699.2, + 721.0, + 689.9, + 718.4, + 720.0, + 726.1, + 713.8, + 694.3, + 709.4, + 720.1, + 714.8, + 705.6, + 725.1 + ] + } }, "burst": { "sla_ttft_ms": 500, diff 
--git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json index 1fc95fb6..c99db7f5 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json @@ -394,7 +394,44 @@ "sustained_throughput_tokens_per_sec": 712.3, "throttle_ratio": 0.947, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -0.2 + "ttft_p99_drift_ms": -0.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 712.3, + "std": 9.4, + "cv_pct": 1.32, + "stability": "stable", + "runs": [ + 707.4, + 710.2, + 728.3, + 707.4, + 711.2, + 708.9, + 701.6, + 723.0, + 702.0, + 716.4, + 718.5, + 716.5, + 708.7, + 711.1, + 715.6, + 699.2, + 721.0, + 689.9, + 718.4, + 720.0, + 726.1, + 713.8, + 694.3, + 709.4, + 720.1, + 714.8, + 705.6, + 725.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/result.json index 10757712..f071d27d 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/result.json @@ -500,7 +500,44 @@ "sustained_throughput_tokens_per_sec": 551.9, "throttle_ratio": 0.868, "throttle_onset_minute": 18.0, - "ttft_p99_drift_ms": 9.0 + "ttft_p99_drift_ms": 9.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 551.9, + "std": 25.7, + "cv_pct": 4.66, + "stability": "noisy", + "runs": [ + 566.9, + 585.4, + 575.4, + 575.6, + 584.1, + 571.1, + 568.6, + 582.2, + 558.0, + 562.9, + 581.4, + 575.8, + 578.3, + 572.0, + 568.3, + 542.1, + 526.1, + 528.9, + 541.5, + 521.5, + 513.8, + 
536.7, + 510.0, + 536.8, + 527.1, + 508.2, + 521.6, + 531.8 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/sustained/result.json index c1e56599..46ed843d 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_ed4b0557/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 551.9, "throttle_ratio": 0.868, "throttle_onset_minute": 18.0, - "ttft_p99_drift_ms": 9.0 + "ttft_p99_drift_ms": 9.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 551.9, + "std": 25.7, + "cv_pct": 4.66, + "stability": "noisy", + "runs": [ + 566.9, + 585.4, + 575.4, + 575.6, + 584.1, + 571.1, + 568.6, + 582.2, + 558.0, + 562.9, + 581.4, + 575.8, + 578.3, + 572.0, + 568.3, + 542.1, + 526.1, + 528.9, + 541.5, + 521.5, + 513.8, + 536.7, + 510.0, + 536.8, + 527.1, + 508.2, + 521.6, + 531.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json index 75e68ff6..eefbf857 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json @@ -357,7 +357,30 @@ "sustained_throughput_tokens_per_sec": 706.9, "throttle_ratio": 0.899, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -360.1 + "ttft_p99_drift_ms": -360.1, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 706.9, + "std": 19.3, + "cv_pct": 2.73, + "stability": "stable", + "runs": [ + 655.0, + 711.0, + 698.3, + 718.8, + 724.4, + 
701.8, + 706.9, + 720.6, + 697.1, + 726.2, + 701.9, + 728.3, + 688.9, + 717.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json index ee4ebabf..1a000d46 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json @@ -244,7 +244,30 @@ "sustained_throughput_tokens_per_sec": 706.9, "throttle_ratio": 0.899, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -360.1 + "ttft_p99_drift_ms": -360.1, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 706.9, + "std": 19.3, + "cv_pct": 2.73, + "stability": "stable", + "runs": [ + 655.0, + 711.0, + 698.3, + 718.8, + 724.4, + 701.8, + 706.9, + 720.6, + 697.1, + 726.2, + 701.9, + 728.3, + 688.9, + 717.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json index 27e0744a..65f49b8f 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json @@ -362,7 +362,30 @@ "sustained_throughput_tokens_per_sec": 437.3, "throttle_ratio": 0.897, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -632.2 + "ttft_p99_drift_ms": -632.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 437.3, + "std": 11.6, + "cv_pct": 2.66, + "stability": "stable", + "runs": [ + 409.4, + 431.5, + 456.2, + 439.7, + 430.2, + 432.7, + 452.2, + 436.1, + 432.3, + 431.9, + 449.0, + 445.3, + 441.9, + 433.6 + ] + } } }, 
"accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json index 9982bb06..ab83ad8d 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json @@ -249,7 +249,30 @@ "sustained_throughput_tokens_per_sec": 437.3, "throttle_ratio": 0.897, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -632.2 + "ttft_p99_drift_ms": -632.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 437.3, + "std": 11.6, + "cv_pct": 2.66, + "stability": "stable", + "runs": [ + 409.4, + 431.5, + 456.2, + 439.7, + 430.2, + 432.7, + 452.2, + 436.1, + 432.3, + 431.9, + 449.0, + 445.3, + 441.9, + 433.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json index 485a0fb3..feea95d5 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json @@ -362,7 +362,30 @@ "sustained_throughput_tokens_per_sec": 494.1, "throttle_ratio": 0.905, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -320.5 + "ttft_p99_drift_ms": -320.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 494.1, + "std": 12.0, + "cv_pct": 2.42, + "stability": "stable", + "runs": [ + 456.8, + 504.0, + 486.6, + 499.0, + 496.0, + 498.3, + 495.4, + 496.1, + 503.2, + 489.6, + 504.8, + 500.4, + 494.6, + 492.1 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json index 0fa36d8f..f1ef7eaa 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json @@ -249,7 +249,30 @@ "sustained_throughput_tokens_per_sec": 494.1, "throttle_ratio": 0.905, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -320.5 + "ttft_p99_drift_ms": -320.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 494.1, + "std": 12.0, + "cv_pct": 2.42, + "stability": "stable", + "runs": [ + 456.8, + 504.0, + 486.6, + 499.0, + 496.0, + 498.3, + 495.4, + 496.1, + 503.2, + 489.6, + 504.8, + 500.4, + 494.6, + 492.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json index ff6e59c1..0828f267 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json @@ -362,7 +362,30 @@ "sustained_throughput_tokens_per_sec": 399.4, "throttle_ratio": 0.879, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -331.5 + "ttft_p99_drift_ms": -331.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 399.4, + "std": 12.5, + "cv_pct": 3.13, + "stability": "noisy", + "runs": [ + 366.9, + 402.7, + 400.8, + 402.1, + 395.3, + 398.4, + 410.0, + 398.6, + 390.3, + 417.2, + 391.1, + 408.7, + 415.2, + 395.0 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json index eaea3a87..01b72946 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json @@ -249,7 +249,30 @@ "sustained_throughput_tokens_per_sec": 399.4, "throttle_ratio": 0.879, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -331.5 + "ttft_p99_drift_ms": -331.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 399.4, + "std": 12.5, + "cv_pct": 3.13, + "stability": "noisy", + "runs": [ + 366.9, + 402.7, + 400.8, + 402.1, + 395.3, + 398.4, + 410.0, + 398.6, + 390.3, + 417.2, + 391.1, + 408.7, + 415.2, + 395.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/result.json index f1aeef5e..1db3621f 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 537.9, "throttle_ratio": 0.87, "throttle_onset_minute": 9.0, - "ttft_p99_drift_ms": -138.7 + "ttft_p99_drift_ms": -138.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 537.9, + "std": 25.5, + "cv_pct": 4.74, + "stability": "noisy", + "runs": [ + 516.9, + 570.2, + 563.2, + 574.0, + 567.1, + 569.5, + 533.9, + 526.7, + 509.9, + 532.6, + 499.2, + 522.4, + 520.7, + 524.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/sustained/result.json 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/sustained/result.json index 74ec1b43..3948212c 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/bf16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 537.9, "throttle_ratio": 0.87, "throttle_onset_minute": 9.0, - "ttft_p99_drift_ms": -138.7 + "ttft_p99_drift_ms": -138.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 537.9, + "std": 25.5, + "cv_pct": 4.74, + "stability": "noisy", + "runs": [ + 516.9, + 570.2, + 563.2, + 574.0, + 567.1, + 569.5, + 533.9, + 526.7, + 509.9, + 532.6, + 499.2, + 522.4, + 520.7, + 524.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json index 5d3e2c1d..592ad309 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 709.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -189.7 + "ttft_p99_drift_ms": -189.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.7, + "std": 15.4, + "cv_pct": 2.18, + "stability": "stable", + "runs": [ + 741.9, + 718.6, + 701.6, + 688.1, + 729.3, + 691.6, + 728.3, + 715.4, + 712.9, + 702.4, + 699.8, + 698.3, + 705.5, + 702.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json index 
c8f36da6..0a71eb6e 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/fp8/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 709.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -189.7 + "ttft_p99_drift_ms": -189.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.7, + "std": 15.4, + "cv_pct": 2.18, + "stability": "stable", + "runs": [ + 741.9, + 718.6, + 701.6, + 688.1, + 729.3, + 691.6, + 728.3, + 715.4, + 712.9, + 702.4, + 699.8, + 698.3, + 705.5, + 702.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/result.json index d4c5752a..fc10a331 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 757.0, "throttle_ratio": 0.942, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -185.6 + "ttft_p99_drift_ms": -185.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 757.0, + "std": 14.6, + "cv_pct": 1.92, + "stability": "stable", + "runs": [ + 737.0, + 764.7, + 762.2, + 771.5, + 740.1, + 755.5, + 752.4, + 744.3, + 781.3, + 745.1, + 782.2, + 748.8, + 747.8, + 765.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/sustained/result.json index 46c58f19..afa0d716 100644 --- 
a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w4a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 757.0, "throttle_ratio": 0.942, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -185.6 + "ttft_p99_drift_ms": -185.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 757.0, + "std": 14.6, + "cv_pct": 1.92, + "stability": "stable", + "runs": [ + 737.0, + 764.7, + 762.2, + 771.5, + 740.1, + 755.5, + 752.4, + 744.3, + 781.3, + 745.1, + 782.2, + 748.8, + 747.8, + 765.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json index d2f40e9b..de77ce59 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 694.8, "throttle_ratio": 0.912, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -179.3 + "ttft_p99_drift_ms": -179.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 694.8, + "std": 17.4, + "cv_pct": 2.51, + "stability": "stable", + "runs": [ + 728.5, + 716.1, + 677.7, + 692.5, + 712.4, + 664.6, + 700.4, + 688.4, + 678.1, + 692.0, + 710.5, + 679.9, + 690.7, + 695.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json index 21592a14..1710fbb0 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json +++ 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 694.8, "throttle_ratio": 0.912, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -179.3 + "ttft_p99_drift_ms": -179.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 694.8, + "std": 17.4, + "cv_pct": 2.51, + "stability": "stable", + "runs": [ + 728.5, + 716.1, + 677.7, + 692.5, + 712.4, + 664.6, + 700.4, + 688.4, + 678.1, + 692.0, + 710.5, + 679.9, + 690.7, + 695.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json index 0daadb7b..4e391997 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 643.9, "throttle_ratio": 0.925, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -92.0 + "ttft_p99_drift_ms": -92.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 643.9, + "std": 13.7, + "cv_pct": 2.12, + "stability": "stable", + "runs": [ + 655.9, + 662.5, + 620.7, + 649.8, + 643.7, + 631.6, + 645.2, + 628.6, + 671.1, + 634.4, + 650.0, + 633.5, + 643.4, + 643.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json index bf2083e9..5685fadc 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_6940965a/w8a8/sustained/result.json @@ -252,7 
+252,30 @@ "sustained_throughput_tokens_per_sec": 643.9, "throttle_ratio": 0.925, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -92.0 + "ttft_p99_drift_ms": -92.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 643.9, + "std": 13.7, + "cv_pct": 2.12, + "stability": "stable", + "runs": [ + 655.9, + 662.5, + 620.7, + 649.8, + 643.7, + 631.6, + 645.2, + 628.6, + 671.1, + 634.4, + 650.0, + 633.5, + 643.4, + 643.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json index ac745b18..fa0ebd2e 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json @@ -438,7 +438,44 @@ "sustained_throughput_tokens_per_sec": 58.7, "throttle_ratio": 0.866, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 202.0 + "ttft_p99_drift_ms": 202.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 58.7, + "std": 3.2, + "cv_pct": 5.46, + "stability": "noisy", + "runs": [ + 55.4, + 59.7, + 55.5, + 64.0, + 55.5, + 59.7, + 59.8, + 59.7, + 59.7, + 55.5, + 64.0, + 55.5, + 55.5, + 59.7, + 64.0, + 55.4, + 55.5, + 64.0, + 55.5, + 59.7, + 55.5, + 64.0, + 55.4, + 59.8, + 59.7, + 59.7, + 59.7, + 55.5 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json index 097e0e91..23ccbdfa 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json @@ -394,7 +394,44 @@ 
"sustained_throughput_tokens_per_sec": 58.7, "throttle_ratio": 0.866, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 202.0 + "ttft_p99_drift_ms": 202.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 58.7, + "std": 3.2, + "cv_pct": 5.46, + "stability": "noisy", + "runs": [ + 55.4, + 59.7, + 55.5, + 64.0, + 55.5, + 59.7, + 59.8, + 59.7, + 59.7, + 55.5, + 64.0, + 55.5, + 55.5, + 59.7, + 64.0, + 55.4, + 55.5, + 64.0, + 55.5, + 59.7, + 55.5, + 64.0, + 55.4, + 59.8, + 59.7, + 59.7, + 59.7, + 55.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json index 57b288a8..b72baa3a 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/result.json @@ -444,7 +444,44 @@ "sustained_throughput_tokens_per_sec": 67.1, "throttle_ratio": 0.501, "throttle_onset_minute": 21.0, - "ttft_p99_drift_ms": -474.5 + "ttft_p99_drift_ms": -474.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 67.1, + "std": 6.4, + "cv_pct": 9.6, + "stability": "high-variance", + "runs": [ + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 34.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json index bf336ef5..aa42e23f 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json +++ 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_7bef8eef/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 67.1, "throttle_ratio": 0.501, "throttle_onset_minute": 21.0, - "ttft_p99_drift_ms": -474.5 + "ttft_p99_drift_ms": -474.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 67.1, + "std": 6.4, + "cv_pct": 9.6, + "stability": "high-variance", + "runs": [ + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 34.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json index 2e7e0ce3..dc61504c 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json @@ -336,7 +336,30 @@ "sustained_throughput_tokens_per_sec": 11576.2, "throttle_ratio": 0.958, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -15.4 + "ttft_p99_drift_ms": -15.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 11576.2, + "std": 139.5, + "cv_pct": 1.2, + "stability": "stable", + "runs": [ + 11541.5, + 11672.6, + 11721.9, + 11526.8, + 11228.1, + 11380.9, + 11711.7, + 11643.5, + 11662.7, + 11612.6, + 11623.8, + 11639.3, + 11651.4, + 11450.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json index 6851ff63..78b09b49 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json +++ 
b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json @@ -249,7 +249,30 @@ "sustained_throughput_tokens_per_sec": 11576.2, "throttle_ratio": 0.958, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -15.4 + "ttft_p99_drift_ms": -15.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 11576.2, + "std": 139.5, + "cv_pct": 1.2, + "stability": "stable", + "runs": [ + 11541.5, + 11672.6, + 11721.9, + 11526.8, + 11228.1, + 11380.9, + 11711.7, + 11643.5, + 11662.7, + 11612.6, + 11623.8, + 11639.3, + 11651.4, + 11450.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json index 4d2726b7..c9818acf 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/result.json @@ -336,7 +336,30 @@ "sustained_throughput_tokens_per_sec": 2386.8, "throttle_ratio": 0.746, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -9.6 + "ttft_p99_drift_ms": -9.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2386.8, + "std": 225.6, + "cv_pct": 9.45, + "stability": "high-variance", + "runs": [ + 2796.4, + 2979.6, + 2296.3, + 2250.9, + 2332.1, + 2333.1, + 2362.3, + 2258.5, + 2439.4, + 2412.5, + 2221.4, + 2241.1, + 2268.2, + 2223.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json index a1bb052b..27d853c5 100644 --- a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json +++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_52ad2fe3/sustained/result.json @@ 
-252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 2386.8, "throttle_ratio": 0.746, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -9.6 + "ttft_p99_drift_ms": -9.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2386.8, + "std": 225.6, + "cv_pct": 9.45, + "stability": "high-variance", + "runs": [ + 2796.4, + 2979.6, + 2296.3, + 2250.9, + 2332.1, + 2333.1, + 2362.3, + 2258.5, + 2439.4, + 2412.5, + 2221.4, + 2241.1, + 2268.2, + 2223.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/result.json index 09fc23d5..56b152e2 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/result.json @@ -500,7 +500,44 @@ "sustained_throughput_tokens_per_sec": 546.4, "throttle_ratio": 0.881, "throttle_onset_minute": 23.0, - "ttft_p99_drift_ms": -0.7 + "ttft_p99_drift_ms": -0.7, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 546.4, + "std": 17.4, + "cv_pct": 3.19, + "stability": "noisy", + "runs": [ + 566.8, + 559.7, + 544.8, + 551.0, + 525.8, + 529.8, + 531.9, + 554.8, + 584.0, + 548.1, + 572.2, + 531.0, + 546.0, + 552.1, + 545.3, + 565.6, + 535.0, + 550.0, + 552.7, + 555.4, + 540.9, + 518.7, + 514.7, + 542.3, + 546.4, + 521.4, + 578.2, + 533.5 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/sustained/result.json index d327a16a..f3bb99cb 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/sustained/result.json +++ 
b/results/verified/nvidia_a800_sxm4_80gbx1_suite_A_nvidia_vllm_47f5d58e_298e6500/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 546.4, "throttle_ratio": 0.881, "throttle_onset_minute": 23.0, - "ttft_p99_drift_ms": -0.7 + "ttft_p99_drift_ms": -0.7, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 546.4, + "std": 17.4, + "cv_pct": 3.19, + "stability": "noisy", + "runs": [ + 566.8, + 559.7, + 544.8, + 551.0, + 525.8, + 529.8, + 531.9, + 554.8, + 584.0, + 548.1, + 572.2, + 531.0, + 546.0, + 552.1, + 545.3, + 565.6, + 535.0, + 550.0, + 552.7, + 555.4, + 540.9, + 518.7, + 514.7, + 542.3, + 546.4, + 521.4, + 578.2, + 533.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json index 0ed01fcb..8d42221b 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 534.5, "throttle_ratio": 0.933, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -140.3 + "ttft_p99_drift_ms": -140.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 534.5, + "std": 12.1, + "cv_pct": 2.26, + "stability": "stable", + "runs": [ + 559.4, + 525.8, + 552.5, + 528.7, + 547.5, + 525.4, + 532.6, + 526.2, + 521.7, + 531.5, + 521.7, + 533.9, + 528.7, + 546.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json index 7b8752c6..36af61f6 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json +++ 
b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/bf16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 534.5, "throttle_ratio": 0.933, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -140.3 + "ttft_p99_drift_ms": -140.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 534.5, + "std": 12.1, + "cv_pct": 2.26, + "stability": "stable", + "runs": [ + 559.4, + 525.8, + 552.5, + 528.7, + 547.5, + 525.4, + 532.6, + 526.2, + 521.7, + 531.5, + 521.7, + 533.9, + 528.7, + 546.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/result.json index 73b517f2..63d529d9 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 760.1, "throttle_ratio": 0.893, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -171.7 + "ttft_p99_drift_ms": -171.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 760.1, + "std": 33.5, + "cv_pct": 4.4, + "stability": "noisy", + "runs": [ + 769.6, + 793.5, + 770.5, + 800.0, + 794.2, + 803.6, + 796.6, + 754.2, + 723.9, + 717.3, + 739.1, + 737.1, + 719.0, + 722.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/sustained/result.json index 9f6d234c..bbe68d18 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/fp8/sustained/result.json @@ -252,7 +252,30 @@ 
"sustained_throughput_tokens_per_sec": 760.1, "throttle_ratio": 0.893, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -171.7 + "ttft_p99_drift_ms": -171.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 760.1, + "std": 33.5, + "cv_pct": 4.4, + "stability": "noisy", + "runs": [ + 769.6, + 793.5, + 770.5, + 800.0, + 794.2, + 803.6, + 796.6, + 754.2, + 723.9, + 717.3, + 739.1, + 737.1, + 719.0, + 722.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json index 3a45077f..68f0f389 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 829.8, "throttle_ratio": 0.83, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -193.6 + "ttft_p99_drift_ms": -193.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 829.8, + "std": 56.4, + "cv_pct": 6.8, + "stability": "noisy", + "runs": [ + 836.7, + 902.0, + 882.7, + 918.5, + 861.0, + 915.1, + 763.1, + 803.2, + 805.7, + 783.7, + 818.0, + 791.1, + 772.9, + 762.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json index 587e4d7e..0bd06286 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w4a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 829.8, "throttle_ratio": 0.83, "throttle_onset_minute": 7.0, - 
"ttft_p99_drift_ms": -193.6 + "ttft_p99_drift_ms": -193.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 829.8, + "std": 56.4, + "cv_pct": 6.8, + "stability": "noisy", + "runs": [ + 836.7, + 902.0, + 882.7, + 918.5, + 861.0, + 915.1, + 763.1, + 803.2, + 805.7, + 783.7, + 818.0, + 791.1, + 772.9, + 762.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json index e207171a..ac12dd14 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 745.0, "throttle_ratio": 0.866, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -190.8 + "ttft_p99_drift_ms": -190.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 745.0, + "std": 40.9, + "cv_pct": 5.49, + "stability": "noisy", + "runs": [ + 738.9, + 809.0, + 793.6, + 810.7, + 771.9, + 783.8, + 735.7, + 702.0, + 702.2, + 717.8, + 702.5, + 722.4, + 702.6, + 737.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json index bc45102a..ff160bbc 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 745.0, "throttle_ratio": 0.866, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -190.8 + "ttft_p99_drift_ms": -190.8, + "throughput_post_warmup_reliability": { + "n": 14, + 
"mean": 745.0, + "std": 40.9, + "cv_pct": 5.49, + "stability": "noisy", + "runs": [ + 738.9, + 809.0, + 793.6, + 810.7, + 771.9, + 783.8, + 735.7, + 702.0, + 702.2, + 717.8, + 702.5, + 722.4, + 702.6, + 737.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json index 494d6c51..57802246 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 715.7, "throttle_ratio": 0.843, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -94.9 + "ttft_p99_drift_ms": -94.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 715.7, + "std": 43.7, + "cv_pct": 6.1, + "stability": "noisy", + "runs": [ + 717.7, + 704.7, + 752.5, + 762.7, + 771.9, + 752.2, + 759.1, + 732.5, + 743.4, + 681.3, + 653.7, + 669.9, + 667.5, + 650.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json index f37b0c8d..9ae0eda1 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_C_nvidia_vllm_47f5d58e_944773aa/w8a8/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 715.7, "throttle_ratio": 0.843, "throttle_onset_minute": 10.0, - "ttft_p99_drift_ms": -94.9 + "ttft_p99_drift_ms": -94.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 715.7, + "std": 43.7, + "cv_pct": 6.1, + "stability": "noisy", + "runs": [ + 717.7, + 704.7, + 752.5, + 762.7, + 771.9, 
+ 752.2, + 759.1, + 732.5, + 743.4, + 681.3, + 653.7, + 669.9, + 667.5, + 650.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json index 5596dda6..07871c94 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/result.json @@ -444,7 +444,44 @@ "sustained_throughput_tokens_per_sec": 67.0, "throttle_ratio": 0.502, "throttle_onset_minute": 28.0, - "ttft_p99_drift_ms": -397.5 + "ttft_p99_drift_ms": -397.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 67.0, + "std": 6.4, + "cv_pct": 9.57, + "stability": "high-variance", + "runs": [ + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 67.9, + 34.3, + 68.3 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json index 108f892d..bbb4b142 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_D_nvidia_vllm_47f5d58e_4d0e7990/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 67.0, "throttle_ratio": 0.502, "throttle_onset_minute": 28.0, - "ttft_p99_drift_ms": -397.5 + "ttft_p99_drift_ms": -397.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 67.0, + "std": 6.4, + "cv_pct": 9.57, + "stability": "high-variance", + "runs": [ + 68.2, + 68.3, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.3, + 68.2, + 68.3, + 
68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.3, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 68.2, + 68.3, + 67.9, + 34.3, + 68.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json index cc527f3c..d52a285a 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/result.json @@ -336,7 +336,30 @@ "sustained_throughput_tokens_per_sec": 2804.8, "throttle_ratio": 0.602, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -11.5 + "ttft_p99_drift_ms": -11.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2804.8, + "std": 516.6, + "cv_pct": 18.42, + "stability": "high-variance", + "runs": [ + 3817.9, + 4192.3, + 2621.3, + 2548.3, + 2703.0, + 2680.5, + 2557.3, + 2641.8, + 2612.4, + 2524.0, + 2626.8, + 2534.0, + 2647.2, + 2560.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json index 04f3b842..38392d34 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx1_suite_F_nvidia_vllm_47f5d58e_54d0e7aa/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 2804.8, "throttle_ratio": 0.602, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -11.5 + "ttft_p99_drift_ms": -11.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2804.8, + "std": 516.6, + "cv_pct": 18.42, + "stability": "high-variance", + "runs": [ + 3817.9, + 4192.3, + 2621.3, + 2548.3, + 2703.0, + 2680.5, + 2557.3, + 2641.8, + 2612.4, + 2524.0, + 2626.8, + 
2534.0, + 2647.2, + 2560.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json index 832b5d1a..42bded81 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/result.json @@ -574,7 +574,44 @@ "sustained_throughput_tokens_per_sec": 184.0, "throttle_ratio": 0.816, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -7.8 + "ttft_p99_drift_ms": -7.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 184.0, + "std": 9.5, + "cv_pct": 5.15, + "stability": "noisy", + "runs": [ + 180.1, + 187.5, + 193.8, + 183.5, + 191.3, + 175.4, + 186.1, + 181.5, + 189.5, + 170.3, + 184.2, + 178.7, + 189.4, + 173.7, + 187.8, + 187.0, + 180.2, + 188.7, + 182.7, + 173.0, + 202.4, + 172.3, + 203.0, + 174.7, + 199.2, + 178.3, + 165.6, + 192.4 + ] + } }, "interactive": { "ttft_ms_p50": 81.8, diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json index 698c9eab..491c6168 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx8_suite_B_nvidia_vllm_47f5d58e_de0853fa/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 184.0, "throttle_ratio": 0.816, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -7.8 + "ttft_p99_drift_ms": -7.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 184.0, + "std": 9.5, + "cv_pct": 5.15, + "stability": "noisy", + "runs": [ + 180.1, + 187.5, + 193.8, + 183.5, + 191.3, + 175.4, + 186.1, + 181.5, + 189.5, + 170.3, + 184.2, + 178.7, + 189.4, + 173.7, 
+ 187.8, + 187.0, + 180.2, + 188.7, + 182.7, + 173.0, + 202.4, + 172.3, + 203.0, + 174.7, + 199.2, + 178.3, + 165.6, + 192.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/result.json index 83525842..269b6f03 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 569.1, "throttle_ratio": 0.845, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -27.8 + "ttft_p99_drift_ms": -27.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 569.1, + "std": 22.4, + "cv_pct": 3.93, + "stability": "noisy", + "runs": [ + 573.8, + 571.7, + 596.7, + 594.4, + 584.0, + 552.0, + 550.9, + 538.9, + 519.3, + 542.0, + 614.7, + 587.1, + 597.3, + 593.2, + 551.6, + 581.8, + 573.7, + 550.5, + 564.5, + 571.8, + 535.8, + 589.0, + 565.8, + 585.7, + 578.7, + 564.8, + 550.8, + 553.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/sustained/result.json b/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/sustained/result.json index 3f730cb1..33416153 100644 --- a/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/sustained/result.json +++ b/results/verified/nvidia_a800_sxm4_80gbx8_suite_G_nvidia_vllm_47f5d58e_d31ba78b/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 569.1, "throttle_ratio": 0.845, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -27.8 + "ttft_p99_drift_ms": -27.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 569.1, + "std": 22.4, + "cv_pct": 3.93, + "stability": "noisy", + "runs": [ + 573.8, + 571.7, + 596.7, + 594.4, + 
584.0, + 552.0, + 550.9, + 538.9, + 519.3, + 542.0, + 614.7, + 587.1, + 597.3, + 593.2, + 551.6, + 581.8, + 573.7, + 550.5, + 564.5, + 571.8, + 535.8, + 589.0, + 565.8, + 585.7, + 578.7, + 564.8, + 550.8, + 553.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/result.json index 82157fb0..0142e7ff 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/result.json @@ -504,7 +504,44 @@ "sustained_throughput_tokens_per_sec": 309.9, "throttle_ratio": 0.887, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": 5.6 + "ttft_p99_drift_ms": 5.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 309.9, + "std": 9.6, + "cv_pct": 3.1, + "stability": "noisy", + "runs": [ + 316.9, + 312.1, + 306.3, + 311.7, + 323.5, + 307.9, + 290.3, + 315.2, + 308.3, + 304.1, + 316.4, + 309.6, + 296.7, + 317.6, + 311.5, + 306.4, + 312.3, + 321.9, + 308.5, + 289.1, + 323.1, + 300.0, + 312.4, + 306.1, + 322.1, + 305.8, + 296.1, + 326.1 + ] + } }, "burst": { "sla_ttft_ms": 500, diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/sustained/result.json index 8a59eb57..f4650348 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_A_nvidia_vllm_47f5d58e_e95e2caa/sustained/result.json @@ -407,7 +407,44 @@ "sustained_throughput_tokens_per_sec": 309.9, "throttle_ratio": 0.887, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": 5.6 + "ttft_p99_drift_ms": 5.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 309.9, + 
"std": 9.6, + "cv_pct": 3.1, + "stability": "noisy", + "runs": [ + 316.9, + 312.1, + 306.3, + 311.7, + 323.5, + 307.9, + 290.3, + 315.2, + 308.3, + 304.1, + 316.4, + 309.6, + 296.7, + 317.6, + 311.5, + 306.4, + 312.3, + 321.9, + 308.5, + 289.1, + 323.1, + 300.0, + 312.4, + 306.1, + 322.1, + 305.8, + 296.1, + 326.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/result.json index c3c3e30e..72dbddc6 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 306.6, "throttle_ratio": 0.819, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -306.3 + "ttft_p99_drift_ms": -306.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 306.6, + "std": 14.3, + "cv_pct": 4.67, + "stability": "noisy", + "runs": [ + 267.6, + 322.3, + 315.8, + 306.4, + 314.6, + 296.7, + 326.9, + 299.4, + 311.9, + 301.7, + 315.6, + 307.7, + 306.1, + 299.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/sustained/result.json index 97b8ebbf..f1af1137 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/bf16/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 306.6, "throttle_ratio": 0.819, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -306.3 + "ttft_p99_drift_ms": -306.3, + "throughput_post_warmup_reliability": { + "n": 14, + 
"mean": 306.6, + "std": 14.3, + "cv_pct": 4.67, + "stability": "noisy", + "runs": [ + 267.6, + 322.3, + 315.8, + 306.4, + 314.6, + 296.7, + 326.9, + 299.4, + 311.9, + 301.7, + 315.6, + 307.7, + 306.1, + 299.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json index c5467b23..b9121bec 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 472.4, "throttle_ratio": 0.918, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -489.7 + "ttft_p99_drift_ms": -489.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 472.4, + "std": 11.9, + "cv_pct": 2.51, + "stability": "stable", + "runs": [ + 455.4, + 475.3, + 464.4, + 488.4, + 476.5, + 479.4, + 465.6, + 471.6, + 462.5, + 468.5, + 496.3, + 463.9, + 460.1, + 486.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json index 5e0a930f..687beb9b 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/fp8/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 472.4, "throttle_ratio": 0.918, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -489.7 + "ttft_p99_drift_ms": -489.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 472.4, + "std": 11.9, + "cv_pct": 2.51, + "stability": "stable", + "runs": [ + 455.4, + 475.3, + 464.4, + 
488.4, + 476.5, + 479.4, + 465.6, + 471.6, + 462.5, + 468.5, + 496.3, + 463.9, + 460.1, + 486.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/result.json index 74c8204c..3044937c 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 588.9, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -466.4 + "ttft_p99_drift_ms": -466.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 588.9, + "std": 8.1, + "cv_pct": 1.38, + "stability": "stable", + "runs": [ + 578.0, + 592.6, + 590.0, + 609.6, + 583.9, + 593.0, + 583.9, + 584.0, + 584.1, + 599.7, + 584.2, + 584.8, + 592.3, + 584.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/sustained/result.json index 64fff39e..19515355 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w4a16/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 588.9, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -466.4 + "ttft_p99_drift_ms": -466.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 588.9, + "std": 8.1, + "cv_pct": 1.38, + "stability": "stable", + "runs": [ + 578.0, + 592.6, + 590.0, + 609.6, + 583.9, + 593.0, + 583.9, + 584.0, + 584.1, + 599.7, + 584.2, + 584.8, + 592.3, + 584.4 + ] 
+ } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json index ffb70f72..8a210904 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 475.6, "throttle_ratio": 0.907, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -484.4 + "ttft_p99_drift_ms": -484.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 475.6, + "std": 12.6, + "cv_pct": 2.64, + "stability": "stable", + "runs": [ + 449.0, + 494.9, + 484.5, + 476.7, + 460.0, + 478.3, + 478.5, + 476.5, + 484.4, + 460.0, + 478.1, + 479.4, + 467.4, + 490.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json index 3f598712..0c81366f 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a16/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 475.6, "throttle_ratio": 0.907, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -484.4 + "ttft_p99_drift_ms": -484.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 475.6, + "std": 12.6, + "cv_pct": 2.64, + "stability": "stable", + "runs": [ + 449.0, + 494.9, + 484.5, + 476.7, + 460.0, + 478.3, + 478.5, + 476.5, + 484.4, + 460.0, + 478.1, + 479.4, + 467.4, + 490.4 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/result.json index d367a2b7..db9b3c8a 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/result.json @@ -366,7 +366,30 @@ "sustained_throughput_tokens_per_sec": 475.9, "throttle_ratio": 0.925, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -57.8 + "ttft_p99_drift_ms": -57.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 475.9, + "std": 9.2, + "cv_pct": 1.93, + "stability": "stable", + "runs": [ + 453.3, + 472.7, + 483.2, + 471.1, + 490.0, + 477.4, + 472.5, + 471.6, + 479.5, + 488.8, + 475.3, + 472.2, + 483.4, + 471.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/sustained/result.json index c2f822d9..b71ee9c9 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_C_nvidia_vllm_47f5d58e_4955fbb1/w8a8/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 475.9, "throttle_ratio": 0.925, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -57.8 + "ttft_p99_drift_ms": -57.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 475.9, + "std": 9.2, + "cv_pct": 1.93, + "stability": "stable", + "runs": [ + 453.3, + 472.7, + 483.2, + 471.1, + 490.0, + 477.4, + 472.5, + 471.6, + 479.5, + 488.8, + 475.3, + 472.2, + 483.4, + 471.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json 
b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json index b52db8fa..728692e0 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/result.json @@ -341,7 +341,30 @@ "sustained_throughput_tokens_per_sec": 2693.3, "throttle_ratio": 0.821, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -63.4 + "ttft_p99_drift_ms": -63.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2693.3, + "std": 140.0, + "cv_pct": 5.2, + "stability": "noisy", + "runs": [ + 3023.8, + 2938.2, + 2481.2, + 2558.2, + 2650.5, + 2645.4, + 2618.4, + 2683.7, + 2624.8, + 2660.3, + 2705.4, + 2652.5, + 2764.3, + 2699.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json index c26d796f..9b8e14c5 100644 --- a/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_3090x1_suite_F_nvidia_vllm_47f5d58e_faf550ec/sustained/result.json @@ -257,7 +257,30 @@ "sustained_throughput_tokens_per_sec": 2693.3, "throttle_ratio": 0.821, "throttle_onset_minute": 3.0, - "ttft_p99_drift_ms": -63.4 + "ttft_p99_drift_ms": -63.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2693.3, + "std": 140.0, + "cv_pct": 5.2, + "stability": "noisy", + "runs": [ + 3023.8, + 2938.2, + 2481.2, + 2558.2, + 2650.5, + 2645.4, + 2618.4, + 2683.7, + 2624.8, + 2660.3, + 2705.4, + 2652.5, + 2764.3, + 2699.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/result.json index 
7e3068b3..1aebc5c9 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/result.json @@ -508,7 +508,44 @@ "sustained_throughput_tokens_per_sec": 400.1, "throttle_ratio": 0.936, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -2.6 + "ttft_p99_drift_ms": -2.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 400.1, + "std": 6.3, + "cv_pct": 1.58, + "stability": "stable", + "runs": [ + 406.1, + 399.7, + 406.1, + 387.3, + 399.9, + 393.1, + 403.1, + 407.1, + 395.6, + 399.7, + 401.3, + 393.3, + 407.1, + 406.0, + 399.4, + 402.4, + 391.0, + 399.7, + 384.6, + 410.9, + 403.4, + 398.5, + 399.7, + 400.9, + 393.3, + 407.0, + 405.7, + 399.9 + ] + } }, "burst": { "sla_ttft_ms": 500, diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/sustained/result.json index 8b3f7656..bc017356 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_A_nvidia_vllm_47f5d58e_d6543f77/sustained/result.json @@ -411,7 +411,44 @@ "sustained_throughput_tokens_per_sec": 400.1, "throttle_ratio": 0.936, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -2.6 + "ttft_p99_drift_ms": -2.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 400.1, + "std": 6.3, + "cv_pct": 1.58, + "stability": "stable", + "runs": [ + 406.1, + 399.7, + 406.1, + 387.3, + 399.9, + 393.1, + 403.1, + 407.1, + 395.6, + 399.7, + 401.3, + 393.3, + 407.1, + 406.0, + 399.4, + 402.4, + 391.0, + 399.7, + 384.6, + 410.9, + 403.4, + 398.5, + 399.7, + 400.9, + 393.3, + 407.0, + 405.7, + 399.9 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/result.json index 4bf3f78d..ded7ce76 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 393.1, "throttle_ratio": 0.905, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -186.0 + "ttft_p99_drift_ms": -186.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 393.1, + "std": 11.9, + "cv_pct": 3.03, + "stability": "noisy", + "runs": [ + 368.4, + 385.6, + 407.1, + 388.2, + 401.4, + 392.9, + 407.1, + 385.4, + 399.1, + 373.5, + 405.2, + 399.6, + 391.1, + 398.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/sustained/result.json index f6741165..bda3d0bc 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/bf16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 393.1, "throttle_ratio": 0.905, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -186.0 + "ttft_p99_drift_ms": -186.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 393.1, + "std": 11.9, + "cv_pct": 3.03, + "stability": "noisy", + "runs": [ + 368.4, + 385.6, + 407.1, + 388.2, + 401.4, + 392.9, + 407.1, + 385.4, + 399.1, + 373.5, + 405.2, + 399.6, + 391.1, + 398.1 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json index a00e451c..cac12c65 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 640.7, "throttle_ratio": 0.915, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -121.3 + "ttft_p99_drift_ms": -121.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 640.7, + "std": 13.7, + "cv_pct": 2.14, + "stability": "stable", + "runs": [ + 603.2, + 639.6, + 630.9, + 655.0, + 638.3, + 638.3, + 651.3, + 641.3, + 647.4, + 641.4, + 659.2, + 629.4, + 645.2, + 649.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json index e6c613d8..4983b045 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/fp8/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 640.7, "throttle_ratio": 0.915, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -121.3 + "ttft_p99_drift_ms": -121.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 640.7, + "std": 13.7, + "cv_pct": 2.14, + "stability": "stable", + "runs": [ + 603.2, + 639.6, + 630.9, + 655.0, + 638.3, + 638.3, + 651.3, + 641.3, + 647.4, + 641.4, + 659.2, + 629.4, + 645.2, + 649.8 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json index 69c78edd..8a8b9f06 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 854.9, "throttle_ratio": 0.919, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -236.8 + "ttft_p99_drift_ms": -236.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 854.9, + "std": 19.1, + "cv_pct": 2.23, + "stability": "stable", + "runs": [ + 814.1, + 856.6, + 858.9, + 849.2, + 850.8, + 849.4, + 876.6, + 828.0, + 856.1, + 852.3, + 846.7, + 879.9, + 864.3, + 886.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json index 015f9cbc..97de99c0 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w4a16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 854.9, "throttle_ratio": 0.919, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -236.8 + "ttft_p99_drift_ms": -236.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 854.9, + "std": 19.1, + "cv_pct": 2.23, + "stability": "stable", + "runs": [ + 814.1, + 856.6, + 858.9, + 849.2, + 850.8, + 849.4, + 876.6, + 828.0, + 856.1, + 852.3, + 846.7, + 879.9, + 864.3, + 886.2 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json index 6f634adb..95316948 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 628.8, "throttle_ratio": 0.916, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -237.0 + "ttft_p99_drift_ms": -237.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 628.8, + "std": 16.5, + "cv_pct": 2.63, + "stability": "stable", + "runs": [ + 593.7, + 648.2, + 608.6, + 648.0, + 610.4, + 627.1, + 619.1, + 648.0, + 635.4, + 623.2, + 640.1, + 638.9, + 625.9, + 636.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json index 3f281590..708866f5 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 628.8, "throttle_ratio": 0.916, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -237.0 + "ttft_p99_drift_ms": -237.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 628.8, + "std": 16.5, + "cv_pct": 2.63, + "stability": "stable", + "runs": [ + 593.7, + 648.2, + 608.6, + 648.0, + 610.4, + 627.1, + 619.1, + 648.0, + 635.4, + 623.2, + 640.1, + 638.9, + 625.9, + 636.2 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/result.json index 814021f9..05ca431a 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/result.json @@ -370,7 +370,30 @@ "sustained_throughput_tokens_per_sec": 567.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -121.8 + "ttft_p99_drift_ms": -121.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 567.7, + "std": 10.8, + "cv_pct": 1.9, + "stability": "stable", + "runs": [ + 540.8, + 556.4, + 564.4, + 577.5, + 563.1, + 568.6, + 569.2, + 569.7, + 569.3, + 570.9, + 574.7, + 583.3, + 559.2, + 580.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/sustained/result.json index 42d3a726..a1dcd617 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_C_nvidia_vllm_47f5d58e_b59b0798/w8a8/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 567.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -121.8 + "ttft_p99_drift_ms": -121.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 567.7, + "std": 10.8, + "cv_pct": 1.9, + "stability": "stable", + "runs": [ + 540.8, + 556.4, + 564.4, + 577.5, + 563.1, + 568.6, + 569.2, + 569.7, + 569.3, + 570.9, + 574.7, + 583.3, + 559.2, + 580.2 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/result.json index b8776e99..252e1be8 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/result.json @@ -345,7 +345,30 @@ "sustained_throughput_tokens_per_sec": 5995.2, "throttle_ratio": 0.901, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -25.3 + "ttft_p99_drift_ms": -25.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 5995.2, + "std": 266.2, + "cv_pct": 4.44, + "stability": "noisy", + "runs": [ + 6220.0, + 6244.2, + 6209.5, + 6276.3, + 6214.1, + 6252.7, + 6249.8, + 5974.5, + 5679.0, + 5654.4, + 5674.7, + 5677.9, + 5730.7, + 5874.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/sustained/result.json index 3098b1c7..df53813f 100644 --- a/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090_dx1_suite_F_nvidia_vllm_47f5d58e_06662a14/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 5995.2, "throttle_ratio": 0.901, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -25.3 + "ttft_p99_drift_ms": -25.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 5995.2, + "std": 266.2, + "cv_pct": 4.44, + "stability": "noisy", + "runs": [ + 6220.0, + 6244.2, + 6209.5, + 6276.3, + 6214.1, + 6252.7, + 6249.8, + 5974.5, + 5679.0, + 5654.4, + 5674.7, + 5677.9, + 5730.7, + 5874.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json 
b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json index fb016a51..16fdc40b 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/result.json @@ -511,7 +511,44 @@ "sustained_throughput_tokens_per_sec": 339.8, "throttle_ratio": 0.91, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -3.1 + "ttft_p99_drift_ms": -3.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 339.8, + "std": 8.8, + "cv_pct": 2.58, + "stability": "stable", + "runs": [ + 334.4, + 352.3, + 338.5, + 327.8, + 339.6, + 356.3, + 339.2, + 348.1, + 339.9, + 348.3, + 339.3, + 339.5, + 334.5, + 339.3, + 336.5, + 352.2, + 329.1, + 343.0, + 332.5, + 340.7, + 324.4, + 331.1, + 356.6, + 331.0, + 331.0, + 351.4, + 332.6, + 344.9 + ] + } }, "burst": { "sla_ttft_ms": 500, diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json index 73561592..c96c9708 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_A_nvidia_vllm_47f5d58e_675e325e/sustained/result.json @@ -411,7 +411,44 @@ "sustained_throughput_tokens_per_sec": 339.8, "throttle_ratio": 0.91, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -3.1 + "ttft_p99_drift_ms": -3.1, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 339.8, + "std": 8.8, + "cv_pct": 2.58, + "stability": "stable", + "runs": [ + 334.4, + 352.3, + 338.5, + 327.8, + 339.6, + 356.3, + 339.2, + 348.1, + 339.9, + 348.3, + 339.3, + 339.5, + 334.5, + 339.3, + 336.5, + 352.2, + 329.1, + 343.0, + 332.5, + 340.7, + 324.4, + 331.1, + 356.6, + 331.0, + 331.0, + 351.4, + 332.6, + 344.9 + ] + } } }, 
"accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/result.json index c97e8e54..a3591049 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 334.4, "throttle_ratio": 0.867, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -349.6 + "ttft_p99_drift_ms": -349.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 334.4, + "std": 11.9, + "cv_pct": 3.57, + "stability": "noisy", + "runs": [ + 304.8, + 345.4, + 326.5, + 351.5, + 327.2, + 338.0, + 326.8, + 337.5, + 339.0, + 343.2, + 332.3, + 348.6, + 326.4, + 334.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/sustained/result.json index c5116404..e34a05c5 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/bf16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 334.4, "throttle_ratio": 0.867, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -349.6 + "ttft_p99_drift_ms": -349.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 334.4, + "std": 11.9, + "cv_pct": 3.57, + "stability": "noisy", + "runs": [ + 304.8, + 345.4, + 326.5, + 351.5, + 327.2, + 338.0, + 326.8, + 337.5, + 339.0, + 343.2, + 332.3, + 348.6, + 326.4, + 334.5 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json index 2d419279..c619c421 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 472.1, "throttle_ratio": 0.907, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -85.0 + "ttft_p99_drift_ms": -85.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 472.1, + "std": 11.5, + "cv_pct": 2.44, + "stability": "stable", + "runs": [ + 445.8, + 458.1, + 469.9, + 469.6, + 465.7, + 477.8, + 479.1, + 487.9, + 471.7, + 491.4, + 465.6, + 476.4, + 473.9, + 476.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json index 93a2b9af..8f68cc7a 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/fp8/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 472.1, "throttle_ratio": 0.907, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -85.0 + "ttft_p99_drift_ms": -85.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 472.1, + "std": 11.5, + "cv_pct": 2.44, + "stability": "stable", + "runs": [ + 445.8, + 458.1, + 469.9, + 469.6, + 465.7, + 477.8, + 479.1, + 487.9, + 471.7, + 491.4, + 465.6, + 476.4, + 473.9, + 476.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json 
b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json index 0620dcb8..99318739 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 606.2, "throttle_ratio": 0.916, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -238.3 + "ttft_p99_drift_ms": -238.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 606.2, + "std": 15.7, + "cv_pct": 2.59, + "stability": "stable", + "runs": [ + 584.6, + 597.4, + 624.2, + 591.2, + 638.4, + 590.3, + 598.1, + 616.0, + 601.1, + 605.8, + 616.5, + 591.5, + 624.0, + 607.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json index f32285f8..a18a7e21 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w4a16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 606.2, "throttle_ratio": 0.916, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -238.3 + "ttft_p99_drift_ms": -238.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 606.2, + "std": 15.7, + "cv_pct": 2.59, + "stability": "stable", + "runs": [ + 584.6, + 597.4, + 624.2, + 591.2, + 638.4, + 590.3, + 598.1, + 616.0, + 601.1, + 605.8, + 616.5, + 591.5, + 624.0, + 607.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/result.json 
b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/result.json index ef22615a..2b98e86b 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 506.0, "throttle_ratio": 0.938, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -216.0 + "ttft_p99_drift_ms": -216.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 506.0, + "std": 9.3, + "cv_pct": 1.84, + "stability": "stable", + "runs": [ + 488.0, + 512.2, + 495.5, + 515.4, + 501.6, + 507.0, + 508.3, + 517.7, + 509.8, + 505.8, + 507.7, + 499.2, + 520.5, + 494.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/sustained/result.json index 8e45b655..539c6bba 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a16/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 506.0, "throttle_ratio": 0.938, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -216.0 + "ttft_p99_drift_ms": -216.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 506.0, + "std": 9.3, + "cv_pct": 1.84, + "stability": "stable", + "runs": [ + 488.0, + 512.2, + 495.5, + 515.4, + 501.6, + 507.0, + 508.3, + 517.7, + 509.8, + 505.8, + 507.7, + 499.2, + 520.5, + 494.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json 
index 2f01c7e8..128bca26 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/result.json @@ -374,7 +374,30 @@ "sustained_throughput_tokens_per_sec": 438.9, "throttle_ratio": 0.931, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -133.3 + "ttft_p99_drift_ms": -133.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 438.9, + "std": 10.7, + "cv_pct": 2.43, + "stability": "stable", + "runs": [ + 419.8, + 434.5, + 439.4, + 447.0, + 426.1, + 445.6, + 447.0, + 434.7, + 441.1, + 448.9, + 418.8, + 448.8, + 449.6, + 443.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json index f0f86a0a..8f9aa498 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_C_nvidia_vllm_47f5d58e_6d7e1d48/w8a8/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 438.9, "throttle_ratio": 0.931, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -133.3 + "ttft_p99_drift_ms": -133.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 438.9, + "std": 10.7, + "cv_pct": 2.43, + "stability": "stable", + "runs": [ + 419.8, + 434.5, + 439.4, + 447.0, + 426.1, + 445.6, + 447.0, + 434.7, + 441.1, + 448.9, + 418.8, + 448.8, + 449.6, + 443.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json index 8b04786a..bcc41c80 100644 --- 
a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 1698.1, "throttle_ratio": 0.922, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -105.2 + "ttft_p99_drift_ms": -105.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1698.1, + "std": 39.2, + "cv_pct": 2.31, + "stability": "stable", + "runs": [ + 1715.3, + 1751.9, + 1713.1, + 1671.7, + 1724.1, + 1660.5, + 1687.0, + 1622.5, + 1658.9, + 1760.4, + 1680.1, + 1676.2, + 1735.0, + 1716.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json index db87a8e7..d1779d30 100644 --- a/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x1_suite_F_nvidia_vllm_47f5d58e_b228454f/sustained/result.json @@ -261,7 +261,30 @@ "sustained_throughput_tokens_per_sec": 1698.1, "throttle_ratio": 0.922, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -105.2 + "ttft_p99_drift_ms": -105.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1698.1, + "std": 39.2, + "cv_pct": 2.31, + "stability": "stable", + "runs": [ + 1715.3, + 1751.9, + 1713.1, + 1671.7, + 1724.1, + 1660.5, + 1687.0, + 1622.5, + 1658.9, + 1760.4, + 1680.1, + 1676.2, + 1735.0, + 1716.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json index af5e0a5c..1c0fe3d4 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json +++ 
b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/result.json @@ -553,7 +553,44 @@ "sustained_throughput_tokens_per_sec": 104.5, "throttle_ratio": 0.761, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 92.3 + "ttft_p99_drift_ms": 92.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 104.5, + "std": 7.2, + "cv_pct": 6.86, + "stability": "noisy", + "runs": [ + 96.8, + 106.6, + 106.6, + 100.9, + 109.9, + 105.4, + 98.3, + 105.1, + 103.0, + 113.9, + 101.1, + 95.9, + 108.9, + 110.1, + 94.4, + 102.9, + 111.3, + 103.9, + 118.0, + 89.8, + 114.0, + 101.2, + 95.2, + 108.8, + 115.3, + 94.1, + 109.8, + 104.4 + ] + } }, "interactive": { "ttft_ms_p50": 277.19, diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json index cb86a2ad..249553cd 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_B_nvidia_vllm_47f5d58e_cfd0bdc8/sustained/result.json @@ -451,7 +451,44 @@ "sustained_throughput_tokens_per_sec": 104.5, "throttle_ratio": 0.761, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 92.3 + "ttft_p99_drift_ms": 92.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 104.5, + "std": 7.2, + "cv_pct": 6.86, + "stability": "noisy", + "runs": [ + 96.8, + 106.6, + 106.6, + 100.9, + 109.9, + 105.4, + 98.3, + 105.1, + 103.0, + 113.9, + 101.1, + 95.9, + 108.9, + 110.1, + 94.4, + 102.9, + 111.3, + 103.9, + 118.0, + 89.8, + 114.0, + 101.2, + 95.2, + 108.8, + 115.3, + 94.1, + 109.8, + 104.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json index f57c2d05..678c24fd 100644 
--- a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/result.json @@ -550,7 +550,44 @@ "sustained_throughput_tokens_per_sec": 325.5, "throttle_ratio": 0.886, "throttle_onset_minute": 5.0, - "ttft_p99_drift_ms": -19.3 + "ttft_p99_drift_ms": -19.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 325.5, + "std": 9.7, + "cv_pct": 2.99, + "stability": "stable", + "runs": [ + 348.2, + 317.8, + 340.5, + 312.2, + 342.5, + 323.8, + 311.0, + 329.7, + 314.2, + 308.6, + 338.2, + 323.2, + 326.9, + 326.3, + 324.1, + 329.2, + 329.8, + 323.9, + 333.7, + 318.7, + 329.3, + 316.3, + 317.7, + 334.9, + 329.4, + 322.7, + 321.0, + 319.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json index 433f8f6f..405ca5df 100644 --- a/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_4090x8_suite_G_nvidia_vllm_47f5d58e_a4179ecc/sustained/result.json @@ -451,7 +451,44 @@ "sustained_throughput_tokens_per_sec": 325.5, "throttle_ratio": 0.886, "throttle_onset_minute": 5.0, - "ttft_p99_drift_ms": -19.3 + "ttft_p99_drift_ms": -19.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 325.5, + "std": 9.7, + "cv_pct": 2.99, + "stability": "stable", + "runs": [ + 348.2, + 317.8, + 340.5, + 312.2, + 342.5, + 323.8, + 311.0, + 329.7, + 314.2, + 308.6, + 338.2, + 323.2, + 326.9, + 326.3, + 324.1, + 329.2, + 329.8, + 323.9, + 333.7, + 318.7, + 329.3, + 316.3, + 317.7, + 334.9, + 329.4, + 322.7, + 321.0, + 319.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/result.json 
b/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/result.json index 1308f73d..11baab5a 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/result.json @@ -522,7 +522,44 @@ "sustained_throughput_tokens_per_sec": 707.5, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -33.3 + "ttft_p99_drift_ms": -33.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 707.5, + "std": 9.9, + "cv_pct": 1.4, + "stability": "stable", + "runs": [ + 700.3, + 705.2, + 720.8, + 690.5, + 705.3, + 718.8, + 706.6, + 711.1, + 700.4, + 708.3, + 720.0, + 710.9, + 691.8, + 699.3, + 726.0, + 702.1, + 708.6, + 701.3, + 722.9, + 700.6, + 714.3, + 701.7, + 695.2, + 728.2, + 699.1, + 705.2, + 708.6, + 707.1 + ] + } }, "burst": { "sla_ttft_ms": 500, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/sustained/result.json index cf184255..62c1d274 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_A_nvidia_vllm_47f5d58e_b8f8ed0f/sustained/result.json @@ -422,7 +422,44 @@ "sustained_throughput_tokens_per_sec": 707.5, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -33.3 + "ttft_p99_drift_ms": -33.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 707.5, + "std": 9.9, + "cv_pct": 1.4, + "stability": "stable", + "runs": [ + 700.3, + 705.2, + 720.8, + 690.5, + 705.3, + 718.8, + 706.6, + 711.1, + 700.4, + 708.3, + 720.0, + 710.9, + 691.8, + 699.3, + 726.0, + 702.1, + 708.6, + 701.3, + 722.9, + 700.6, + 714.3, + 701.7, + 695.2, + 728.2, + 699.1, + 705.2, + 708.6, + 707.1 + ] + } } }, 
"accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json index fb307d1f..cc31153f 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/result.json @@ -425,7 +425,30 @@ "sustained_throughput_tokens_per_sec": 676.2, "throttle_ratio": 0.429, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -29850.4 + "ttft_p99_drift_ms": -29850.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 676.2, + "std": 105.7, + "cv_pct": 15.64, + "stability": "high-variance", + "runs": [ + 310.3, + 701.9, + 691.2, + 717.0, + 697.2, + 705.7, + 708.4, + 704.3, + 690.8, + 713.3, + 698.7, + 694.2, + 710.3, + 723.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json index 9d9b9a61..7a3c03f4 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/bf16/sustained/result.json @@ -312,7 +312,30 @@ "sustained_throughput_tokens_per_sec": 676.2, "throttle_ratio": 0.429, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -29850.4 + "ttft_p99_drift_ms": -29850.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 676.2, + "std": 105.7, + "cv_pct": 15.64, + "stability": "high-variance", + "runs": [ + 310.3, + 701.9, + 691.2, + 717.0, + 697.2, + 705.7, + 708.4, + 704.3, + 690.8, + 713.3, + 698.7, + 694.2, + 710.3, + 723.0 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json index eb2298d0..3cb556fc 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/result.json @@ -425,7 +425,30 @@ "sustained_throughput_tokens_per_sec": 1381.4, "throttle_ratio": 0.439, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -30389.8 + "ttft_p99_drift_ms": -30389.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1381.4, + "std": 213.7, + "cv_pct": 15.47, + "stability": "high-variance", + "runs": [ + 641.1, + 1449.1, + 1415.5, + 1411.0, + 1429.2, + 1424.7, + 1453.5, + 1454.5, + 1422.3, + 1435.2, + 1456.1, + 1440.6, + 1447.5, + 1459.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json index 22d6cdb7..ca9ac78b 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w4a16/sustained/result.json @@ -312,7 +312,30 @@ "sustained_throughput_tokens_per_sec": 1381.4, "throttle_ratio": 0.439, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -30389.8 + "ttft_p99_drift_ms": -30389.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1381.4, + "std": 213.7, + "cv_pct": 15.47, + "stability": "high-variance", + "runs": [ + 641.1, + 1449.1, + 1415.5, + 1411.0, + 1429.2, + 1424.7, + 1453.5, + 1454.5, + 1422.3, + 1435.2, + 1456.1, + 1440.6, + 1447.5, + 1459.6 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json index bac4cb5a..770f027f 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/result.json @@ -425,7 +425,30 @@ "sustained_throughput_tokens_per_sec": 1148.7, "throttle_ratio": 0.36, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -35987.3 + "ttft_p99_drift_ms": -35987.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1148.7, + "std": 204.2, + "cv_pct": 17.78, + "stability": "high-variance", + "runs": [ + 440.3, + 1192.8, + 1217.1, + 1190.1, + 1223.8, + 1197.8, + 1200.5, + 1215.8, + 1201.4, + 1197.9, + 1216.1, + 1187.5, + 1191.1, + 1209.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json index 7ae61d5f..15ef4dbb 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_C_nvidia_vllm_47f5d58e_d1baa050/w8a16/sustained/result.json @@ -312,7 +312,30 @@ "sustained_throughput_tokens_per_sec": 1148.7, "throttle_ratio": 0.36, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -35987.3 + "ttft_p99_drift_ms": -35987.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1148.7, + "std": 204.2, + "cv_pct": 17.78, + "stability": "high-variance", + "runs": [ + 440.3, + 1192.8, + 1217.1, + 1190.1, + 1223.8, + 1197.8, + 1200.5, + 1215.8, + 1201.4, + 1197.9, + 1216.1, + 1187.5, + 1191.1, + 1209.8 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/result.json index e665b3f9..5b6f5ea4 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/result.json @@ -466,7 +466,44 @@ "sustained_throughput_tokens_per_sec": 51.5, "throttle_ratio": 0.845, "throttle_onset_minute": 11.0, - "ttft_p99_drift_ms": -35433.6 + "ttft_p99_drift_ms": -35433.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 51.5, + "std": 2.0, + "cv_pct": 3.83, + "stability": "noisy", + "runs": [ + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 55.4, + 51.3, + 51.2, + 46.9, + 55.4, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 55.4, + 47.0, + 55.5, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/sustained/result.json index 1c772358..5d0c5acb 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_D_nvidia_vllm_47f5d58e_e87e6c36/sustained/result.json @@ -422,7 +422,44 @@ "sustained_throughput_tokens_per_sec": 51.5, "throttle_ratio": 0.845, "throttle_onset_minute": 11.0, - "ttft_p99_drift_ms": -35433.6 + "ttft_p99_drift_ms": -35433.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 51.5, + "std": 2.0, + "cv_pct": 3.83, + "stability": "noisy", + "runs": [ + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 55.4, + 51.3, + 51.2, + 46.9, + 55.4, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 51.2, + 55.4, + 47.0, + 55.5, + 51.2, + 
51.2, + 51.2, + 51.2, + 51.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json index cac5f1b6..1cf0d911 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/result.json @@ -359,7 +359,30 @@ "sustained_throughput_tokens_per_sec": 3941.2, "throttle_ratio": 0.137, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -49045.0 + "ttft_p99_drift_ms": -49045.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3941.2, + "std": 974.0, + "cv_pct": 24.71, + "stability": "high-variance", + "runs": [ + 683.0, + 4100.0, + 4041.5, + 4995.0, + 4429.7, + 4028.5, + 4021.4, + 4158.3, + 3928.9, + 4175.5, + 4112.6, + 4274.6, + 4203.8, + 4024.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json index 7dc690da..45594d91 100644 --- a/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json +++ b/results/verified/nvidia_geforce_rtx_5090x1_suite_F_nvidia_vllm_47f5d58e_776d2702/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 3941.2, "throttle_ratio": 0.137, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -49045.0 + "ttft_p99_drift_ms": -49045.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3941.2, + "std": 974.0, + "cv_pct": 24.71, + "stability": "high-variance", + "runs": [ + 683.0, + 4100.0, + 4041.5, + 4995.0, + 4429.7, + 4028.5, + 4021.4, + 4158.3, + 3928.9, + 4175.5, + 4112.6, + 4274.6, + 4203.8, + 4024.3 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/result.json index 96df1a3f..719e3163 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/result.json @@ -478,7 +478,44 @@ "sustained_throughput_tokens_per_sec": 907.1, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -1.0 + "ttft_p99_drift_ms": -1.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 907.1, + "std": 11.3, + "cv_pct": 1.25, + "stability": "stable", + "runs": [ + 932.2, + 905.7, + 909.4, + 893.4, + 919.9, + 906.1, + 904.7, + 913.0, + 911.6, + 910.9, + 894.4, + 912.3, + 910.6, + 905.7, + 910.2, + 903.0, + 898.6, + 911.5, + 888.3, + 911.6, + 900.5, + 903.7, + 924.4, + 883.7, + 911.4, + 914.1, + 922.4, + 884.7 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/sustained/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/sustained/result.json index f43caa42..325ab779 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/sustained/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_A_nvidia_vllm_47f5d58e_831c95a7/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 907.1, "throttle_ratio": 0.948, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -1.0 + "ttft_p99_drift_ms": -1.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 907.1, + "std": 11.3, + "cv_pct": 1.25, + "stability": "stable", + "runs": [ + 932.2, + 905.7, + 909.4, + 893.4, + 919.9, + 906.1, + 904.7, + 913.0, + 911.6, + 910.9, + 894.4, + 912.3, + 910.6, + 905.7, + 910.2, + 903.0, + 898.6, + 911.5, + 888.3, + 
911.6, + 900.5, + 903.7, + 924.4, + 883.7, + 911.4, + 914.1, + 922.4, + 884.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json index f718da39..9e207d46 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/result.json @@ -421,7 +421,44 @@ "sustained_throughput_tokens_per_sec": 142.6, "throttle_ratio": 0.8, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 327.4 + "ttft_p99_drift_ms": 327.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 142.6, + "std": 13.3, + "cv_pct": 9.33, + "stability": "high-variance", + "runs": [ + 136.5, + 136.6, + 136.5, + 136.5, + 170.7, + 136.5, + 136.5, + 136.5, + 136.5, + 170.7, + 136.5, + 136.6, + 136.6, + 136.5, + 170.5, + 136.5, + 136.6, + 136.6, + 136.5, + 136.6, + 170.7, + 136.5, + 136.5, + 136.6, + 136.5, + 170.7, + 136.5, + 136.6 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json index dad0da89..5ebc9840 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_D_nvidia_vllm_47f5d58e_02748da4/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 142.6, "throttle_ratio": 0.8, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 327.4 + "ttft_p99_drift_ms": 327.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 142.6, + "std": 13.3, + "cv_pct": 9.33, + "stability": "high-variance", + "runs": [ + 136.5, + 136.6, + 136.5, + 136.5, + 170.7, + 136.5, + 
136.5, + 136.5, + 136.5, + 170.7, + 136.5, + 136.6, + 136.6, + 136.5, + 170.5, + 136.5, + 136.6, + 136.6, + 136.5, + 136.6, + 170.7, + 136.5, + 136.5, + 136.6, + 136.5, + 170.7, + 136.5, + 136.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/result.json index 396625d1..3e139b6c 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 6144.7, "throttle_ratio": 0.96, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -19.0 + "ttft_p99_drift_ms": -19.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 6144.7, + "std": 64.6, + "cv_pct": 1.05, + "stability": "stable", + "runs": [ + 6336.2, + 6162.6, + 6134.7, + 6123.2, + 6087.1, + 6131.9, + 6123.4, + 6204.8, + 6085.4, + 6082.5, + 6153.4, + 6160.7, + 6110.4, + 6129.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/sustained/result.json b/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/sustained/result.json index 9d299d69..007da95e 100644 --- a/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/sustained/result.json +++ b/results/verified/nvidia_h100_80gb_hbm3x1_suite_F_nvidia_vllm_47f5d58e_2c0b7beb/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 6144.7, "throttle_ratio": 0.96, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -19.0 + "ttft_p99_drift_ms": -19.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 6144.7, + "std": 64.6, + "cv_pct": 1.05, + "stability": "stable", + "runs": [ + 6336.2, + 6162.6, + 6134.7, + 6123.2, + 6087.1, + 6131.9, + 6123.4, + 6204.8, 
+ 6085.4, + 6082.5, + 6153.4, + 6160.7, + 6110.4, + 6129.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json index b67c6d7d..6a5294d5 100644 --- a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json +++ b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/result.json @@ -526,7 +526,44 @@ "sustained_throughput_tokens_per_sec": 709.2, "throttle_ratio": 0.791, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": 45.9 + "ttft_p99_drift_ms": 45.9, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 709.2, + "std": 40.9, + "cv_pct": 5.77, + "stability": "noisy", + "runs": [ + 851.5, + 842.9, + 677.1, + 719.3, + 673.5, + 699.1, + 702.7, + 686.1, + 706.4, + 676.1, + 719.9, + 690.8, + 711.4, + 703.6, + 704.7, + 700.4, + 699.5, + 694.9, + 707.4, + 674.7, + 705.4, + 705.4, + 713.4, + 690.6, + 696.4, + 701.8, + 696.1, + 705.6 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json index 412e3363..bd40f8e2 100644 --- a/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_A_nvidia_vllm_47f5d58e_29b2ec38/sustained/result.json @@ -428,7 +428,44 @@ "sustained_throughput_tokens_per_sec": 709.2, "throttle_ratio": 0.791, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": 45.9 + "ttft_p99_drift_ms": 45.9, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 709.2, + "std": 40.9, + "cv_pct": 5.77, + "stability": "noisy", + "runs": [ + 851.5, + 842.9, + 677.1, + 719.3, + 673.5, + 699.1, + 702.7, + 686.1, + 706.4, + 676.1, + 719.9, + 690.8, + 711.4, + 703.6, + 704.7, + 700.4, + 699.5, + 694.9, + 
707.4, + 674.7, + 705.4, + 705.4, + 713.4, + 690.6, + 696.4, + 701.8, + 696.1, + 705.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/result.json index af08ca34..43b139da 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/result.json @@ -457,7 +457,30 @@ "sustained_throughput_tokens_per_sec": 709.4, "throttle_ratio": 0.868, "throttle_onset_minute": 13.0, - "ttft_p99_drift_ms": -149.4 + "ttft_p99_drift_ms": -149.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.4, + "std": 30.8, + "cv_pct": 4.34, + "stability": "noisy", + "runs": [ + 709.5, + 703.8, + 729.3, + 727.1, + 713.1, + 728.9, + 721.0, + 715.9, + 726.3, + 734.4, + 716.6, + 727.1, + 641.9, + 637.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/sustained/result.json index dc96dc06..5d2bc3cc 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/bf16/sustained/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 709.4, "throttle_ratio": 0.868, "throttle_onset_minute": 13.0, - "ttft_p99_drift_ms": -149.4 + "ttft_p99_drift_ms": -149.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 709.4, + "std": 30.8, + "cv_pct": 4.34, + "stability": "noisy", + "runs": [ + 709.5, + 703.8, + 729.3, + 727.1, + 713.1, + 728.9, + 721.0, + 715.9, + 726.3, + 734.4, + 716.6, + 727.1, + 641.9, + 637.4 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json index 2d2ab534..3784d212 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/result.json @@ -457,7 +457,30 @@ "sustained_throughput_tokens_per_sec": 713.4, "throttle_ratio": 0.888, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -69.4 + "ttft_p99_drift_ms": -69.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 713.4, + "std": 21.2, + "cv_pct": 2.97, + "stability": "stable", + "runs": [ + 736.7, + 762.2, + 721.6, + 676.8, + 719.7, + 711.0, + 715.7, + 695.0, + 712.8, + 702.3, + 727.1, + 685.7, + 708.4, + 712.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json index f1c1cbfb..785ddd38 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/fp8/sustained/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 713.4, "throttle_ratio": 0.888, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -69.4 + "ttft_p99_drift_ms": -69.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 713.4, + "std": 21.2, + "cv_pct": 2.97, + "stability": "stable", + "runs": [ + 736.7, + 762.2, + 721.6, + 676.8, + 719.7, + 711.0, + 715.7, + 695.0, + 712.8, + 702.3, + 727.1, + 685.7, + 708.4, + 712.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/result.json index fd1ef782..73d2749d 100644 --- 
a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/result.json @@ -457,7 +457,30 @@ "sustained_throughput_tokens_per_sec": 649.9, "throttle_ratio": 0.886, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -146.9 + "ttft_p99_drift_ms": -146.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 649.9, + "std": 21.8, + "cv_pct": 3.35, + "stability": "noisy", + "runs": [ + 713.6, + 677.3, + 649.8, + 636.1, + 652.4, + 646.2, + 633.3, + 649.0, + 638.9, + 655.1, + 642.4, + 636.4, + 632.1, + 636.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/sustained/result.json index 6caffea6..9705f2f2 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w4a16/sustained/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 649.9, "throttle_ratio": 0.886, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -146.9 + "ttft_p99_drift_ms": -146.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 649.9, + "std": 21.8, + "cv_pct": 3.35, + "stability": "noisy", + "runs": [ + 713.6, + 677.3, + 649.8, + 636.1, + 652.4, + 646.2, + 633.3, + 649.0, + 638.9, + 655.1, + 642.4, + 636.4, + 632.1, + 636.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/result.json index 488b494f..51797ff3 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/result.json @@ -457,7 
+457,30 @@ "sustained_throughput_tokens_per_sec": 708.2, "throttle_ratio": 0.878, "throttle_onset_minute": 11.0, - "ttft_p99_drift_ms": -145.8 + "ttft_p99_drift_ms": -145.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 708.2, + "std": 34.8, + "cv_pct": 4.91, + "stability": "noisy", + "runs": [ + 710.4, + 720.8, + 741.0, + 742.8, + 739.9, + 728.3, + 734.6, + 723.4, + 737.2, + 704.2, + 664.2, + 654.3, + 662.2, + 652.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/sustained/result.json index 801e6a43..4ef6e3cb 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a16/sustained/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 708.2, "throttle_ratio": 0.878, "throttle_onset_minute": 11.0, - "ttft_p99_drift_ms": -145.8 + "ttft_p99_drift_ms": -145.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 708.2, + "std": 34.8, + "cv_pct": 4.91, + "stability": "noisy", + "runs": [ + 710.4, + 720.8, + 741.0, + 742.8, + 739.9, + 728.3, + 734.6, + 723.4, + 737.2, + 704.2, + 664.2, + 654.3, + 662.2, + 652.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/result.json index 980a521e..13d327f9 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/result.json @@ -457,7 +457,30 @@ "sustained_throughput_tokens_per_sec": 694.7, "throttle_ratio": 0.932, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -100.3 + "ttft_p99_drift_ms": -100.3, + 
"throughput_post_warmup_reliability": { + "n": 14, + "mean": 694.7, + "std": 12.1, + "cv_pct": 1.74, + "stability": "stable", + "runs": [ + 730.5, + 688.0, + 700.2, + 690.9, + 686.4, + 704.0, + 690.4, + 691.5, + 700.7, + 690.2, + 695.2, + 680.6, + 692.2, + 685.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/sustained/result.json b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/sustained/result.json index 736cc3f8..0d28b31f 100644 --- a/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_C_nvidia_vllm_47f5d58e_f07c60f8/w8a8/sustained/result.json @@ -348,7 +348,30 @@ "sustained_throughput_tokens_per_sec": 694.7, "throttle_ratio": 0.932, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -100.3 + "ttft_p99_drift_ms": -100.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 694.7, + "std": 12.1, + "cv_pct": 1.74, + "stability": "stable", + "runs": [ + 730.5, + 688.0, + 700.2, + 690.9, + 686.4, + 704.0, + 690.4, + 691.5, + 700.7, + 690.2, + 695.2, + 680.6, + 692.2, + 685.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json index d36e3809..023009e0 100644 --- a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json +++ b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/result.json @@ -471,7 +471,44 @@ "sustained_throughput_tokens_per_sec": 132.9, "throttle_ratio": 0.6, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -55.8 + "ttft_p99_drift_ms": -55.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 132.9, + "std": 14.2, + "cv_pct": 10.69, + "stability": "high-variance", + "runs": [ + 136.5, + 136.6, + 136.4, + 136.5, + 136.6, + 136.6, + 136.4, + 136.5, + 136.5, 
+ 136.6, + 136.6, + 136.5, + 170.7, + 136.5, + 136.6, + 136.5, + 136.5, + 136.5, + 136.6, + 136.5, + 136.6, + 136.5, + 136.5, + 136.5, + 102.4, + 102.4, + 102.4, + 102.4 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json index 6d8e3212..b27c0571 100644 --- a/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_D_nvidia_vllm_47f5d58e_62a36028/sustained/result.json @@ -428,7 +428,44 @@ "sustained_throughput_tokens_per_sec": 132.9, "throttle_ratio": 0.6, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -55.8 + "ttft_p99_drift_ms": -55.8, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 132.9, + "std": 14.2, + "cv_pct": 10.69, + "stability": "high-variance", + "runs": [ + 136.5, + 136.6, + 136.4, + 136.5, + 136.6, + 136.6, + 136.4, + 136.5, + 136.5, + 136.6, + 136.6, + 136.5, + 170.7, + 136.5, + 136.6, + 136.5, + 136.5, + 136.5, + 136.6, + 136.5, + 136.6, + 136.5, + 136.5, + 136.5, + 102.4, + 102.4, + 102.4, + 102.4 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/result.json b/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/result.json index 0f171076..7ffc29f7 100644 --- a/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/result.json +++ b/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/result.json @@ -431,7 +431,30 @@ "sustained_throughput_tokens_per_sec": 1425.4, "throttle_ratio": 0.826, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -364.2 + "ttft_p99_drift_ms": -364.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1425.4, + "std": 66.2, + "cv_pct": 4.65, + "stability": "noisy", + "runs": [ + 1306.2, + 1417.0, + 1440.4, + 1411.2, + 1413.3, 
+ 1581.2, + 1523.2, + 1435.7, + 1342.6, + 1396.7, + 1408.2, + 1415.5, + 1436.7, + 1427.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/sustained/result.json b/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/sustained/result.json index 3889eef9..0d2a661d 100644 --- a/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/sustained/result.json +++ b/results/verified/nvidia_h200x1_suite_F_nvidia_vllm_47f5d58e_53471efa/sustained/result.json @@ -347,7 +347,30 @@ "sustained_throughput_tokens_per_sec": 1425.4, "throttle_ratio": 0.826, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -364.2 + "ttft_p99_drift_ms": -364.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1425.4, + "std": 66.2, + "cv_pct": 4.65, + "stability": "noisy", + "runs": [ + 1306.2, + 1417.0, + 1440.4, + 1411.2, + 1413.3, + 1581.2, + 1523.2, + 1435.7, + 1342.6, + 1396.7, + 1408.2, + 1415.5, + 1436.7, + 1427.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json index 01daefd8..0519bb09 100644 --- a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json +++ b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/result.json @@ -596,7 +596,44 @@ "sustained_throughput_tokens_per_sec": 241.2, "throttle_ratio": 0.807, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -1.5 + "ttft_p99_drift_ms": -1.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 241.2, + "std": 12.7, + "cv_pct": 5.28, + "stability": "noisy", + "runs": [ + 283.6, + 281.0, + 237.6, + 240.6, + 232.5, + 236.4, + 238.2, + 242.7, + 241.5, + 231.0, + 244.4, + 236.3, + 242.5, + 235.3, + 230.9, + 237.7, + 243.4, + 235.9, + 240.4, + 232.4, + 240.7, + 246.4, + 232.5, + 239.3, + 233.9, + 235.6, + 252.2, + 228.8 + ] + } }, 
"interactive": { "ttft_ms_p50": 77.23, diff --git a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json index 3a4cd706..ee0e251c 100644 --- a/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json +++ b/results/verified/nvidia_h200x8_suite_B_nvidia_vllm_47f5d58e_b727568e/sustained/result.json @@ -497,7 +497,44 @@ "sustained_throughput_tokens_per_sec": 241.2, "throttle_ratio": 0.807, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -1.5 + "ttft_p99_drift_ms": -1.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 241.2, + "std": 12.7, + "cv_pct": 5.28, + "stability": "noisy", + "runs": [ + 283.6, + 281.0, + 237.6, + 240.6, + 232.5, + 236.4, + 238.2, + 242.7, + 241.5, + 231.0, + 244.4, + 236.3, + 242.5, + 235.3, + 230.9, + 237.7, + 243.4, + 235.9, + 240.4, + 232.4, + 240.7, + 246.4, + 232.5, + 239.3, + 233.9, + 235.6, + 252.2, + 228.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/result.json b/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/result.json index b27ea8ea..ffb5d59a 100644 --- a/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/result.json +++ b/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/result.json @@ -594,7 +594,44 @@ "sustained_throughput_tokens_per_sec": 591.5, "throttle_ratio": 0.939, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 12.0 + "ttft_p99_drift_ms": 12.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 591.5, + "std": 9.7, + "cv_pct": 1.64, + "stability": "stable", + "runs": [ + 576.8, + 613.1, + 587.8, + 582.0, + 595.7, + 591.5, + 589.9, + 585.6, + 599.3, + 594.4, + 596.9, + 576.0, + 608.9, + 579.4, + 599.9, + 588.2, + 607.2, + 576.5, + 586.3, + 582.1, + 593.9, + 599.3, + 597.4, + 588.5, + 583.3, + 598.1, + 
588.0, + 596.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/sustained/result.json b/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/sustained/result.json index 08974360..2cf787f8 100644 --- a/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/sustained/result.json +++ b/results/verified/nvidia_h200x8_suite_G_nvidia_vllm_47f5d58e_7f7a270e/sustained/result.json @@ -498,7 +498,44 @@ "sustained_throughput_tokens_per_sec": 591.5, "throttle_ratio": 0.939, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 12.0 + "ttft_p99_drift_ms": 12.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 591.5, + "std": 9.7, + "cv_pct": 1.64, + "stability": "stable", + "runs": [ + 576.8, + 613.1, + 587.8, + 582.0, + 595.7, + 591.5, + 589.9, + 585.6, + 599.3, + 594.4, + 596.9, + 576.0, + 608.9, + 579.4, + 599.9, + 588.2, + 607.2, + 576.5, + 586.3, + 582.1, + 593.9, + 599.3, + 597.4, + 588.5, + 583.3, + 598.1, + 588.0, + 596.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/result.json b/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/result.json index e6b4b502..71a19486 100644 --- a/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/result.json @@ -523,7 +523,44 @@ "sustained_throughput_tokens_per_sec": 486.6, "throttle_ratio": 0.918, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 1.0 + "ttft_p99_drift_ms": 1.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 486.6, + "std": 9.5, + "cv_pct": 1.95, + "stability": "stable", + "runs": [ + 473.5, + 493.5, + 488.1, + 510.3, + 497.4, + 487.5, + 499.5, + 485.3, + 478.4, + 480.3, + 493.7, + 476.1, + 490.1, + 479.3, + 490.6, + 489.5, + 479.2, + 488.8, + 477.4, + 473.5, + 493.7, + 488.2, + 476.5, + 495.9, + 468.6, + 
496.5, + 484.2, + 489.8 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/sustained/result.json index 4a35f301..9339f76d 100644 --- a/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_A_nvidia_vllm_47f5d58e_3f6269bb/sustained/result.json @@ -422,7 +422,44 @@ "sustained_throughput_tokens_per_sec": 486.6, "throttle_ratio": 0.918, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 1.0 + "ttft_p99_drift_ms": 1.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 486.6, + "std": 9.5, + "cv_pct": 1.95, + "stability": "stable", + "runs": [ + 473.5, + 493.5, + 488.1, + 510.3, + 497.4, + 487.5, + 499.5, + 485.3, + 478.4, + 480.3, + 493.7, + 476.1, + 490.1, + 479.3, + 490.6, + 489.5, + 479.2, + 488.8, + 477.4, + 473.5, + 493.7, + 488.2, + 476.5, + 495.9, + 468.6, + 496.5, + 484.2, + 489.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/result.json index 10356053..fedfa82a 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 484.8, "throttle_ratio": 0.887, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -202.4 + "ttft_p99_drift_ms": -202.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 484.8, + "std": 15.2, + "cv_pct": 3.14, + "stability": "noisy", + "runs": [ + 443.9, + 489.5, + 495.1, + 480.6, + 500.6, + 476.2, + 492.7, + 486.5, + 482.4, + 486.2, + 499.3, + 465.9, + 499.2, + 488.7 + ] + } } }, "accuracy": { diff 
--git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/sustained/result.json index d40598ee..99d5bd3a 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/bf16/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 484.8, "throttle_ratio": 0.887, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -202.4 + "ttft_p99_drift_ms": -202.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 484.8, + "std": 15.2, + "cv_pct": 3.14, + "stability": "noisy", + "runs": [ + 443.9, + 489.5, + 495.1, + 480.6, + 500.6, + 476.2, + 492.7, + 486.5, + 482.4, + 486.2, + 499.3, + 465.9, + 499.2, + 488.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json index 688d6ca8..a44887c8 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 494.9, "throttle_ratio": 0.92, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -111.0 + "ttft_p99_drift_ms": -111.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 494.9, + "std": 12.6, + "cv_pct": 2.55, + "stability": "stable", + "runs": [ + 492.6, + 482.1, + 498.6, + 498.3, + 504.2, + 472.5, + 501.9, + 505.4, + 490.9, + 483.1, + 512.3, + 477.3, + 495.1, + 513.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json 
b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json index 21d4e7a4..489ee85f 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/fp8/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 494.9, "throttle_ratio": 0.92, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -111.0 + "ttft_p99_drift_ms": -111.0, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 494.9, + "std": 12.6, + "cv_pct": 2.55, + "stability": "stable", + "runs": [ + 492.6, + 482.1, + 498.6, + 498.3, + 504.2, + 472.5, + 501.9, + 505.4, + 490.9, + 483.1, + 512.3, + 477.3, + 495.1, + 513.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/result.json index 7979d6b7..d0708288 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 648.8, "throttle_ratio": 0.934, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -206.6 + "ttft_p99_drift_ms": -206.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 648.8, + "std": 9.5, + "cv_pct": 1.46, + "stability": "stable", + "runs": [ + 625.3, + 669.7, + 649.9, + 651.4, + 652.7, + 645.9, + 645.9, + 642.4, + 647.3, + 647.3, + 648.6, + 648.7, + 648.9, + 658.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/sustained/result.json index a01e5d58..f669eeb0 100644 --- 
a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w4a16/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 648.8, "throttle_ratio": 0.934, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -206.6 + "ttft_p99_drift_ms": -206.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 648.8, + "std": 9.5, + "cv_pct": 1.46, + "stability": "stable", + "runs": [ + 625.3, + 669.7, + 649.9, + 651.4, + 652.7, + 645.9, + 645.9, + 642.4, + 647.3, + 647.3, + 648.6, + 648.7, + 648.9, + 658.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json index 5aa2dc27..f00600e8 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 645.2, "throttle_ratio": 0.889, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -199.4 + "ttft_p99_drift_ms": -199.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 645.2, + "std": 18.2, + "cv_pct": 2.82, + "stability": "stable", + "runs": [ + 593.7, + 667.9, + 642.7, + 648.9, + 646.6, + 648.3, + 657.5, + 641.8, + 648.5, + 660.6, + 643.0, + 654.4, + 623.1, + 655.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json index cb0bb6a6..e8396426 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json +++ 
b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a16/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 645.2, "throttle_ratio": 0.889, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -199.4 + "ttft_p99_drift_ms": -199.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 645.2, + "std": 18.2, + "cv_pct": 2.82, + "stability": "stable", + "runs": [ + 593.7, + 667.9, + 642.7, + 648.9, + 646.6, + 648.3, + 657.5, + 641.8, + 648.5, + 660.6, + 643.0, + 654.4, + 623.1, + 655.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json index d2844329..c897b869 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/result.json @@ -381,7 +381,30 @@ "sustained_throughput_tokens_per_sec": 533.8, "throttle_ratio": 0.908, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -110.9 + "ttft_p99_drift_ms": -110.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 533.8, + "std": 12.3, + "cv_pct": 2.3, + "stability": "stable", + "runs": [ + 501.4, + 534.3, + 552.1, + 527.3, + 528.0, + 530.2, + 539.2, + 546.2, + 526.4, + 537.3, + 526.7, + 540.2, + 542.8, + 541.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json index b2534826..9627dbbe 100644 --- a/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_C_nvidia_vllm_47f5d58e_1bcdc710/w8a8/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 533.8, "throttle_ratio": 
0.908, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -110.9 + "ttft_p99_drift_ms": -110.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 533.8, + "std": 12.3, + "cv_pct": 2.3, + "stability": "stable", + "runs": [ + 501.4, + 534.3, + 552.1, + 527.3, + 528.0, + 530.2, + 539.2, + 546.2, + 526.4, + 537.3, + 526.7, + 540.2, + 542.8, + 541.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json index 8eb272c8..3b28d14f 100644 --- a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/result.json @@ -464,7 +464,44 @@ "sustained_throughput_tokens_per_sec": 41.4, "throttle_ratio": 0.499, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 516.2 + "ttft_p99_drift_ms": 516.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 41.4, + "std": 14.3, + "cv_pct": 34.47, + "stability": "high-variance", + "runs": [ + 34.1, + 34.2, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.1, + 68.3, + 34.2, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.2, + 68.3 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json index d6ad4f0d..839bccf7 100644 --- a/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_D_nvidia_vllm_47f5d58e_60c91bf0/sustained/result.json @@ -422,7 +422,44 @@ "sustained_throughput_tokens_per_sec": 41.4, "throttle_ratio": 0.499, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 516.2 + "ttft_p99_drift_ms": 516.2, + 
"throughput_post_warmup_reliability": { + "n": 28, + "mean": 41.4, + "std": 14.3, + "cv_pct": 34.47, + "stability": "high-variance", + "runs": [ + 34.1, + 34.2, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.1, + 68.3, + 34.2, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.1, + 68.3, + 34.1, + 34.1, + 34.1, + 34.2, + 68.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/result.json b/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/result.json index 927da139..a3b8ad29 100644 --- a/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/result.json @@ -359,7 +359,30 @@ "sustained_throughput_tokens_per_sec": 1771.6, "throttle_ratio": 0.953, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -71.8 + "ttft_p99_drift_ms": -71.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1771.6, + "std": 28.4, + "cv_pct": 1.6, + "stability": "stable", + "runs": [ + 1749.4, + 1781.1, + 1757.1, + 1808.9, + 1813.9, + 1728.8, + 1761.8, + 1737.5, + 1774.0, + 1778.9, + 1786.9, + 1794.3, + 1729.1, + 1800.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/sustained/result.json b/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/sustained/result.json index 1d501f43..7938869b 100644 --- a/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/sustained/result.json +++ b/results/verified/nvidia_h20_3ex1_suite_F_nvidia_vllm_47f5d58e_1e7ed8ca/sustained/result.json @@ -272,7 +272,30 @@ "sustained_throughput_tokens_per_sec": 1771.6, "throttle_ratio": 0.953, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -71.8 + "ttft_p99_drift_ms": -71.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1771.6, + "std": 28.4, + "cv_pct": 1.6, + 
"stability": "stable", + "runs": [ + 1749.4, + 1781.1, + 1757.1, + 1808.9, + 1813.9, + 1728.8, + 1761.8, + 1737.5, + 1774.0, + 1778.9, + 1786.9, + 1794.3, + 1729.1, + 1800.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/result.json b/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/result.json index 29e70e1b..768a8fd0 100644 --- a/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/result.json +++ b/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/result.json @@ -564,7 +564,44 @@ "sustained_throughput_tokens_per_sec": 176.0, "throttle_ratio": 0.847, "throttle_onset_minute": 5.0, - "ttft_p99_drift_ms": -9.3 + "ttft_p99_drift_ms": -9.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 176.0, + "std": 6.5, + "cv_pct": 3.69, + "stability": "noisy", + "runs": [ + 174.4, + 171.1, + 181.0, + 169.7, + 177.7, + 181.8, + 176.6, + 177.2, + 177.7, + 176.4, + 177.1, + 168.1, + 175.3, + 176.6, + 179.5, + 173.6, + 185.1, + 160.2, + 186.4, + 161.8, + 189.2, + 175.3, + 176.9, + 171.9, + 179.6, + 178.8, + 180.0, + 168.9 + ] + } }, "interactive": { "ttft_ms_p50": 133.03, diff --git a/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/sustained/result.json b/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/sustained/result.json index 3061cd4f..1ac693dc 100644 --- a/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/sustained/result.json +++ b/results/verified/nvidia_h20_3ex8_suite_B_nvidia_vllm_47f5d58e_76ce4cd0/sustained/result.json @@ -462,7 +462,44 @@ "sustained_throughput_tokens_per_sec": 176.0, "throttle_ratio": 0.847, "throttle_onset_minute": 5.0, - "ttft_p99_drift_ms": -9.3 + "ttft_p99_drift_ms": -9.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 176.0, + "std": 6.5, + "cv_pct": 3.69, + "stability": "noisy", + "runs": [ + 174.4, + 171.1, + 181.0, + 169.7, + 
177.7, + 181.8, + 176.6, + 177.2, + 177.7, + 176.4, + 177.1, + 168.1, + 175.3, + 176.6, + 179.5, + 173.6, + 185.1, + 160.2, + 186.4, + 161.8, + 189.2, + 175.3, + 176.9, + 171.9, + 179.6, + 178.8, + 180.0, + 168.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/result.json b/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/result.json index e373079d..368cc57b 100644 --- a/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/result.json +++ b/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/result.json @@ -561,7 +561,44 @@ "sustained_throughput_tokens_per_sec": 561.2, "throttle_ratio": 0.936, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -45.4 + "ttft_p99_drift_ms": -45.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 561.2, + "std": 9.6, + "cv_pct": 1.71, + "stability": "stable", + "runs": [ + 561.4, + 545.8, + 565.7, + 583.2, + 554.6, + 567.3, + 556.8, + 555.3, + 571.4, + 549.8, + 561.4, + 549.8, + 565.6, + 568.0, + 563.2, + 561.4, + 562.6, + 570.8, + 546.6, + 561.1, + 576.4, + 549.6, + 550.5, + 573.5, + 556.8, + 555.4, + 555.4, + 573.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/sustained/result.json b/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/sustained/result.json index 73bdd2a1..29c90757 100644 --- a/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/sustained/result.json +++ b/results/verified/nvidia_h20_3ex8_suite_G_nvidia_vllm_47f5d58e_7bd76bb5/sustained/result.json @@ -462,7 +462,44 @@ "sustained_throughput_tokens_per_sec": 561.2, "throttle_ratio": 0.936, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -45.4 + "ttft_p99_drift_ms": -45.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 561.2, + "std": 9.6, + "cv_pct": 1.71, + "stability": "stable", + "runs": [ + 561.4, + 545.8, 
+ 565.7, + 583.2, + 554.6, + 567.3, + 556.8, + 555.3, + 571.4, + 549.8, + 561.4, + 549.8, + 565.6, + 568.0, + 563.2, + 561.4, + 562.6, + 570.8, + 546.6, + 561.1, + 576.4, + 549.6, + 550.5, + 573.5, + 556.8, + 555.4, + 555.4, + 573.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json index 66048c9b..2d9a7a00 100644 --- a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json +++ b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/result.json @@ -477,7 +477,44 @@ "sustained_throughput_tokens_per_sec": 116.6, "throttle_ratio": 0.748, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 11.6 + "ttft_p99_drift_ms": 11.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 116.6, + "std": 10.5, + "cv_pct": 8.99, + "stability": "high-variance", + "runs": [ + 98.7, + 132.0, + 105.3, + 132.0, + 117.6, + 99.5, + 128.9, + 106.2, + 126.7, + 126.2, + 108.1, + 120.4, + 105.7, + 126.7, + 117.5, + 107.6, + 120.4, + 113.0, + 126.8, + 124.2, + 103.4, + 110.6, + 121.4, + 118.3, + 125.2, + 102.6, + 110.5, + 130.0 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json index 49224331..49880a7c 100644 --- a/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json +++ b/results/verified/nvidia_l4x1_suite_A_nvidia_vllm_47f5d58e_b991b4c1/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 116.6, "throttle_ratio": 0.748, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 11.6 + "ttft_p99_drift_ms": 11.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 116.6, + "std": 10.5, + "cv_pct": 8.99, + "stability": "high-variance", + 
"runs": [ + 98.7, + 132.0, + 105.3, + 132.0, + 117.6, + 99.5, + 128.9, + 106.2, + 126.7, + 126.2, + 108.1, + 120.4, + 105.7, + 126.7, + 117.5, + 107.6, + 120.4, + 113.0, + 126.8, + 124.2, + 103.4, + 110.6, + 121.4, + 118.3, + 125.2, + 102.6, + 110.5, + 130.0 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/result.json b/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/result.json index 7c987a20..b2d60369 100644 --- a/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/result.json +++ b/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 2837.3, "throttle_ratio": 0.983, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -51.3 + "ttft_p99_drift_ms": -51.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2837.3, + "std": 16.9, + "cv_pct": 0.6, + "stability": "stable", + "runs": [ + 2816.1, + 2841.7, + 2863.3, + 2824.1, + 2815.9, + 2862.6, + 2858.2, + 2813.6, + 2848.7, + 2828.0, + 2844.6, + 2837.7, + 2830.5, + 2836.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/sustained/result.json b/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/sustained/result.json index 5d1125ae..05e85ae7 100644 --- a/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/sustained/result.json +++ b/results/verified/nvidia_l4x1_suite_F_nvidia_vllm_47f5d58e_d58fa923/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 2837.3, "throttle_ratio": 0.983, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -51.3 + "ttft_p99_drift_ms": -51.3, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2837.3, + "std": 16.9, + "cv_pct": 0.6, + "stability": "stable", + "runs": [ + 2816.1, + 2841.7, + 2863.3, + 2824.1, + 2815.9, + 2862.6, + 2858.2, + 2813.6, + 2848.7, + 2828.0, + 2844.6, + 
2837.7, + 2830.5, + 2836.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/result.json b/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/result.json index fcc6a4cd..a747a7d5 100644 --- a/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/result.json +++ b/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 3880.8, "throttle_ratio": 0.982, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -48.2 + "ttft_p99_drift_ms": -48.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3880.8, + "std": 20.4, + "cv_pct": 0.52, + "stability": "stable", + "runs": [ + 3838.2, + 3867.3, + 3892.6, + 3896.5, + 3882.7, + 3870.2, + 3904.8, + 3845.2, + 3891.9, + 3873.4, + 3888.0, + 3885.3, + 3886.2, + 3908.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/sustained/result.json b/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/sustained/result.json index e983bcbf..67ac38d6 100644 --- a/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/sustained/result.json +++ b/results/verified/nvidia_rtx_4000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_125c6b61/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 3880.8, "throttle_ratio": 0.982, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -48.2 + "ttft_p99_drift_ms": -48.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 3880.8, + "std": 20.4, + "cv_pct": 0.52, + "stability": "stable", + "runs": [ + 3838.2, + 3867.3, + 3892.6, + 3896.5, + 3882.7, + 3870.2, + 3904.8, + 3845.2, + 3891.9, + 3873.4, + 3888.0, + 3885.3, + 3886.2, + 3908.5 + ] + } } }, "accuracy": { diff 
--git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json index 303f3772..9eee1e93 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/result.json @@ -478,7 +478,44 @@ "sustained_throughput_tokens_per_sec": 376.2, "throttle_ratio": 0.901, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 18.3 + "ttft_p99_drift_ms": 18.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 376.2, + "std": 8.1, + "cv_pct": 2.15, + "stability": "stable", + "runs": [ + 375.9, + 376.2, + 374.4, + 394.4, + 373.0, + 365.6, + 382.7, + 363.9, + 386.3, + 375.6, + 389.9, + 355.4, + 386.0, + 372.6, + 381.9, + 376.1, + 374.5, + 370.4, + 373.1, + 380.1, + 367.5, + 384.4, + 373.7, + 376.4, + 377.7, + 376.4, + 370.6, + 379.9 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json index 4d90e875..ad67a5c4 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_A_nvidia_vllm_47f5d58e_bd3b5d27/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 376.2, "throttle_ratio": 0.901, "throttle_onset_minute": null, - "ttft_p99_drift_ms": 18.3 + "ttft_p99_drift_ms": 18.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 376.2, + "std": 8.1, + "cv_pct": 2.15, + "stability": "stable", + "runs": [ + 375.9, + 376.2, + 374.4, + 394.4, + 373.0, + 365.6, + 382.7, + 363.9, + 386.3, + 375.6, + 389.9, + 
355.4, + 386.0, + 372.6, + 381.9, + 376.1, + 374.5, + 370.4, + 373.1, + 380.1, + 367.5, + 384.4, + 373.7, + 376.4, + 377.7, + 376.4, + 370.6, + 379.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json index bd6f82b1..f8c9b082 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 371.9, "throttle_ratio": 0.894, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -239.9 + "ttft_p99_drift_ms": -239.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 371.9, + "std": 10.0, + "cv_pct": 2.69, + "stability": "stable", + "runs": [ + 348.3, + 386.1, + 365.8, + 369.9, + 372.0, + 370.2, + 372.6, + 371.2, + 389.7, + 362.3, + 380.4, + 374.5, + 373.2, + 370.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json index b3717fde..001df410 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/bf16/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 371.9, "throttle_ratio": 0.894, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -239.9 + "ttft_p99_drift_ms": -239.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 371.9, + "std": 10.0, + "cv_pct": 2.69, + "stability": "stable", + "runs": [ + 348.3, + 386.1, + 365.8, 
+ 369.9, + 372.0, + 370.2, + 372.6, + 371.2, + 389.7, + 362.3, + 380.4, + 374.5, + 373.2, + 370.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/result.json index c557ea99..8b260dd4 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 586.1, "throttle_ratio": 0.969, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -131.6 + "ttft_p99_drift_ms": -131.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 586.1, + "std": 5.3, + "cv_pct": 0.9, + "stability": "stable", + "runs": [ + 581.9, + 582.3, + 590.4, + 582.1, + 588.5, + 592.1, + 591.0, + 588.4, + 582.5, + 573.7, + 588.2, + 582.6, + 590.6, + 590.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/sustained/result.json index 3473e5c4..e2b9f0fb 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/fp8/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 586.1, "throttle_ratio": 0.969, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -131.6 + "ttft_p99_drift_ms": -131.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 586.1, + "std": 5.3, + "cv_pct": 0.9, + "stability": "stable", + "runs": [ + 581.9, + 582.3, + 590.4, + 582.1, + 588.5, + 592.1, + 591.0, + 588.4, + 582.5, + 
573.7, + 588.2, + 582.6, + 590.6, + 590.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/result.json index 10e8e5a7..4dc5165e 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 816.3, "throttle_ratio": 0.924, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -201.9 + "ttft_p99_drift_ms": -201.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 816.3, + "std": 15.8, + "cv_pct": 1.94, + "stability": "stable", + "runs": [ + 778.0, + 818.5, + 841.8, + 803.7, + 816.3, + 809.1, + 829.1, + 820.8, + 806.2, + 815.9, + 826.8, + 810.4, + 813.1, + 838.2 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/sustained/result.json index 510302b5..4d15f3f6 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w4a16/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 816.3, "throttle_ratio": 0.924, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -201.9 + "ttft_p99_drift_ms": -201.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 816.3, + "std": 15.8, + "cv_pct": 1.94, + "stability": "stable", + "runs": [ + 778.0, + 818.5, + 841.8, + 803.7, + 816.3, + 809.1, + 829.1, + 820.8, + 806.2, + 815.9, + 826.8, + 810.4, + 813.1, + 838.2 
+ ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/result.json index fafdfb44..a692cf2c 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 584.6, "throttle_ratio": 0.953, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -246.4 + "ttft_p99_drift_ms": -246.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 584.6, + "std": 8.1, + "cv_pct": 1.38, + "stability": "stable", + "runs": [ + 565.5, + 571.7, + 589.7, + 590.6, + 581.9, + 582.3, + 590.4, + 591.0, + 590.5, + 582.2, + 582.2, + 590.3, + 582.1, + 593.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/sustained/result.json index 9616ccd0..b52408a5 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a16/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 584.6, "throttle_ratio": 0.953, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -246.4 + "ttft_p99_drift_ms": -246.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 584.6, + "std": 8.1, + "cv_pct": 1.38, + "stability": "stable", + "runs": [ + 565.5, + 571.7, + 589.7, + 590.6, + 581.9, + 582.3, + 590.4, + 591.0, + 590.5, + 582.2, + 582.2, + 590.3, + 582.1, + 593.5 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/result.json index 454de8a9..a8762aec 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/result.json @@ -340,7 +340,30 @@ "sustained_throughput_tokens_per_sec": 534.5, "throttle_ratio": 0.952, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -110.6 + "ttft_p99_drift_ms": -110.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 534.5, + "std": 5.8, + "cv_pct": 1.09, + "stability": "stable", + "runs": [ + 519.8, + 531.9, + 532.4, + 535.4, + 535.9, + 536.8, + 536.4, + 537.7, + 536.4, + 533.3, + 527.9, + 537.8, + 546.0, + 534.9 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/sustained/result.json index 357b1453..b7a08b4b 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_C_nvidia_vllm_47f5d58e_e60276e9/w8a8/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 534.5, "throttle_ratio": 0.952, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -110.6 + "ttft_p99_drift_ms": -110.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 534.5, + "std": 5.8, + "cv_pct": 1.09, + "stability": "stable", + "runs": [ + 519.8, + 531.9, + 532.4, + 535.4, + 535.9, + 536.8, + 536.4, + 537.7, + 536.4, + 533.3, + 527.9, + 537.8, + 546.0, + 534.9 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json index 7bb06c17..bab319b7 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/result.json @@ -421,7 +421,44 @@ "sustained_throughput_tokens_per_sec": 32.3, "throttle_ratio": 0.461, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 2424.7 + "ttft_p99_drift_ms": 2424.7, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 32.3, + "std": 11.7, + "cv_pct": 36.18, + "stability": "high-variance", + "runs": [ + 25.6, + 25.6, + 25.6, + 55.4, + 25.6, + 29.9, + 25.6, + 51.2, + 25.6, + 29.9, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 25.6, + 55.5, + 25.6, + 29.9, + 51.1, + 25.7, + 25.6, + 29.9, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json index cb6e2f90..0667e527 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_D_nvidia_vllm_47f5d58e_42ab3af7/sustained/result.json @@ -377,7 +377,44 @@ "sustained_throughput_tokens_per_sec": 32.3, "throttle_ratio": 0.461, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 2424.7 + "ttft_p99_drift_ms": 2424.7, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 32.3, + "std": 11.7, + "cv_pct": 36.18, + "stability": "high-variance", + "runs": [ + 25.6, + 25.6, + 25.6, + 55.4, + 25.6, + 29.9, + 25.6, + 51.2, + 25.6, + 29.9, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 25.6, 
+ 55.5, + 25.6, + 29.9, + 51.1, + 25.7, + 25.6, + 29.9, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json index 502115f5..61f97879 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 2895.0, "throttle_ratio": 0.773, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -18.1 + "ttft_p99_drift_ms": -18.1, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2895.0, + "std": 197.6, + "cv_pct": 6.83, + "stability": "noisy", + "runs": [ + 3526.2, + 3053.6, + 2843.1, + 2823.8, + 2814.7, + 2834.9, + 2805.7, + 2814.8, + 2790.3, + 2824.4, + 2862.5, + 2960.7, + 2848.3, + 2726.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json index e5d7bf0c..97bc5729 100644 --- a/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json +++ b/results/verified/nvidia_rtx_6000_ada_generationx1_suite_F_nvidia_vllm_47f5d58e_2b905f5e/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 2895.0, "throttle_ratio": 0.773, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -18.1 + "ttft_p99_drift_ms": -18.1, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2895.0, + "std": 197.6, + "cv_pct": 6.83, + "stability": "noisy", + "runs": [ + 3526.2, + 3053.6, + 2843.1, + 2823.8, + 2814.7, + 2834.9, + 2805.7, + 2814.8, + 2790.3, + 2824.4, + 
2862.5, + 2960.7, + 2848.3, + 2726.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/result.json b/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/result.json index 726f575d..0cb9ba46 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/result.json @@ -533,7 +533,44 @@ "sustained_throughput_tokens_per_sec": 265.3, "throttle_ratio": 0.877, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -18.4 + "ttft_p99_drift_ms": -18.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 265.3, + "std": 8.6, + "cv_pct": 3.25, + "stability": "noisy", + "runs": [ + 259.8, + 264.2, + 272.7, + 273.7, + 262.9, + 270.4, + 247.2, + 262.5, + 273.3, + 262.8, + 272.8, + 270.2, + 252.1, + 266.8, + 263.8, + 272.9, + 258.3, + 278.3, + 270.2, + 248.1, + 262.5, + 281.9, + 254.3, + 272.9, + 270.5, + 260.2, + 259.2, + 264.1 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/sustained/result.json index b55019af..5c4fda4e 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_A_nvidia_vllm_47f5d58e_7cd0b745/sustained/result.json @@ -432,7 +432,44 @@ "sustained_throughput_tokens_per_sec": 265.3, "throttle_ratio": 0.877, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": -18.4 + "ttft_p99_drift_ms": -18.4, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 265.3, + "std": 8.6, + "cv_pct": 3.25, + "stability": "noisy", + "runs": [ + 259.8, + 264.2, + 272.7, + 273.7, + 262.9, + 270.4, + 247.2, + 262.5, + 273.3, + 262.8, + 272.8, + 270.2, + 252.1, + 266.8, + 263.8, + 272.9, 
+ 258.3, + 278.3, + 270.2, + 248.1, + 262.5, + 281.9, + 254.3, + 272.9, + 270.5, + 260.2, + 259.2, + 264.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/result.json index b108afd2..db10bf97 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 272.0, "throttle_ratio": 0.874, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -232.8 + "ttft_p99_drift_ms": -232.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 272.0, + "std": 11.4, + "cv_pct": 4.19, + "stability": "noisy", + "runs": [ + 253.7, + 275.2, + 290.4, + 285.6, + 280.0, + 282.0, + 285.6, + 271.1, + 256.0, + 265.8, + 266.7, + 262.5, + 266.7, + 266.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/sustained/result.json index 06112e5a..01b6eee9 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/bf16/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 272.0, "throttle_ratio": 0.874, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -232.8 + "ttft_p99_drift_ms": -232.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 272.0, + "std": 11.4, + "cv_pct": 4.19, + "stability": "noisy", + "runs": [ + 253.7, + 275.2, + 290.4, + 285.6, + 280.0, + 282.0, + 285.6, + 271.1, + 256.0, + 265.8, + 266.7, + 262.5, + 266.7, + 266.6 + ] + } } }, "accuracy": { diff --git 
a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json index 9aa21947..083af211 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 435.5, "throttle_ratio": 0.891, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -477.8 + "ttft_p99_drift_ms": -477.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 435.5, + "std": 11.5, + "cv_pct": 2.65, + "stability": "stable", + "runs": [ + 407.9, + 438.6, + 442.3, + 439.1, + 432.0, + 441.9, + 428.9, + 438.9, + 457.6, + 425.9, + 442.3, + 430.0, + 443.6, + 427.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json index 65eabbca..8fc25e19 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/fp8/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 435.5, "throttle_ratio": 0.891, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -477.8 + "ttft_p99_drift_ms": -477.8, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 435.5, + "std": 11.5, + "cv_pct": 2.65, + "stability": "stable", + "runs": [ + 407.9, + 438.6, + 442.3, + 439.1, + 432.0, + 441.9, + 428.9, + 438.9, + 457.6, + 425.9, + 442.3, + 430.0, + 443.6, + 427.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json 
b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json index 305ae8bf..70a58cb5 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 541.0, "throttle_ratio": 0.824, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": -318.2 + "ttft_p99_drift_ms": -318.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 541.0, + "std": 40.7, + "cv_pct": 7.53, + "stability": "noisy", + "runs": [ + 568.6, + 600.0, + 588.4, + 608.5, + 591.8, + 535.1, + 501.3, + 513.5, + 507.0, + 515.9, + 508.6, + 521.6, + 502.9, + 510.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json index 0281b3d5..c9789133 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w4a16/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 541.0, "throttle_ratio": 0.824, "throttle_onset_minute": 6.0, - "ttft_p99_drift_ms": -318.2 + "ttft_p99_drift_ms": -318.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 541.0, + "std": 40.7, + "cv_pct": 7.53, + "stability": "noisy", + "runs": [ + 568.6, + 600.0, + 588.4, + 608.5, + 591.8, + 535.1, + 501.3, + 513.5, + 507.0, + 515.9, + 508.6, + 521.6, + 502.9, + 510.7 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json index 73ae3e59..958e98f2 100644 --- 
a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 433.8, "throttle_ratio": 0.91, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -471.4 + "ttft_p99_drift_ms": -471.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 433.8, + "std": 9.6, + "cv_pct": 2.22, + "stability": "stable", + "runs": [ + 409.8, + 439.3, + 434.7, + 433.3, + 441.7, + 440.2, + 450.3, + 424.3, + 441.8, + 433.8, + 436.5, + 429.7, + 430.7, + 427.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json index da8913c7..464d45b6 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a16/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 433.8, "throttle_ratio": 0.91, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -471.4 + "ttft_p99_drift_ms": -471.4, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 433.8, + "std": 9.6, + "cv_pct": 2.22, + "stability": "stable", + "runs": [ + 409.8, + 439.3, + 434.7, + 433.3, + 441.7, + 440.2, + 450.3, + 424.3, + 441.8, + 433.8, + 436.5, + 429.7, + 430.7, + 427.6 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/result.json index e7a07246..ec510205 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/result.json +++ 
b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/result.json @@ -391,7 +391,30 @@ "sustained_throughput_tokens_per_sec": 419.5, "throttle_ratio": 0.862, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -177.6 + "ttft_p99_drift_ms": -177.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 419.5, + "std": 16.4, + "cv_pct": 3.9, + "stability": "noisy", + "runs": [ + 384.3, + 426.5, + 445.8, + 403.7, + 422.8, + 435.2, + 396.5, + 437.6, + 420.4, + 418.3, + 426.9, + 417.6, + 412.9, + 424.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/sustained/result.json index b6428506..e18178cf 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_C_nvidia_vllm_47f5d58e_b87c1621/w8a8/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 419.5, "throttle_ratio": 0.862, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -177.6 + "ttft_p99_drift_ms": -177.6, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 419.5, + "std": 16.4, + "cv_pct": 3.9, + "stability": "noisy", + "runs": [ + 384.3, + 426.5, + 445.8, + 403.7, + 422.8, + 435.2, + 396.5, + 437.6, + 420.4, + 418.3, + 426.9, + 417.6, + 412.9, + 424.8 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json index f6f98fbe..70e12fab 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/result.json @@ -476,7 +476,44 @@ "sustained_throughput_tokens_per_sec": 30.5, "throttle_ratio": 0.461, 
"throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -34.2 + "ttft_p99_drift_ms": -34.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 30.5, + "std": 10.5, + "cv_pct": 34.45, + "stability": "high-variance", + "runs": [ + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.4, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json index 42a7881e..1695366c 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_D_nvidia_vllm_47f5d58e_f2197473/sustained/result.json @@ -432,7 +432,44 @@ "sustained_throughput_tokens_per_sec": 30.5, "throttle_ratio": 0.461, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -34.2 + "ttft_p99_drift_ms": -34.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 30.5, + "std": 10.5, + "cv_pct": 34.45, + "stability": "high-variance", + "runs": [ + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.4, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5, + 25.6, + 25.6, + 25.6, + 29.9, + 25.6, + 25.6, + 55.5 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json index 7d98f82d..16faa2cf 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/result.json @@ -369,7 +369,30 @@ "sustained_throughput_tokens_per_sec": 1917.3, 
"throttle_ratio": 0.728, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -21.5 + "ttft_p99_drift_ms": -21.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1917.3, + "std": 286.8, + "cv_pct": 14.96, + "stability": "high-variance", + "runs": [ + 2197.7, + 2238.6, + 2282.9, + 2213.9, + 2247.4, + 2232.2, + 1661.7, + 1675.0, + 1663.0, + 1705.4, + 1668.5, + 1678.5, + 1709.0, + 1668.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json index 9d66800d..96ccbaf5 100644 --- a/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x1_suite_F_nvidia_vllm_47f5d58e_a33d6eb3/sustained/result.json @@ -282,7 +282,30 @@ "sustained_throughput_tokens_per_sec": 1917.3, "throttle_ratio": 0.728, "throttle_onset_minute": 7.0, - "ttft_p99_drift_ms": -21.5 + "ttft_p99_drift_ms": -21.5, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 1917.3, + "std": 286.8, + "cv_pct": 14.96, + "stability": "high-variance", + "runs": [ + 2197.7, + 2238.6, + 2282.9, + 2213.9, + 2247.4, + 2232.2, + 1661.7, + 1675.0, + 1663.0, + 1705.4, + 1668.5, + 1678.5, + 1709.0, + 1668.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json index a31b870e..5e4a0c73 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/result.json @@ -574,7 +574,44 @@ "sustained_throughput_tokens_per_sec": 105.3, "throttle_ratio": 0.764, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -30.2 + "ttft_p99_drift_ms": -30.2, + "throughput_post_warmup_reliability": { + 
"n": 28, + "mean": 105.3, + "std": 6.8, + "cv_pct": 6.47, + "stability": "noisy", + "runs": [ + 108.7, + 106.6, + 102.3, + 97.9, + 111.4, + 102.8, + 101.8, + 105.3, + 113.3, + 110.1, + 100.9, + 96.0, + 108.5, + 116.2, + 88.8, + 110.6, + 111.3, + 104.4, + 107.8, + 93.2, + 109.5, + 110.7, + 101.6, + 106.6, + 103.9, + 112.4, + 93.3, + 112.3 + ] + } }, "interactive": { "ttft_ms_p50": 157.98, diff --git a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json index d271ba19..6b364b39 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_B_nvidia_vllm_47f5d58e_0981ecf7/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 105.3, "throttle_ratio": 0.764, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -30.2 + "ttft_p99_drift_ms": -30.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 105.3, + "std": 6.8, + "cv_pct": 6.47, + "stability": "noisy", + "runs": [ + 108.7, + 106.6, + 102.3, + 97.9, + 111.4, + 102.8, + 101.8, + 105.3, + 113.3, + 110.1, + 100.9, + 96.0, + 108.5, + 116.2, + 88.8, + 110.6, + 111.3, + 104.4, + 107.8, + 93.2, + 109.5, + 110.7, + 101.6, + 106.6, + 103.9, + 112.4, + 93.3, + 112.3 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/result.json b/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/result.json index ba4de6df..70f04083 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 343.0, "throttle_ratio": 0.893, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -70.2 + 
"ttft_p99_drift_ms": -70.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 343.0, + "std": 10.9, + "cv_pct": 3.17, + "stability": "noisy", + "runs": [ + 341.7, + 342.6, + 326.1, + 337.2, + 353.4, + 349.3, + 330.5, + 364.8, + 341.4, + 339.1, + 352.7, + 340.2, + 328.8, + 350.5, + 342.9, + 340.7, + 332.5, + 336.4, + 357.0, + 331.2, + 364.2, + 331.5, + 349.5, + 338.7, + 350.9, + 325.7, + 356.4, + 349.1 + ] + } } }, "accuracy": { diff --git a/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/sustained/result.json b/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/sustained/result.json index cd2c17af..53672ae3 100644 --- a/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/sustained/result.json +++ b/results/verified/nvidia_rtx_a6000x8_suite_G_nvidia_vllm_47f5d58e_a8cf2a0f/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 343.0, "throttle_ratio": 0.893, "throttle_onset_minute": 4.0, - "ttft_p99_drift_ms": -70.2 + "ttft_p99_drift_ms": -70.2, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 343.0, + "std": 10.9, + "cv_pct": 3.17, + "stability": "noisy", + "runs": [ + 341.7, + 342.6, + 326.1, + 337.2, + 353.4, + 349.3, + 330.5, + 364.8, + 341.4, + 339.1, + 352.7, + 340.2, + 328.8, + 350.5, + 342.9, + 340.7, + 332.5, + 336.4, + 357.0, + 331.2, + 364.2, + 331.5, + 349.5, + 338.7, + 350.9, + 325.7, + 356.4, + 349.1 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/result.json b/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/result.json index 06c3675f..9360106c 100644 --- a/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/result.json +++ b/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/result.json @@ -314,7 +314,30 @@ "sustained_throughput_tokens_per_sec": 2006.9, "throttle_ratio": 0.982, "throttle_onset_minute": null, - 
"ttft_p99_drift_ms": -156.7 + "ttft_p99_drift_ms": -156.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2006.9, + "std": 10.1, + "cv_pct": 0.5, + "stability": "stable", + "runs": [ + 1999.4, + 2004.0, + 1998.1, + 2014.0, + 2013.6, + 2003.3, + 2015.3, + 2006.1, + 1997.0, + 1997.9, + 2013.0, + 1999.4, + 2033.6, + 2002.5 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/sustained/result.json b/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/sustained/result.json index 8f3b9484..9c1d9ecc 100644 --- a/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/sustained/result.json +++ b/results/verified/tesla_t4x1_suite_F_nvidia_vllm_47f5d58e_4660bc0b/sustained/result.json @@ -227,7 +227,30 @@ "sustained_throughput_tokens_per_sec": 2006.9, "throttle_ratio": 0.982, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -156.7 + "ttft_p99_drift_ms": -156.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2006.9, + "std": 10.1, + "cv_pct": 0.5, + "stability": "stable", + "runs": [ + 1999.4, + 2004.0, + 1998.1, + 2014.0, + 2013.6, + 2003.3, + 2015.3, + 2006.1, + 1997.0, + 1997.9, + 2013.0, + 1999.4, + 2033.6, + 2002.5 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/result.json index da09653a..769b621e 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/result.json @@ -500,7 +500,44 @@ "sustained_throughput_tokens_per_sec": 268.3, "throttle_ratio": 0.853, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -12.5 + "ttft_p99_drift_ms": -12.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 268.3, + "std": 11.1, + "cv_pct": 4.12, + "stability": "noisy", + 
"runs": [ + 265.0, + 294.4, + 276.4, + 269.7, + 295.7, + 281.2, + 281.4, + 271.3, + 263.5, + 265.3, + 255.6, + 262.4, + 276.5, + 255.3, + 263.9, + 254.9, + 282.2, + 262.4, + 271.5, + 252.3, + 262.6, + 268.9, + 261.8, + 269.2, + 260.4, + 262.4, + 269.1, + 257.7 + ] + } }, "speculative": { "results_by_concurrency": [ diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/sustained/result.json index adbaa643..2060ee84 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_A_nvidia_vllm_47f5d58e_48261ecc/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 268.3, "throttle_ratio": 0.853, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": -12.5 + "ttft_p99_drift_ms": -12.5, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 268.3, + "std": 11.1, + "cv_pct": 4.12, + "stability": "noisy", + "runs": [ + 265.0, + 294.4, + 276.4, + 269.7, + 295.7, + 281.2, + 281.4, + 271.3, + 263.5, + 265.3, + 255.6, + 262.4, + 276.5, + 255.3, + 263.9, + 254.9, + 282.2, + 262.4, + 271.5, + 252.3, + 262.6, + 268.9, + 261.8, + 269.2, + 260.4, + 262.4, + 269.1, + 257.7 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/result.json index b1111115..b0451f0e 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 265.9, "throttle_ratio": 0.864, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -315.2 + "ttft_p99_drift_ms": 
-315.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 265.9, + "std": 11.7, + "cv_pct": 4.41, + "stability": "noisy", + "runs": [ + 253.9, + 268.8, + 273.6, + 288.3, + 272.3, + 261.3, + 277.1, + 252.4, + 268.1, + 249.0, + 278.5, + 253.5, + 269.7, + 256.7 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/sustained/result.json index dd11245f..22e3245f 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/fp16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 265.9, "throttle_ratio": 0.864, "throttle_onset_minute": 1.0, - "ttft_p99_drift_ms": -315.2 + "ttft_p99_drift_ms": -315.2, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 265.9, + "std": 11.7, + "cv_pct": 4.41, + "stability": "noisy", + "runs": [ + 253.9, + 268.8, + 273.6, + 288.3, + 272.3, + 261.3, + 277.1, + 252.4, + 268.1, + 249.0, + 278.5, + 253.5, + 269.7, + 256.7 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json index 7fd24ec5..b93e3207 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/result.json @@ -361,7 +361,30 @@ "sustained_throughput_tokens_per_sec": 416.4, "throttle_ratio": 0.915, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -335.7 + "ttft_p99_drift_ms": -335.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 416.4, + "std": 10.9, + "cv_pct": 2.62, + 
"stability": "stable", + "runs": [ + 400.3, + 404.0, + 414.5, + 428.4, + 407.3, + 435.2, + 418.8, + 418.2, + 419.1, + 413.0, + 426.8, + 422.6, + 423.5, + 398.3 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json index 003f7bcc..8f855f82 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_C_nvidia_vllm_47f5d58e_b957e789/w4a16/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 416.4, "throttle_ratio": 0.915, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -335.7 + "ttft_p99_drift_ms": -335.7, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 416.4, + "std": 10.9, + "cv_pct": 2.62, + "stability": "stable", + "runs": [ + 400.3, + 404.0, + 414.5, + 428.4, + 407.3, + 435.2, + 418.8, + 418.2, + 419.1, + 413.0, + 426.8, + 422.6, + 423.5, + 398.3 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json index 17b35079..5e3dda03 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/result.json @@ -444,7 +444,44 @@ "sustained_throughput_tokens_per_sec": 14.9, "throttle_ratio": 0.399, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 34216.0 + "ttft_p99_drift_ms": 34216.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 14.9, + "std": 4.4, + "cv_pct": 29.72, + "stability": "high-variance", + "runs": [ + 17.1, + 17.1, + 8.5, + 17.1, + 17.1, + 8.5, + 17.1, + 21.3, + 8.5, + 17.1, + 17.1, + 8.5, + 
21.3, + 17.1, + 8.5, + 17.1, + 17.1, + 17.1, + 8.5, + 17.1, + 17.1, + 8.5, + 21.3, + 17.1, + 8.5, + 17.1, + 12.8, + 17.1 + ] + } }, "online": { "sla_ttft_ms": 5000, diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json index 07f35034..7862c4c8 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_D_nvidia_vllm_47f5d58e_6eb549a8/sustained/result.json @@ -402,7 +402,44 @@ "sustained_throughput_tokens_per_sec": 14.9, "throttle_ratio": 0.399, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 34216.0 + "ttft_p99_drift_ms": 34216.0, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 14.9, + "std": 4.4, + "cv_pct": 29.72, + "stability": "high-variance", + "runs": [ + 17.1, + 17.1, + 8.5, + 17.1, + 17.1, + 8.5, + 17.1, + 21.3, + 8.5, + 17.1, + 17.1, + 8.5, + 21.3, + 17.1, + 8.5, + 17.1, + 17.1, + 17.1, + 8.5, + 17.1, + 17.1, + 8.5, + 21.3, + 17.1, + 8.5, + 17.1, + 12.8, + 17.1 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/result.json index 890bb38f..08b68576 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/result.json @@ -336,7 +336,30 @@ "sustained_throughput_tokens_per_sec": 2789.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -83.9 + "ttft_p99_drift_ms": -83.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2789.7, + "std": 50.6, + "cv_pct": 1.81, + "stability": "stable", + "runs": [ + 2797.6, + 2649.7, + 2789.6, + 2783.6, + 2742.5, + 
2781.3, + 2791.3, + 2763.1, + 2817.4, + 2817.8, + 2842.1, + 2857.4, + 2830.4, + 2792.3 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/sustained/result.json index 2091a26b..39e3777b 100644 --- a/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx1_suite_F_nvidia_vllm_47f5d58e_04fce6f6/sustained/result.json @@ -252,7 +252,30 @@ "sustained_throughput_tokens_per_sec": 2789.7, "throttle_ratio": 0.927, "throttle_onset_minute": null, - "ttft_p99_drift_ms": -83.9 + "ttft_p99_drift_ms": -83.9, + "throughput_post_warmup_reliability": { + "n": 14, + "mean": 2789.7, + "std": 50.6, + "cv_pct": 1.81, + "stability": "stable", + "runs": [ + 2797.6, + 2649.7, + 2789.6, + 2783.6, + 2742.5, + 2781.3, + 2791.3, + 2763.1, + 2817.4, + 2817.8, + 2842.1, + 2857.4, + 2830.4, + 2792.3 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json index 6536a548..27dd8c63 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 92.8, "throttle_ratio": 0.749, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 34.6 + "ttft_p99_drift_ms": 34.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 92.8, + "std": 8.0, + "cv_pct": 8.63, + "stability": "high-variance", + "runs": [ + 86.6, + 95.9, + 92.6, + 103.3, + 80.9, + 93.4, + 92.7, + 84.7, + 102.0, + 89.7, + 88.6, + 94.3, + 92.8, + 106.5, + 81.9, + 92.7, + 100.4, + 83.5, + 99.3, + 86.1, + 97.7, + 82.1, + 
97.0, + 102.8, + 79.8, + 106.2, + 84.7, + 99.2 + ] + } }, "interactive": { "ttft_ms_p50": 225.66, diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json index 23d04c69..e34441a4 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_B_nvidia_vllm_47f5d58e_48f19c22/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 92.8, "throttle_ratio": 0.749, "throttle_onset_minute": 2.0, - "ttft_p99_drift_ms": 34.6 + "ttft_p99_drift_ms": 34.6, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 92.8, + "std": 8.0, + "cv_pct": 8.63, + "stability": "high-variance", + "runs": [ + 86.6, + 95.9, + 92.6, + 103.3, + 80.9, + 93.4, + 92.7, + 84.7, + 102.0, + 89.7, + 88.6, + 94.3, + 92.8, + 106.5, + 81.9, + 92.7, + 100.4, + 83.5, + 99.3, + 86.1, + 97.7, + 82.1, + 97.0, + 102.8, + 79.8, + 106.2, + 84.7, + 99.2 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json index 4c93f34e..3d656bed 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/result.json @@ -571,7 +571,44 @@ "sustained_throughput_tokens_per_sec": 293.9, "throttle_ratio": 0.811, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": 22.3 + "ttft_p99_drift_ms": 22.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 293.9, + "std": 15.7, + "cv_pct": 5.35, + "stability": "noisy", + "runs": [ + 311.7, + 301.1, + 301.5, + 293.5, + 300.5, + 320.6, + 285.0, + 308.8, + 297.6, + 290.8, + 304.3, + 316.8, + 293.8, + 
287.4, + 316.6, + 305.1, + 296.0, + 298.7, + 260.1, + 298.2, + 284.7, + 274.4, + 293.2, + 267.2, + 271.3, + 297.0, + 262.9, + 291.0 + ] + } } }, "accuracy": { diff --git a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json index 1804e6c1..ab2680be 100644 --- a/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json +++ b/results/verified/tesla_v100s_pcie_32gbx8_suite_G_nvidia_vllm_47f5d58e_2ef567be/sustained/result.json @@ -472,7 +472,44 @@ "sustained_throughput_tokens_per_sec": 293.9, "throttle_ratio": 0.811, "throttle_onset_minute": 8.0, - "ttft_p99_drift_ms": 22.3 + "ttft_p99_drift_ms": 22.3, + "throughput_post_warmup_reliability": { + "n": 28, + "mean": 293.9, + "std": 15.7, + "cv_pct": 5.35, + "stability": "noisy", + "runs": [ + 311.7, + 301.1, + 301.5, + 293.5, + 300.5, + 320.6, + 285.0, + 308.8, + 297.6, + 290.8, + 304.3, + 316.8, + 293.8, + 287.4, + 316.6, + 305.1, + 296.0, + 298.7, + 260.1, + 298.2, + 284.7, + 274.4, + 293.2, + 267.2, + 271.3, + 297.0, + 262.9, + 291.0 + ] + } } }, "accuracy": { diff --git a/schema/env.schema.json b/schema/env.schema.json index e80cd942..4d90a791 100644 --- a/schema/env.schema.json +++ b/schema/env.schema.json @@ -73,6 +73,11 @@ "intra_node_interconnect": { "type": ["string","null"], "description": "Intra-node GPU interconnect detected, e.g. 'NVLink', or null if not detected" + }, + "vendor_details": { + "type": ["object","null"], + "additionalProperties": true, + "description": "Optional bag of vendor-specific environment fields that don't fit any unified schema (e.g. NVIDIA NVML clocks, AMD ROCm-SMI counters, Ascend HCCL link health, Apple Metal version). The schema is deliberately permissive — each vendor's platforms/.py decides what to record. 
The leaderboard UI renders this dict as a flat key→value list, omitting null/empty values, and never tries to unify across vendors." } } } diff --git a/schema/suite.schema.json b/schema/suite.schema.json index 1367fe0d..7309b58a 100644 --- a/schema/suite.schema.json +++ b/schema/suite.schema.json @@ -117,11 +117,23 @@ "online_sla_ttft_ms": { "type": ["integer", "null"], "minimum": 1 }, "online_sla_ttft_ms_relaxed": { "type": ["integer", "null"], "minimum": 1 }, "online_request_count": { "type": ["integer", "null"], "minimum": 1 }, - "online_warmup_runs": { "type": "integer", "minimum": 0 }, + "online_warmup_runs": { + "type": "integer", "minimum": 0, + "description": "DEPRECATED. Previously unused — kept only to silence schema warnings on older suites. Use online_warmup_requests instead." + }, + "online_warmup_requests": { + "type": "integer", "minimum": 0, + "description": "Number of dummy requests fired sequentially before the online QPS sweep. Results are discarded. Used to JIT-compile kernels and prime the engine on cold start. Defaults to 10 if not set." + }, "interactive_request_count": { "type": ["integer", "null"], "minimum": 1 }, "interactive_warmup_runs": { "type": "integer", "minimum": 0 }, + "burst_warmup_requests": { + "type": "integer", "minimum": 0, + "description": "Number of dummy requests fired sequentially before the first burst cycle. Results are discarded. Defaults to 10 if not set." 
+ }, + "sustained_concurrency": { "type": "integer", "minimum": 1 }, "duration_minutes": { "type": "number", "minimum": 0 }, "sample_interval_seconds": { "type": "number", "minimum": 0 }, diff --git a/suites/suite_A/suite.json b/suites/suite_A/suite.json index d8f6914a..e902022a 100644 --- a/suites/suite_A/suite.json +++ b/suites/suite_A/suite.json @@ -29,8 +29,9 @@ "num_runs": 3, "warmup_runs": 1, "warmup_minutes": 2, - "online_warmup_runs": 0, - "interactive_warmup_runs": 0 , + "online_warmup_requests": 10, + "burst_warmup_requests": 10, + "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 100, "_request_count_note": "offline uses request_count (100), online uses online_request_count (300, minimum for robust p99), interactive uses interactive_request_count (150, minimum for robust p95)", diff --git a/suites/suite_B/suite.json b/suites/suite_B/suite.json index b331dbd8..87ba98f0 100644 --- a/suites/suite_B/suite.json +++ b/suites/suite_B/suite.json @@ -31,7 +31,8 @@ "num_runs": 3, "warmup_runs": 1, "warmup_minutes": 2, - "online_warmup_runs": 0, + "online_warmup_requests": 10, + "burst_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 100, diff --git a/suites/suite_D/suite.json b/suites/suite_D/suite.json index 0f93adc9..2f439d0a 100644 --- a/suites/suite_D/suite.json +++ b/suites/suite_D/suite.json @@ -29,7 +29,7 @@ "num_runs": 2, "warmup_runs": 1, "warmup_minutes": 2, - "online_warmup_runs": 0, + "online_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 50, diff --git a/suites/suite_E/suite.json b/suites/suite_E/suite.json index dfaab1bb..6a3de48d 100644 --- a/suites/suite_E/suite.json +++ b/suites/suite_E/suite.json @@ -32,7 +32,7 @@ "online_sla_ttft_ms_relaxed": null, "num_runs": 3, "warmup_runs": 1, - "online_warmup_runs": 0, + "online_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, 
"request_count": 150, diff --git a/suites/suite_F/suite.json b/suites/suite_F/suite.json index 17f3a9b4..67851df0 100644 --- a/suites/suite_F/suite.json +++ b/suites/suite_F/suite.json @@ -33,7 +33,7 @@ "num_runs": 3, "warmup_runs": 1, "warmup_minutes": 1, - "online_warmup_runs": 0, + "online_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "_accuracy_note": "Qwen2.5-0.5B scores ~0.35–0.40 on MMLU by design (small model). The threshold detects broken quantization or misconfigured precision, not model quality.", diff --git a/suites/suite_G/suite.json b/suites/suite_G/suite.json index 7ef5aae2..0a339dab 100644 --- a/suites/suite_G/suite.json +++ b/suites/suite_G/suite.json @@ -31,7 +31,7 @@ "num_runs": 3, "warmup_runs": 1, "warmup_minutes": 2, - "online_warmup_runs": 0, + "online_warmup_requests": 10, "interactive_warmup_runs": 0, "accuracy_threshold_delta": 0.1, "request_count": 100,