diff --git a/in-depth.html b/in-depth.html index 00d51235..2270c0c7 100644 --- a/in-depth.html +++ b/in-depth.html @@ -46,89 +46,90 @@

In-Depth Analysis

JetStream 3 combines together a variety of JavaScript and WebAssembly benchmarks, covering a variety of - advanced workloads and programming techniques, and reports a single score that - balances them using a geometric mean. + advanced workloads and programming techniques, and reports a single score that balances them using the + geometric mean.

- Each benchmark measures a distinct workload, and no single optimization - technique is sufficient to speed up all benchmarks. Some benchmarks demonstrate tradeoffs, and - aggressive or specialized optimizations for one benchmark might make another benchmark slower. + Each benchmark measures a distinct workload, and no single optimization technique is sufficient to speed + up all benchmarks. + Some benchmarks demonstrate tradeoffs, and aggressive or specialized optimizations for one benchmark + might make another benchmark slower. JetStream 3 rewards browsers that start up quickly, execute code quickly, and continue running smoothly.

Each benchmark in JetStream 3 computes its own individual score. - Scores in JetStream are dimensionless floats, where a higher score is better. + Scores in JetStream are dimensionless floating point numbers, where a higher score is better. JetStream weighs each benchmark equally, taking the geometric mean over each individual benchmark's score to compute the overall JetStream 3 score. The geometric mean ensures that a multiplicative improvement of any individual score has the same effect on the aggregated score, regardless of the absolute value of the individual score. For example, an improvement by 5% of the sub score of benchmark A has the same effect on the total score - as an improvement by 5% of the sub score of benchmark B. + as an improvement by 5% of the sub score of benchmark B, even if A ran for a shorter time than B.

It's not enough to just measure the total running time of a workload. - Browsers may perform differently for the same JavaScript workload depending on how many times it - has run. For example, garbage collection runs periodically, making some iterations take longer than - others. Code that runs repeatedly gets optimized by the browser, so the first iteration - of any workload is usually more expensive than the rest. + Browsers may perform differently for the same workload depending on how many times it has run. + For example, garbage collection runs periodically, making some iterations take longer than others. + Code that runs repeatedly gets optimized by the browser, so the first iteration of any workload is + usually more expensive than the rest.

- For most of the JavaScript and WebAssembly benchmarks in JetStream 3, individual scores - equally weigh startup performance, worst case performance, and average case - performance. These three metrics are crucial to running performant JavaScript - in the browser. Fast startup times lead browsers to loading pages more quickly. Good - worst case performance ensures web applications can run without hiccups. Fast average - case performance makes it so that the most advanced web applications can run at all. + For most of the JavaScript and WebAssembly benchmarks in JetStream 3, individual scores equally weigh + startup performance, worst case performance, and average case performance. + These three metrics are crucial to running performant JavaScript and WebAssembly in the browser. + Fast startup times let browsers load pages more quickly. + Good worst case performance ensures web applications can run without hiccups. + Fast average case performance makes it so that the most advanced web applications can run at all.

- An important component of JetStream 1 were the asm.js subset of benchmarks. With the release - of WebAssembly, the importance of asm.js has lessened since many users of asm.js are - now using WebAssembly. JetStream 3 has converted many of the asm.js benchmarks from - JetStream 1 into WebAssembly. + All but two of JetStream 3's benchmarks run for N iterations, where N is often 120. + JetStream 3 calculates the startup score from the time it takes to run the first iteration. + The worst case score is the average of the worst M iterations, excluding the first iteration. + M is always less than N, and is usually 4. + The average case score is the average of all but the first iteration. + These three scores are weighed equally using the geometric mean.

- All but one of JetStream 3's JavaScript benchmarks run for N iterations, where - N is usually 120. JetStream 3 reports the startup score as the time it takes to run the first iteration. - The worst case score is the average of the worst M iterations, excluding the first iteration. - M is always less than N, and is usually 4. The average case score is the average - of all but the first iteration. These three scores are weighed equally using the geometric - mean. + JetStream 3 also includes a JavaScript benchmark named WSL. + WSL is an implementation of a GPU shading language written in JavaScript. + WSL does not use the above mechanism for scoring because it has a long running time. + Instead, the WSL benchmark computes its score as the geometric mean over two metrics: the time it takes + to compile the WSL standard library, and the time it takes to run through the WSL specification test + suite.

- JetStream 3 also includes a JavaScript benchmark named WSL. WSL is an implementation of a - GPU shading language written in JavaScript. WSL does not use the above mechanism for scoring - because it has a long running time. Instead, the WSL benchmark computes its score as the - geometric mean over two metrics: the time it takes to compile the WSL standard library, and the time - it takes to run through the WSL specification test suite. + JetStream 3 includes parts of these benchmark suites that came before it: + SunSpider, + Octane 2, + JetStream 2, + ARES-6, + Web Tooling Benchmark, + and benchmarks inspired by Kraken. + JetStream 3 also includes a new set of benchmarks that measure the performance of WebAssembly, Web + Workers, Promises, async iteration, unicode regular expressions, and JavaScript parsing.

- JetStream 3 includes parts of these benchmark suites that came before it: SunSpider, - Octane 2, JetStream 1, - ARES-6, Web Tooling Benchmark, and benchmarks - inspired by Kraken. - JetStream 3 also includes a new set of benchmarks that measure the performance of WebAssembly, Web - Workers, - Promises, async iteration, unicode regular expressions, and JavaScript parsing. + Earlier versions of JetStream also contained asm.js workloads. + With the release of WebAssembly, developers should switch to that technology instead. + JetStream 3 thus contains no asm.js workloads any longer and has converted some of the prior ones into + WebAssembly.

- JetStream 3 includes several benchmarks from earlier JetStream versions, but updates the benchmark - driver to - improve score stability. This is achieved by pre-fetching network resources prior to running - the benchmarks. This can reduce perturbations on the measurement of JavaScript execution - time due to second order effects of pause times induced by network latency. + Besides the aforementioned averaging over multiple iterations, the JetStream 3 benchmark driver also + tries to improve score stability by other means. + For example, it pre-fetches network resources prior to running the benchmarks. + This can reduce perturbations on the measurement of JavaScript execution time due to second order + effects of pause times induced by network latency.

@@ -137,19 +138,22 @@

In-Depth Analysis

- JetStream 3 has 64 subtests: + JetStream 3 has 77 default workloads

+

+ The greyed-out workloads are not run by default but can be manually enabled on the command-line or via + the testList URL parameter. +

+
8bitbench-wasm
- A simple 8-bit emulator targeting wasm, written in rust. It aims to represent what an emulator or - small game might act like in the real world. -
- Attribution: See 8bitbench/ABOUT.md -
- Source code: 8bitbench/js3harness.js, based off https://github.com/justinmichaud/8bitbench + A simple 8-bit emulator targeting WebAssembly written in Rust. It aims to represent what an emulator + or small game might act like in the real world. + Attribution: See 8bitbench/README.md. + Source code: In the 8bitbench/ directory, based off https://github.com/justinmichaud/8bitbench.
acorn-wtb
@@ -180,9 +184,10 @@

argon2-wasm
Tests Argon2, a password-hashing function - (the winner of Password Hashing Competition), in WebAssembly. This is test is based on argon2-browser library. Source code: ARGON2. + (the winner of Password Hashing Competition), in WebAssembly. It makes use of the WebAssembly SIMD + feature. + This test is based on argon2-browser + library. Source code: In the wasm/argon2/ directory.
async-fs
@@ -218,10 +223,10 @@

A similar version of this benchmark was previously published in the Web Tooling Benchmark. Source code: babylon.js

-
babylon-scene-es6
-
babylon-scene-es6
-
babylon-startup-es5
-
babylon-startup-es6
+
babylonjs-scene-es5
+
babylonjs-scene-es6
+
babylonjs-startup-es5
+
babylonjs-startup-es6
TODO
Basic
@@ -233,14 +238,14 @@

This benchmark was previously published in ARES-6. Source code: Basic

-
bigint-bigdenary
+
bigint-bigdenary
BigDenary, an arbitrary-precision decimal arithmetic, implemented in JavaScript by U-Zyn Chua. Tests arithmetic operations on BigInt. Source code: bigdenary-bundle.js
-
bigint-noble-bls12-381
+
bigint-noble-bls12-381
BLS12-381, pairing-friendly Barreto-Lynn-Scott elliptic curve construction, @@ -256,7 +261,7 @@

by Paul Miller. Tests typed arrays and arithmetic operations on BigInt. Source code: noble-ed25519-bundle.js

-
bigint-noble-secp256k1
+
bigint-noble-secp256k1
secp256k1, an elliptic curve that could be used for asymmetric encryption, ECDH key agreement protocol and signature schemes, @@ -264,7 +269,7 @@

by Paul Miller. Tests typed arrays and arithmetic operations on BigInt. Source code: noble-secp256k1-bundle.js

-
bigint-paillier
+
bigint-paillier
Paillier cryptosystem, a probabilistic asymmetric algorithm for public key cryptography, @@ -308,10 +313,19 @@

A similar version of this benchmark was previously published in Octane version 2. Source code: crypto.js

-
Dart-flute-complex-wasm
-
TODO
+
Dart-flute-complex-wasm
Dart-flute-todomvc-wasm
-
TODO
+
+ Two Dart benchmark programs compiled to WasmGC that are using a simplified version of the Flutter UI + framework to layout and animate UI elements. + The complex variant contains a large number of widgets and is thus more of a stress test, + disabled by default in JetStream 3. + The todomvc variant is a more realistic TODO list application and enabled by default. + See Dart/README.md for more information. +
+ Source code: in the Dart/ directory, based off https://github.com/dart-lang/flute. +
delta-blue
The classic DeltaBlue benchmark derived from a Smalltalk implementation by Maloney and @@ -321,18 +335,14 @@

Source code: deltablue.js

dotnet-aot-wasm
-
- Tests .NET on WebAssembly. This benchmark tests - operations - on .NET implementation of String, JSON serialization, specifics of .NET exceptions and computation - of a 3D scene using Mono AOT. Source code: .NET. -
dotnet-interp-wasm
- Tests .NET on WebAssembly. This benchmark tests - operations - on .NET implementation of String, JSON serialization, specifics of .NET exceptions and computation - of a 3D scene using Mono Interpreter. Source code: .NET. + These two workloads test .NET on WebAssembly. + They contain a variety of operations on .NET strings, JSON serialization, specifics of .NET + exceptions, and ray tracing of a 3D scene. + The aot variant uses Mono AOT (ahead of time) compilation. + The interp variant uses the Mono interpreter. + Source code: In the wasm/dotnet/ directory.
doxbee-async
@@ -410,7 +420,7 @@

Source code: gbemu-part1.js, gbemu-part2.js

-
gcc-loops-wasm
+
gcc-loops-wasm
Example loops used to tune the GCC and LLVM vectorizers, compiled to WebAssembly with Emscripten. The original C++ version of this benchmark was @@ -426,17 +436,21 @@

of this benchmark was originally published as part of the WebKit test suite. Source code: hash-map.js

-
HashSet-wasm
+
HashSet-wasm
A WebAssembly benchmark replaying a set of hash table operations performed in WebKit when loading a web page. This benchmark was compiled from C++ to WebAssembly using Emscripten. Source code: HashSet.cpp, HashSet.js
-
intl
+
intl
TODO
j2cl-box2d-wasm
-
TODO
+
+ A Java Box2D benchmark that is compiled to WasmGC with the J2CL toolchain. + Source code: In the wasm/j2cl-box2d/ directory. +
js-tokens
This benchmarks runs js-tokens, a lenient and @@ -463,7 +477,15 @@

Source code: json-stringify-inspector.js

Kotlin-compose-wasm
-
+
+ This benchmark is a Kotlin application using the Compose + Multiplatform UI framework. + Compose allows sharing UI code across multiple platforms and is compiled with Kotlin/Wasm to WasmGC. + Source code: In the Kotlin-compose/ directory, based off the benchmark + in https://github.com/JetBrains/compose-multiplatform/. +
lazy-collections
This benchmark iterates over common integer sequences (fibonacci, prime numbers, etc) as lazy @@ -472,13 +494,13 @@

generators. Source code: lazy-collections.js

-
+
lebab-wtb
- Lebab transpiles ES5 code into ES6/ES7. - This benchmark runs Lebab on test JavaScript programs. - This benchmark stresses string manipulation and regular expression performance. - A similar version of this benchmark was previously published in the Web Tooling Benchmark. - Source code: lebab.js + Lebab transpiles ES5 code into ES6/ES7. + This benchmark runs Lebab on test JavaScript programs. + This benchmark stresses string manipulation and regular expression performance. + A similar version of this benchmark was previously published in the Web Tooling Benchmark. + Source code: lebab.js
mandreel
@@ -538,9 +560,9 @@

postcss-wtb
TODO
-
+
prettier-wtb
TODO
-
prismjs-startup-es5
+
prismjs-startup-es5
prismjs-startup-es6
TODO
proxy-mobx
@@ -559,7 +581,7 @@

Tests get / set Proxy traps, as well as various Array methods. Source code: vue-benchmark.js

-
quicksort-wasm
+
quicksort-wasm
Quicksort benchmark, compiled to WebAssembly with Emscripten. The original C version of this benchmark was previously published in the LLVM test suite. @@ -609,10 +631,10 @@

richards-wasm
- Martin Richard's system language - benchmark compiled to a hybrid of WebAssembly and JavaScript. It stresses how quickly - JavaScript can call into WebAssembly code. - Source code: richards.c, richards.js + Martin Richard's system language benchmark + compiled to a hybrid of WebAssembly and JavaScript. + It stresses how quickly JavaScript can call into WebAssembly code. + Source code: in the wasm/richards/ directory.
segmentation
@@ -634,7 +656,17 @@

Source code: splay.js

sqlite3-wasm
-
TODO
+
+ A WebAssembly build of SQLite's speedtest1.c + benchmark program. + Quoting from its description: + "This program strives to exercise the SQLite library in a way that is typical of real-world + applications." + Since SQLite is a very widely used database and provides an official and popular upstream + WebAssembly port, this is a realistic, larger WebAssembly program. + See README.md for more information. + Source code: in the sqlite3/ directory. +
stanford-crypto-aes
Measures the performance of the order of existing files. Source code: sync-file-system.js
-
tfjs-wasm
+
tfjs-wasm
+
tfjs-wasm-simd
Tests Tensorflow.js pre-trained machine learning models supported by knn-classifier, coco-ssd, universal-sentence-encoder. - Source code: TFJS. + The SIMD variant uses vector instructions from the Wasm SIMD feature. + Source code: tfjs-* files in the wasm/ directory.
-
tfjs-wasm-simd
-
TODO
threejs
-
transformersjs-bert-wasm"
-
TODO
-
transformersjs-whisper-wasm
-
TODO
+
transformersjs-bert-wasm
+
transformersjs-whisper-wasm
+
+ Two machine learning tasks using the Transformers.js library, which + uses ONNX Runtime Web under the hood to + perform inference with WebAssembly. They make use of WebAssembly SIMD instructions. + The bert variant uses the distilbert-base-uncased-finetuned-sst-2-english + model to perform sentiment analysis of text. + The whisper variant uses whisper-tiny.en to transcribe audio to + text. + Source code: in the transformersjs/ directory. +
+ +
tsf-wasm
- Runs Filip Pizlo's — of the WebKit team — implementation of a Typed Stream Format - in WebAssembly. The original code is compiled from C to WebAssembly using Typed Stream Format in WebAssembly. + The original code is compiled from C to WebAssembly using Emscripten. - Source code: TSF + Source code: in the wasm/TSF/ directory.
typescript-lib
@@ -806,7 +851,7 @@

A similar version of this benchmark was previously published in Octane version 2. Source code: typescript.js

-
typescript-octane
+
typescript-octane
TODO
UniPoker
@@ -830,7 +875,12 @@

Source code: WSL

zlib-wasm
-
zlib-wasm
+
+ This workload compresses and decompresses a WebAssembly binary file using the zlib + library, compiled to WebAssembly via Emscripten. + Source code: In the wasm/zlib/ directory. +

← Return to Tests

diff --git a/resources/JetStream.css b/resources/JetStream.css index 86023eef..00df2ca8 100644 --- a/resources/JetStream.css +++ b/resources/JetStream.css @@ -28,7 +28,7 @@ --color-secondary: #86d9ff; --text-color-inverse: white; --text-color-primary: black; - --text-color-secondary: #555555; + --text-color-secondary: #303030; --text-color-tertiary: #444444; --text-color-subtle: #6c6c71; --text-color-very-subtle: #8e8e93; @@ -228,6 +228,7 @@ article { display: flex; flex-direction: column; gap: var(--gap); + padding-bottom: var(--gap); } h1 { color: var(--text-color-primary); @@ -268,12 +269,22 @@ dt { margin-top: 10px; font-weight: bold; text-align: left; + color: var(--text-color-secondary); +} + +dt.non-default { + color: var(--text-color-very-subtle); +} + +dt.non-default:after { + content: " (not run by default)"; } dd { text-align: left; padding: 10px 20px; margin: 0; + color: var(--text-color-secondary); } a:link, diff --git a/tests/run-browser.mjs b/tests/run-browser.mjs index a5017aec..62317c26 100644 --- a/tests/run-browser.mjs +++ b/tests/run-browser.mjs @@ -176,7 +176,6 @@ async function runTests() { process.exit(1); } - async function runBrowserDriverTest(name, body) { return runTest(name, () => runBrowserDriver(body)) } @@ -259,38 +258,60 @@ async function benchmarkResults(driver) { async function inDepthPageTest(driver) { await driver.get(`http://localhost:${PORT}/in-depth.html`); - const ids = await driver.executeScript(() => { - return Array.from(document.querySelectorAll("#workload-details dt[id]")).map(each => each.id); - }); + const descriptions = await driver.executeScript(() => { + return Array.from(document.querySelectorAll("#workload-details dt[id]")).map(each => { + return [each.id, { text: each.textContent, cssClass: each.className }]; + }); + }).then(entries => new Map(entries)); + + const sectionErrors = [] + + for (const [id, description] of descriptions) { + if (id !== description.text) { + sectionErrors.push( + `Expected dt with id 
'${id}' to have text content '${id}' but got '${description.text}'`); + } + } + + const ids = Array.from(descriptions.keys()); const sortedIds = ids.slice().sort((a, b) => { return a.toLowerCase().localeCompare(b.toLowerCase()); }); - const sectionErrors = [] sortedIds.forEach((id, index) => { if (id !== ids[index]) { sectionErrors.push( `Expected index ${index} to be '${id}' but got '${ids[index]}' `); } }); - const idSet = new Set(ids); + await driver.get(`http://localhost:${PORT}/index.html?tags=all`); - const benchmarkNames = await driver.executeScript(() => { - return globalThis.JetStream.benchmarks.map(each => each.name); - }); + const benchmarkData = await driver.executeScript(() => { + return globalThis.JetStream.benchmarks.map(each => [each.name, Array.from(each.tags)]); + }).then(entries => new Map(entries)); + + const benchmarkNames = Array.from(benchmarkData.keys()); benchmarkNames.sort((a,b) => { return a.toLowerCase().localeCompare(b.toLowerCase()); }); - const missingIds = benchmarkNames.filter(name => !idSet.has(name)); + const nonDefaultIds = benchmarkNames.filter(name => !benchmarkData.get(name).includes("default")); + for (const id of nonDefaultIds) { + const description = descriptions.get(id); + if (description && description.cssClass !== "non-default") { + sectionErrors.push(`Expected non-default benchmark '${id}' to have CSS class 'non-default' but got '${description.cssClass}'`); + } + } + + const missingIds = benchmarkNames.filter(name => !descriptions.has(name)); if (missingIds.length > 0) { sectionErrors.push(`Missing in-depth.html info section: ${JSON.stringify(missingIds, undefined, 2)}`); } - const benchmarkNamesSet = new Set(benchmarkNames); - const unusedIds = sortedIds.filter(id => !benchmarkNamesSet.has(id)); + const unusedIds = sortedIds.filter(id => !benchmarkData.has(id)); if (unusedIds.length > 0) { sectionErrors.push(`Unused in-depth.html info section: ${JSON.stringify(unusedIds, undefined, 2)}`); } + if (sectionErrors.length > 
0) { throw new Error(`info section errors: ${sectionErrors.join("\n")}`); }