From f09764e1529ebe68f6fd9d31f632ac809781563c Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 22 Apr 2025 12:49:45 -0700 Subject: [PATCH 1/3] Mark a handful of functions on the calling-into-Wasm path as `#[inline]` This provides an improvement across the board for our `sync/no-hook` benchmarks:
Benchmark Results ``` $ cargo bench --profile profiling --bench call '\bsync/no-hook' -- --baseline main Finished `profiling` profile [optimized + debuginfo] target(s) in 0.28s Running benches/call.rs (target/profiling/deps/call-b0a2bedd3336ad76) sync/no-hook/core - host-to-wasm - typed - nop time: [27.334 ns 27.499 ns 27.668 ns] change: [-16.388% -14.870% -13.479%] (p = 0.00 < 0.05) Performance has improved. Found 7 outliers among 100 measurements (7.00%) 5 (5.00%) high mild 2 (2.00%) high severe sync/no-hook/core - host-to-wasm - untyped - nop time: [44.141 ns 44.429 ns 44.757 ns] change: [-18.380% -17.041% -15.670%] (p = 0.00 < 0.05) Performance has improved. Found 3 outliers among 100 measurements (3.00%) 1 (1.00%) high mild 2 (2.00%) high severe sync/no-hook/core - host-to-wasm - unchecked - nop time: [29.731 ns 29.983 ns 30.262 ns] change: [-25.104% -22.176% -19.159%] (p = 0.00 < 0.05) Performance has improved. Found 7 outliers among 100 measurements (7.00%) 5 (5.00%) high mild 2 (2.00%) high severe sync/no-hook/core - host-to-wasm - typed - nop-params-and-results time: [28.990 ns 29.143 ns 29.303 ns] change: [-25.804% -24.562% -23.372%] (p = 0.00 < 0.05) Performance has improved. Found 6 outliers among 100 measurements (6.00%) 3 (3.00%) high mild 3 (3.00%) high severe sync/no-hook/core - host-to-wasm - untyped - nop-params-and-results time: [110.00 ns 110.65 ns 111.46 ns] change: [-11.967% -9.0070% -6.1347%] (p = 0.00 < 0.05) Performance has improved. Found 7 outliers among 100 measurements (7.00%) 2 (2.00%) high mild 5 (5.00%) high severe sync/no-hook/core - host-to-wasm - unchecked - nop-params-and-results time: [58.828 ns 59.089 ns 59.418 ns] change: [-15.596% -13.573% -11.484%] (p = 0.00 < 0.05) Performance has improved. Found 3 outliers among 100 measurements (3.00%) 3 (3.00%) high severe sync/no-hook/core - wasm-to-host - typed - nop time: [6.6209 ns 6.6615 ns 6.7077 ns] change: [-53.555% -52.878% -52.116%] (p = 0.00 < 0.05) Performance has improved. Found 6 outliers among 100 measurements (6.00%) 5 (5.00%) high mild 1 (1.00%) high severe sync/no-hook/core - wasm-to-host - typed - nop-params-and-results time: [7.9783 ns 8.0173 ns 8.0611 ns] change: [-54.341% -53.947% -53.505%] (p = 0.00 < 0.05) Performance has improved. Found 3 outliers among 100 measurements (3.00%) 3 (3.00%) high severe sync/no-hook/core - wasm-to-host - untyped - nop time: [18.306 ns 18.393 ns 18.491 ns] change: [-29.104% -28.127% -27.171%] (p = 0.00 < 0.05) Performance has improved. Found 7 outliers among 100 measurements (7.00%) 3 (3.00%) high mild 4 (4.00%) high severe sync/no-hook/core - wasm-to-host - untyped - nop-params-and-results time: [67.741 ns 68.120 ns 68.601 ns] change: [-26.453% -25.061% -23.663%] (p = 0.00 < 0.05) Performance has improved. Found 12 outliers among 100 measurements (12.00%) 6 (6.00%) high mild 6 (6.00%) high severe sync/no-hook/core - wasm-to-host - unchecked - nop time: [6.8379 ns 6.8915 ns 6.9566 ns] change: [-55.623% -55.062% -54.481%] (p = 0.00 < 0.05) Performance has improved. Found 7 outliers among 100 measurements (7.00%) 5 (5.00%) high mild 2 (2.00%) high severe sync/no-hook/core - wasm-to-host - unchecked - nop-params-and-results time: [27.856 ns 28.024 ns 28.214 ns] change: [-17.320% -16.103% -15.038%] (p = 0.00 < 0.05) Performance has improved. Found 10 outliers among 100 measurements (10.00%) 6 (6.00%) high mild 4 (4.00%) high severe sync/no-hook/component - host-to-wasm - typed - nop time: [55.126 ns 55.506 ns 55.932 ns] change: [-19.458% -18.098% -16.736%] (p = 0.00 < 0.05) Performance has improved. Found 8 outliers among 100 measurements (8.00%) 2 (2.00%) high mild 6 (6.00%) high severe sync/no-hook/component - host-to-wasm - untyped - nop time: [101.42 ns 102.06 ns 102.82 ns] change: [-15.679% -14.108% -12.523%] (p = 0.00 < 0.05) Performance has improved. Found 9 outliers among 100 measurements (9.00%) 7 (7.00%) high mild 2 (2.00%) high severe sync/no-hook/component - host-to-wasm - typed - nop-params-and-results time: [61.482 ns 62.017 ns 62.591 ns] change: [-16.576% -15.100% -13.595%] (p = 0.00 < 0.05) Performance has improved. Found 10 outliers among 100 measurements (10.00%) 9 (9.00%) high mild 1 (1.00%) high severe sync/no-hook/component - host-to-wasm - untyped - nop-params-and-results time: [223.50 ns 224.72 ns 226.05 ns] change: [-21.732% -20.178% -18.679%] (p = 0.00 < 0.05) Performance has improved. Found 4 outliers among 100 measurements (4.00%) 1 (1.00%) high mild 3 (3.00%) high severe sync/no-hook/component - wasm-to-host - typed - nop time: [39.115 ns 39.295 ns 39.500 ns] change: [-15.139% -13.886% -12.721%] (p = 0.00 < 0.05) Performance has improved. Found 8 outliers among 100 measurements (8.00%) 1 (1.00%) low mild 2 (2.00%) high mild 5 (5.00%) high severe sync/no-hook/component - wasm-to-host - typed - nop-params-and-results time: [47.234 ns 47.458 ns 47.745 ns] change: [-13.833% -11.951% -9.8784%] (p = 0.00 < 0.05) Performance has improved. Found 9 outliers among 100 measurements (9.00%) 3 (3.00%) high mild 6 (6.00%) high severe sync/no-hook/component - wasm-to-host - untyped - nop time: [52.311 ns 52.556 ns 52.817 ns] change: [-12.736% -11.712% -10.693%] (p = 0.00 < 0.05) Performance has improved. Found 9 outliers among 100 measurements (9.00%) 4 (4.00%) high mild 5 (5.00%) high severe sync/no-hook/component - wasm-to-host - untyped - nop-params-and-results time: [239.71 ns 241.59 ns 244.11 ns] change: [-29.804% -28.173% -26.415%] (p = 0.00 < 0.05) Performance has improved. Found 9 outliers among 100 measurements (9.00%) 4 (4.00%) high mild 5 (5.00%) high severe ```
--- .gitignore | 1 + crates/wasmtime/src/runtime/func.rs | 5 +++++ crates/wasmtime/src/runtime/func/typed.rs | 2 ++ crates/wasmtime/src/runtime/types.rs | 1 + crates/wasmtime/src/runtime/vm/interpreter.rs | 1 + crates/wasmtime/src/runtime/vm/vmcontext.rs | 2 ++ 6 files changed, 12 insertions(+) diff --git a/.gitignore b/.gitignore index 38fc795e7da8..bd3e923d40dd 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,4 @@ tests/all/pulley_provenance_test.cwasm /artifacts testcase*.wat testcase*.wasm +perf.data* diff --git a/crates/wasmtime/src/runtime/func.rs b/crates/wasmtime/src/runtime/func.rs index b6bf00325c63..2523b9bbd54a 100644 --- a/crates/wasmtime/src/runtime/func.rs +++ b/crates/wasmtime/src/runtime/func.rs @@ -943,6 +943,7 @@ impl Func { /// `StoreOpaque` while the `FuncType` is also being used (from the /// perspective of the borrow-checker) because otherwise the signature would /// consider `StoreOpaque` borrowed mutable while `FuncType` is in use. + #[inline] fn ty_ref<'a>(&self, store: &'a mut StoreOpaque) -> (&'a FuncType, &'a StoreOpaque) { // If we haven't loaded our type into the store yet then do so lazily at // this time. @@ -1178,6 +1179,7 @@ impl Func { /// This must be called just before `call_impl_do_call`. /// /// Returns whether we need to GC before calling `call_impl_do_call`. + #[inline] fn call_impl_check_args( &self, store: &mut StoreContextMut<'_, T>, @@ -1234,6 +1236,7 @@ impl Func { /// You must have type checked the arguments by calling /// `call_impl_check_args` immediately before calling this function. It is /// only safe to call this function if that one did not return an error. + #[inline] unsafe fn call_impl_do_call( &self, store: &mut StoreContextMut<'_, T>, @@ -1737,6 +1740,7 @@ impl EntryStoreContext { /// function through this type's `Drop` implementation. This ensures that we /// even restore the values if we unwind the stack (e.g., because we are /// panicing out of a Wasm execution). + #[inline] fn exit_wasm(&mut self) { unsafe { if let Some(limit) = self.stack_limit { @@ -1751,6 +1755,7 @@ impl EntryStoreContext { } impl Drop for EntryStoreContext { + #[inline] fn drop(&mut self) { self.exit_wasm(); } diff --git a/crates/wasmtime/src/runtime/func/typed.rs b/crates/wasmtime/src/runtime/func/typed.rs index 8936bd5fd2cd..2b0fedffe750 100644 --- a/crates/wasmtime/src/runtime/func/typed.rs +++ b/crates/wasmtime/src/runtime/func/typed.rs @@ -93,6 +93,7 @@ where /// connected to an asynchronous store. /// /// [`Trap`]: crate::Trap + #[inline] pub fn call(&self, mut store: impl AsContextMut, params: Params) -> Result { let mut store = store.as_context_mut(); assert!( @@ -179,6 +180,7 @@ where /// /// If `Self::need_gc_before_call_raw`, then the caller must have done a GC /// just before calling this method. + #[inline] pub(crate) unsafe fn call_raw( store: &mut StoreContextMut<'_, T>, ty: &FuncType, diff --git a/crates/wasmtime/src/runtime/types.rs b/crates/wasmtime/src/runtime/types.rs index 8c9a5258e555..9651548c196c 100644 --- a/crates/wasmtime/src/runtime/types.rs +++ b/crates/wasmtime/src/runtime/types.rs @@ -2384,6 +2384,7 @@ impl FuncType { } #[cfg(feature = "gc")] + #[inline] pub(crate) fn as_wasm_func_type(&self) -> &WasmFuncType { self.registered_type.unwrap_func() } diff --git a/crates/wasmtime/src/runtime/vm/interpreter.rs b/crates/wasmtime/src/runtime/vm/interpreter.rs index 5a82b5bd10bb..27f133556ed4 100644 --- a/crates/wasmtime/src/runtime/vm/interpreter.rs +++ b/crates/wasmtime/src/runtime/vm/interpreter.rs @@ -82,6 +82,7 @@ impl InterpreterRef<'_> { /// The `bytecode` pointer should previously have been produced by Cranelift /// and `callee` / `caller` / `args_and_results` are normal array-call /// arguments being passed around. + #[inline(never)] pub unsafe fn call( mut self, mut bytecode: NonNull, diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs index a49cccf02cb7..99177d3de4ba 100644 --- a/crates/wasmtime/src/runtime/vm/vmcontext.rs +++ b/crates/wasmtime/src/runtime/vm/vmcontext.rs @@ -833,6 +833,7 @@ impl VMFuncRef { /// /// Note that the unsafety invariants to maintain here are not currently /// exhaustively documented. + #[inline] pub unsafe fn array_call( &self, pulley: Option>, @@ -867,6 +868,7 @@ impl VMFuncRef { ) } + #[inline] unsafe fn array_call_native( &self, caller: NonNull, From f6763ee3bca380f46611223b16b5afc007ee2831 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 22 Apr 2025 14:11:48 -0700 Subject: [PATCH 2/3] remove inline(never) on interpreter method --- crates/wasmtime/src/runtime/vm/interpreter.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/wasmtime/src/runtime/vm/interpreter.rs b/crates/wasmtime/src/runtime/vm/interpreter.rs index 27f133556ed4..5a82b5bd10bb 100644 --- a/crates/wasmtime/src/runtime/vm/interpreter.rs +++ b/crates/wasmtime/src/runtime/vm/interpreter.rs @@ -82,7 +82,6 @@ impl InterpreterRef<'_> { /// The `bytecode` pointer should previously have been produced by Cranelift /// and `callee` / `caller` / `args_and_results` are normal array-call /// arguments being passed around. - #[inline(never)] pub unsafe fn call( mut self, mut bytecode: NonNull, From 0299be6ce195cc94449daa4b2ba12e2c0fd7c539 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 24 Apr 2025 08:47:18 -0700 Subject: [PATCH 3/3] Remove `#[inline]` from larger methods --- crates/wasmtime/src/runtime/func.rs | 2 -- crates/wasmtime/src/runtime/func/typed.rs | 1 - 2 files changed, 3 deletions(-) diff --git a/crates/wasmtime/src/runtime/func.rs b/crates/wasmtime/src/runtime/func.rs index 2523b9bbd54a..7652eea62ea0 100644 --- a/crates/wasmtime/src/runtime/func.rs +++ b/crates/wasmtime/src/runtime/func.rs @@ -1179,7 +1179,6 @@ impl Func { /// This must be called just before `call_impl_do_call`. /// /// Returns whether we need to GC before calling `call_impl_do_call`. - #[inline] fn call_impl_check_args( &self, store: &mut StoreContextMut<'_, T>, @@ -1236,7 +1235,6 @@ impl Func { /// You must have type checked the arguments by calling /// `call_impl_check_args` immediately before calling this function. It is /// only safe to call this function if that one did not return an error. - #[inline] unsafe fn call_impl_do_call( &self, store: &mut StoreContextMut<'_, T>, diff --git a/crates/wasmtime/src/runtime/func/typed.rs b/crates/wasmtime/src/runtime/func/typed.rs index 2b0fedffe750..a5fb841dec49 100644 --- a/crates/wasmtime/src/runtime/func/typed.rs +++ b/crates/wasmtime/src/runtime/func/typed.rs @@ -180,7 +180,6 @@ where /// /// If `Self::need_gc_before_call_raw`, then the caller must have done a GC /// just before calling this method. - #[inline] pub(crate) unsafe fn call_raw( store: &mut StoreContextMut<'_, T>, ty: &FuncType,