Skip to content

Add size/perf optimized FinishSuspension helpers for runtime async#126041

Draft
jakobbotsch wants to merge 5 commits into dotnet:main from
jakobbotsch:optimize-suspension-tail
Draft

Add size/perf optimized FinishSuspension helpers for runtime async#126041
jakobbotsch wants to merge 5 commits into dotnet:main from
jakobbotsch:optimize-suspension-tail

Conversation

@jakobbotsch
Copy link
Member

@jakobbotsch jakobbotsch commented Mar 24, 2026

This adds two helpers, FinishSuspensionNoContinuationContext and FinishSuspensionWithContinuationContext, which the JIT calls from generated suspension code and which do all of the context handling needed on suspension in the most common cases. Previously the JIT emitted multiple calls to accomplish the same context handling. This is both a size and a perf win in the generated code; the suspension-heavy micro benchmark below shows an ~8% perf improvement.

Micro benchmark with warmup
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;

namespace AsyncMicro;

/// <summary>
/// Micro benchmark for the cost of suspending and resuming an async method.
/// <see cref="Foo"/> awaits an awaiter that never reports completion, so every
/// <c>await</c> suspends; <see cref="Main"/> then resumes it by hand by invoking
/// the continuation the awaiter stored.
/// </summary>
public class Program
{
    /// <summary>
    /// Runs a warmup phase (10 rounds of 100 short runs) followed by 5 long runs.
    /// Only the long runs print a timing, because <see cref="Foo"/> prints only
    /// when <c>n &gt; 100</c> and the warmup runs use <c>n == 100</c>.
    /// </summary>
    static void Main()
    {
        // A single awaiter instance is shared by every run.
        NullAwaiter na = new NullAwaiter();

        // Warmup phase.
        for (int i = 0; i < 10; i++)
        {
            for (int j = 0; j < 100; j++)
            {
                Task t = Foo(100, na);
                // Foo has suspended and stored its continuation in na.Continue;
                // keep resuming it until the returned task reports completion.
                while (!t.IsCompleted)
                {
                    na.Continue();
                }
            }

            // NOTE(review): presumably pauses so background tiered JIT compilation
            // can finish between warmup rounds -- confirm.
            Thread.Sleep(100);
        }

        // Measured phase: each run performs 10,000,000 suspend/resume cycles and
        // prints its own elapsed time from inside Foo.
        for (int i = 0; i < 5; i++)
        {
            Task t = Foo(10_000_000, na);
            while (!t.IsCompleted)
            {
                na.Continue();
            }
        }
    }

    // Accumulator written by the first loop in Foo; it is not read anywhere in this file.
    static int s_value;

    /// <summary>
    /// Adds <c>0..n-1</c> to <see cref="s_value"/>, then awaits <paramref name="na"/>
    /// <paramref name="n"/> times while timing that second loop.
    /// </summary>
    /// <param name="n">Iteration count for both loops; a timing is printed only when it exceeds 100.</param>
    /// <param name="na">Awaiter that never completes synchronously, so each await suspends this method.</param>
    /// <returns>A task that completes once the caller has resumed this method through all <paramref name="n"/> awaits.</returns>
    static async Task Foo(int n, NullAwaiter na)
    {
        // Synchronous loop that runs before the stopwatch starts, so it is not
        // part of the reported time.
        for (int i = 0; i < n; i++)
        {
            s_value += i;
        }

        // The stopwatch keeps running across suspensions, so the reported time
        // also includes the caller's resume loop between awaits.
        Stopwatch timer = Stopwatch.StartNew();
        for (int i = 0; i < n; i++)
        {
            // NullAwaiter.IsCompleted is always false, so this suspends on every iteration.
            await na;
        }

        // Warmup runs pass n == 100 and therefore print nothing.
        if (n > 100)
            Console.WriteLine("Took {0:F1} ms", timer.Elapsed.TotalMilliseconds);
    }

    /// <summary>
    /// Awaitable that is its own awaiter and never reports completion. It does not
    /// schedule the continuation; it only stores it so the caller can invoke it.
    /// </summary>
    private class NullAwaiter : ICriticalNotifyCompletion
    {
        // Most recently registered continuation; null until UnsafeOnCompleted is first called.
        public Action Continue;

        // Returning this lets an instance be awaited directly.
        public NullAwaiter GetAwaiter() => this;

        // Always false, so awaiting this type always takes the suspension path.
        public bool IsCompleted => false;

        // The await produces no value, so there is nothing to do on resumption.
        public void GetResult()
        {
        }

        // Stores the continuation (overwriting any previous one) without invoking it.
        public void UnsafeOnCompleted(Action continuation)
        {
            Continue = continuation;
        }

        // Not supported by this awaiter: always throws NotImplementedException.
        public void OnCompleted(Action continuation)
        {
            throw new NotImplementedException();
        }
    }
}

This improves performance by about 8%:

-Took 378.3 ms
-Took 380.5 ms
-Took 383.3 ms
-Took 378.9 ms
-Took 379.6 ms
+Took 352.1 ms
+Took 350.0 ms
+Took 350.4 ms
+Took 349.6 ms
+Took 350.5 ms
Codegen diff
@@ -24,7 +24,7 @@ G_M000_IG01:                ;; offset=0x0000
        vmovdqu  ymmword ptr [rbp-0x60], ymm4
        mov      gword ptr [rbp+0x10], rcx
        mov      gword ptr [rbp+0x20], r8
-       mov      ebx, edx
+       mov      esi, edx
  
 G_M000_IG02:                ;; offset=0x0030
        test     rcx, rcx
@@ -42,26 +42,26 @@ G_M000_IG03:                ;; offset=0x0067
        mov      rax, gword ptr [rax+0x10]
        mov      rdx, gword ptr [rax+0x08]
        mov      gword ptr [rbp-0x48], rdx
-       mov      rdx, gword ptr [rax+0x10]
-       mov      gword ptr [rbp-0x50], rdx
+       mov      r8, gword ptr [rax+0x10]
+       mov      gword ptr [rbp-0x50], r8
  
 G_M000_IG04:                ;; offset=0x007B
        xor      eax, eax
-       test     ebx, ebx
+       test     esi, esi
        jle      SHORT G_M000_IG07
  
 G_M000_IG05:                ;; offset=0x0081
-       mov      rdx, 0x7FFA7F47B048
+       mov      rdx, 0x7FFA7F46B048
        align    [0 bytes for IG06]
  
 G_M000_IG06:                ;; offset=0x008B
        add      dword ptr [rdx], eax
        inc      eax
-       cmp      eax, ebx
+       cmp      eax, esi
        jl       SHORT G_M000_IG06
  
 G_M000_IG07:                ;; offset=0x0093
-       mov      rcx, 0x7FFA7F684FF0
+       mov      rcx, 0x7FFA7F674FF0
        call     CORINFO_HELP_NEWSFAST
        mov      rdi, rax
        mov      gword ptr [rbp-0x68], rdi
@@ -76,48 +76,48 @@ G_M000_IG08:                ;; offset=0x00BD
        mov      rdi, gword ptr [rbp-0x68]
        mov      qword ptr [rdi+0x10], rax
        mov      byte  ptr [rdi+0x18], 1
-       cmp      dword ptr [(reloc 0x7ffadf4683e0)], 0
+       cmp      dword ptr [(reloc 0x7ffadf468460)], 0
        jne      SHORT G_M000_IG14
  
 G_M000_IG09:                ;; offset=0x00D8
        mov      rdi, gword ptr [rbp-0x68]
-       test     ebx, ebx
+       test     esi, esi
        jle      SHORT G_M000_IG13
  
 G_M000_IG10:                ;; offset=0x00E0
-       mov      rsi, gword ptr [rbp+0x20]
-       cmp      byte  ptr [rsi], sil
-       mov      r14d, ebx
+       mov      rbx, gword ptr [rbp+0x20]
+       cmp      byte  ptr [rbx], bl
+       mov      r14d, esi
  
-G_M000_IG11:                ;; offset=0x00EA
-       mov      r8, rsi
-       mov      rcx, 0x7FFA7F686100
+G_M000_IG11:                ;; offset=0x00E9
+       mov      r8, rbx
+       mov      rcx, 0x7FFA7F676100
        xor      rdx, rdx
        call     [System.Runtime.CompilerServices.AsyncHelpers:UnsafeAwaitAwaiter[System.__Canon](System.__Canon)]
        test     rcx, rcx
        jne      G_M000_IG22
  
-G_M000_IG12:                ;; offset=0x0108
+G_M000_IG12:                ;; offset=0x0107
        dec      r14d
        jne      SHORT G_M000_IG11
  
-G_M000_IG13:                ;; offset=0x010D
-       cmp      ebx, 100
+G_M000_IG13:                ;; offset=0x010C
+       cmp      esi, 100
        jle      G_M000_IG16
        jmp      SHORT G_M000_IG15
  
-G_M000_IG14:                ;; offset=0x0118
+G_M000_IG14:                ;; offset=0x0117
        call     CORINFO_HELP_POLL_GC
        jmp      SHORT G_M000_IG09
  
-G_M000_IG15:                ;; offset=0x011F
+G_M000_IG15:                ;; offset=0x011E
        mov      rcx, rdi
        call     [System.Diagnostics.Stopwatch:get_ElapsedTicks():long:this]
        vxorps   xmm6, xmm6, xmm6
        vcvtsi2sd xmm6, xmm6, rax
-       mov      rcx, 0x7FFA7F684FF0
+       mov      rcx, 0x7FFA7F674FF0
        call     [CORINFO_HELP_GET_NONGCSTATIC_BASE]
-       vmulsd   xmm0, xmm6, qword ptr [(reloc 0x7ffa7f47b070)]
+       vmulsd   xmm0, xmm6, qword ptr [(reloc 0x7ffa7f46b070)]
        vcmpordsd xmm1, xmm0, xmm0
        vandpd   xmm1, xmm1, xmm0
        mov      rcx, 0x7FFFFFFFFFFFFFFF
@@ -125,25 +125,25 @@ G_M000_IG15:                ;; offset=0x011F
        vucomisd xmm0, qword ptr [reloc @RWD00]
        cmovb    rcx, rax
        mov      qword ptr [rbp-0x58], rcx
-       mov      rcx, 0x7FFA7F565A80
+       mov      rcx, 0x7FFA7F555A80
        call     CORINFO_HELP_NEWSFAST
        mov      rbx, rax
        lea      rcx, [rbp-0x58]
        call     [System.TimeSpan:get_TotalMilliseconds():double:this]
        vmovsd   qword ptr [rbx+0x08], xmm0
        mov      rdx, rbx
-       mov      rcx, 0x1D0BA5B05B8
+       mov      rcx, 0x25A2E3605B8
        call     [System.Console:WriteLine(System.String,System.Object)]
        nop      
  
-G_M000_IG16:                ;; offset=0x01A6
+G_M000_IG16:                ;; offset=0x01A5
        cmp      gword ptr [rbp+0x10], 0
        je       SHORT G_M000_IG20
  
-G_M000_IG17:                ;; offset=0x01AD
+G_M000_IG17:                ;; offset=0x01AC
        xor      ecx, ecx
  
-G_M000_IG18:                ;; offset=0x01AF
+G_M000_IG18:                ;; offset=0x01AE
        vmovaps  xmm6, xmmword ptr [rsp+0x50]
        add      rsp, 104
        pop      rbx
@@ -154,12 +154,12 @@ G_M000_IG18:                ;; offset=0x01AF
        pop      rbp
        ret      
  
-G_M000_IG19:                ;; offset=0x01C2
+G_M000_IG19:                ;; offset=0x01C1
        mov      ecx, 2
        call     CORINFO_HELP_GETDYNAMIC_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED
        jmp      G_M000_IG03
  
-G_M000_IG20:                ;; offset=0x01D1
+G_M000_IG20:                ;; offset=0x01D0
        mov      ecx, 2
        call     CORINFO_HELP_GETDYNAMIC_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED
        mov      rbx, gword ptr [rax+0x10]
@@ -170,7 +170,7 @@ G_M000_IG20:                ;; offset=0x01D1
        mov      rdx, r8
        call     CORINFO_HELP_ASSIGN_REF
  
-G_M000_IG21:                ;; offset=0x01F5
+G_M000_IG21:                ;; offset=0x01F4
        mov      r8, gword ptr [rbx+0x08]
        mov      rdx, gword ptr [rbp-0x48]
        cmp      rdx, r8
@@ -179,7 +179,7 @@ G_M000_IG21:                ;; offset=0x01F5
        call     [System.Threading.ExecutionContext:RestoreChangedContextToThread(System.Threading.Thread,System.Threading.ExecutionContext,System.Threading.ExecutionContext)]
        jmp      SHORT G_M000_IG17
  
-G_M000_IG22:                ;; offset=0x020D
+G_M000_IG22:                ;; offset=0x020C
        mov      rax, rcx
        mov      rcx, gword ptr [rbp+0x10]
        mov      r15, rcx
@@ -190,38 +190,35 @@ G_M000_IG22:                ;; offset=0x020D
        call     CORINFO_HELP_ASSIGN_REF
        jmp      SHORT G_M000_IG24
  
-G_M000_IG23:                ;; offset=0x022A
+G_M000_IG23:                ;; offset=0x0229
        mov      rcx, rax
-       mov      rdx, 0x7FFA7F6CD660
+       mov      rdx, 0x7FFA7F6BD660
        call     [CORINFO_HELP_ALLOC_CONTINUATION]
        mov      r15, rax
        lea      rcx, bword ptr [r15+0x28]
-       mov      rdx, rsi
+       mov      rdx, rbx
        call     CORINFO_HELP_ASSIGN_REF
        lea      rcx, bword ptr [r15+0x30]
        mov      rdx, rdi
        call     CORINFO_HELP_ASSIGN_REF
-       mov      dword ptr [r15+0x38], ebx
+       mov      dword ptr [r15+0x38], esi
  
-G_M000_IG24:                ;; offset=0x025C
-       lea      rax, [reloc @RWD08]
-       mov      qword ptr [r15+0x10], rax
-       xor      eax, eax
-       mov      qword ptr [r15+0x18], rax
+G_M000_IG24:                ;; offset=0x025B
+       lea      rcx, [reloc @RWD08]
+       mov      qword ptr [r15+0x10], rcx
+       xor      ecx, ecx
+       mov      qword ptr [r15+0x18], rcx
        mov      dword ptr [r15+0x3C], r14d
-       call     [System.Runtime.CompilerServices.AsyncHelpers:CaptureExecutionContext():System.Threading.ExecutionContext]
        lea      rcx, bword ptr [r15+0x20]
-       mov      rdx, rax
-       call     CORINFO_HELP_ASSIGN_REF
        cmp      gword ptr [rbp+0x10], 0
-       setne    cl
-       movzx    rcx, cl
-       mov      rdx, gword ptr [rbp-0x48]
-       mov      r8, gword ptr [rbp-0x50]
-       call     [System.Runtime.CompilerServices.AsyncHelpers:RestoreContextsOnSuspension(bool,System.Threading.ExecutionContext,System.Threading.SynchronizationContext)]
+       setne    dl
+       movzx    rdx, dl
+       mov      r8, gword ptr [rbp-0x48]
+       mov      r9, gword ptr [rbp-0x50]
+       call     [System.Runtime.CompilerServices.AsyncHelpers:FinishSuspensionNoContinuationContext(byref,bool,System.Threading.ExecutionContext,System.Threading.SynchronizationContext)]
        mov      rcx, r15
  
-G_M000_IG25:                ;; offset=0x029F
+G_M000_IG25:                ;; offset=0x0290
        vmovaps  xmm6, xmmword ptr [rsp+0x50]
        add      rsp, 104
        pop      rbx
@@ -232,22 +229,22 @@ G_M000_IG25:                ;; offset=0x029F
        pop      rbp
        ret      
  
-G_M000_IG26:                ;; offset=0x02B2
+G_M000_IG26:                ;; offset=0x02A3
        mov      rcx, gword ptr [rbp+0x10]
        mov      rcx, gword ptr [rcx+0x20]
        call     [System.Runtime.CompilerServices.AsyncHelpers:RestoreExecutionContext(System.Threading.ExecutionContext)]
        mov      rcx, gword ptr [rbp+0x10]
-       mov      rsi, gword ptr [rcx+0x28]
+       mov      rbx, gword ptr [rcx+0x28]
        mov      rdi, gword ptr [rcx+0x30]
-       mov      ebx, dword ptr [rcx+0x38]
+       mov      esi, dword ptr [rcx+0x38]
        mov      r14d, dword ptr [rcx+0x3C]
        jmp      G_M000_IG12
  
-G_M000_IG27:                ;; offset=0x02D8
+G_M000_IG27:                ;; offset=0x02C9
        sub      rsp, 40
        vzeroupper 
  
-G_M000_IG28:                ;; offset=0x02DF
+G_M000_IG28:                ;; offset=0x02D0
        cmp      gword ptr [rbp+0x10], 0
        setne    cl
        movzx    rcx, cl
@@ -256,7 +253,7 @@ G_M000_IG28:                ;; offset=0x02DF
        call     [System.Runtime.CompilerServices.AsyncHelpers:RestoreContexts(bool,System.Threading.ExecutionContext,System.Threading.SynchronizationContext)]
        nop      
  
-G_M000_IG29:                ;; offset=0x02F9
+G_M000_IG29:                ;; offset=0x02EA
        add      rsp, 40
        ret      
  
@@ -264,4 +261,4 @@ RWD00  	dq	43E0000000000000h
 RWD08  	dq	(dynamicClass):IL_STUB_AsyncResume(System.Object,byref):System.Object
 	dq	G_M000_IG22 + 3
 
-; Total bytes of code 766
+; Total bytes of code 751

Copilot AI review requested due to automatic review settings March 24, 2026 16:48
@github-actions github-actions bot added the area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI label Mar 24, 2026
@dotnet-policy-service
Copy link
Contributor

Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
See info in area-owners.md if you want to be subscribed.

Copy link
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

This PR introduces two new AsyncHelpers runtime helpers that allow the JIT to finish async suspension context handling with a single call, reducing generated code size and improving performance by avoiding multiple calls / repeated Thread TLS access.

Changes:

  • Add AsyncHelpers.FinishSuspensionNoContinuationContext / FinishSuspensionWithContinuationContext and update the JIT async suspension lowering to use them when execution context capture is needed.
  • Plumb the new helper method handles through CORINFO_ASYNC_INFO and CoreLib binder + jitinterface, and teach R2R/NativeAOT dependency scanning to reference the methods.
  • Add an ExecutionContext internal instance property to cheaply query flow-suppressed state from the helper.

Reviewed changes

Copilot reviewed 11 out of 11 changed files in this pull request and generated 7 comments.

Show a summary per file
File Description
src/libraries/System.Private.CoreLib/src/System/Threading/ExecutionContext.cs Exposes flow-suppressed state via a new internal instance property for use by suspension helpers.
src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs Adds the new FinishSuspension helper implementations used by JIT-generated suspension code.
src/coreclr/inc/corinfo.h Extends CORINFO_ASYNC_INFO with two new helper method handles.
src/coreclr/vm/jitinterface.cpp Populates the new helper handles for the JIT via CEEInfo::getAsyncInfo.
src/coreclr/vm/corelib.h Adds CoreLib binder entries for the two new helper methods.
src/coreclr/jit/async.h Declares new JIT helper routines to emit the consolidated suspension context handling call.
src/coreclr/jit/async.cpp Implements the new suspension context handling emission path using the new helpers.
src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs Updates managed CORINFO_ASYNC_INFO struct layout to match native.
src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs Supplies the new helper method handles in the managed JIT interface implementation.
src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs Adds the new helper methods as necessary async references for R2R compilation.
src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs Ensures async scanning records dependencies on the new helper methods.

Copilot AI review requested due to automatic review settings March 24, 2026 17:01
Copy link
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

Copilot reviewed 11 out of 11 changed files in this pull request and generated 1 comment.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI runtime-async

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants