-
-
Notifications
You must be signed in to change notification settings - Fork 33.8k
gh-139922: Tail calling for MSVC (VS 2026) #139962
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
82d1259
3248658
9ac430d
085c1d7
0b12f2e
acf48f5
35e96c1
d7737e9
86f19cf
40013cc
48db59e
bc9d23c
19e02c2
66ec774
50f8ff7
5d908b4
0786133
e699d40
66d6c39
5584fec
7eeeaa8
6f3d525
81618e2
2008d1d
7c84388
68b41cf
c7316fc
9214d5b
52c6f9c
34d98d3
7ec626e
ad1c5a2
4155337
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -303,9 +303,9 @@ PyAPI_FUNC(PyObject *)_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, Py | |
| PyAPI_FUNC(void) _PyEval_MonitorRaise(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *instr); | ||
| PyAPI_FUNC(int) _PyEval_UnpackIterableStackRef(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, _PyStackRef *sp); | ||
| PyAPI_FUNC(void) _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); | ||
| PyAPI_FUNC(PyObject **) _PyObjectArray_FromStackRefArray(_PyStackRef *input, Py_ssize_t nargs, PyObject **scratch); | ||
| PyAPI_FUNC(PyObject **) _PyObjectArray_FromStackRefArray(_PyThreadStateImpl *_tstate, _PyStackRef *input, Py_ssize_t nargs); | ||
|
|
||
| PyAPI_FUNC(void) _PyObjectArray_Free(PyObject **array, PyObject **scratch); | ||
| PyAPI_FUNC(void) _PyObjectArray_Free(_PyThreadStateImpl *_tstate, PyObject **array, Py_ssize_t nargs, PyObject **temp_arr); | ||
|
|
||
| PyAPI_FUNC(PyObject *) _PyEval_GetANext(PyObject *aiter); | ||
| PyAPI_FUNC(void) _PyEval_LoadGlobalStackRef(PyObject *globals, PyObject *builtins, PyObject *name, _PyStackRef *writeto); | ||
|
|
@@ -391,6 +391,28 @@ _PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame | |
| #define SPECIAL___AEXIT__ 3 | ||
| #define SPECIAL_MAX 3 | ||
|
|
||
| // Special counterparts of ceval functions for performance reasons | ||
| PyAPI_FUNC(int) _PyEval_Mapping_GetOptionalItem(PyObject *obj, PyObject *key, PyObject **result); | ||
|
|
||
| #if defined(_MSC_VER) && !defined(__clang__) && _Py_TAIL_CALL_INTERP | ||
| # define Py_NO_INLINE_MSVC_TAILCALL Py_NO_INLINE | ||
| #else | ||
| # define Py_NO_INLINE_MSVC_TAILCALL | ||
| #endif | ||
|
|
||
| // Tells the compiler that this variable cannot be aliased. | ||
|
||
| #if defined(_MSC_VER) && !defined(__clang__) | ||
| # define Py_UNALIASED(var) restrict var | ||
| #else | ||
| # define Py_UNALIASED(var) var | ||
| #endif | ||
|
|
||
| // Just a scope. Hints to the programmer and the compiler | ||
| // that any local variable defined within this block MUST | ||
| // NOT escape from the current definition. | ||
| # define Py_BEGIN_LOCALS_MUST_NOT_ESCAPE { | ||
| # define Py_END_LOCALS_MUST_NOT_ESCAPE } | ||
|
|
||
| #ifdef __cplusplus | ||
| } | ||
| #endif | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,9 @@ struct _gc_thread_state { | |
| }; | ||
| #endif | ||
|
|
||
| /* How much scratch space to give stackref to PyObject* conversion. */ | ||
| #define MAX_STACKREF_SCRATCH 1024 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How arbitrary is this amount? What is the usual amount of space required for conversions?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We used to arbitrarily always consume 10 PyObject/PyStackRef per call. Meaning even if you had a single item, the whole 10 slot space would be used. |
||
|
|
||
| // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The | ||
| // PyThreadState fields are exposed as part of the C API, although most fields | ||
| // are intended to be private. The _PyThreadStateImpl fields are not exposed. | ||
|
|
@@ -47,6 +50,8 @@ typedef struct _PyThreadStateImpl { | |
| struct _qsbr_thread_state *qsbr; // only used by free-threaded build | ||
| struct llist_node mem_free_queue; // delayed free queue | ||
|
|
||
| PyObject *stackref_scratch[MAX_STACKREF_SCRATCH]; | ||
| int n_stackref_scratch_used; | ||
| #ifdef Py_GIL_DISABLED | ||
| // Stack references for the current thread that exist on the C stack | ||
| struct _PyCStackRef *c_stack_refs; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Allow building CPython with the tail calling interpreter on Visual Studio 2026 MSVC. This provides a performance gain over the prior interpreter for MSVC. Patch by Ken Jin, Brandt Bucher, and Chris Eibl. With help from the MSVC team including Hulon Jenkins. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -595,7 +595,9 @@ | |
| <ClCompile Include="..\Python\bltinmodule.c" /> | ||
| <ClCompile Include="..\Python\bootstrap_hash.c" /> | ||
| <ClCompile Include="..\Python\brc.c" /> | ||
| <ClCompile Include="..\Python\ceval.c" /> | ||
| <ClCompile Include="..\Python\ceval.c"> | ||
| <AdditionalOptions Condition="'$(UseTailCallInterp)' == 'true' and $(PlatformToolset) != 'ClangCL'">/std:clatest %(AdditionalOptions)</AdditionalOptions> | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if we pass this option to clang-cl? Does it break? Any possibility of passing a specific /std:c rather than "latest"?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I've worked with Chris on this and apparently it builds with clang-cl still the last time I checked with him? @chris-eibl
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yupp, clang-cl doesn't like
But I assume MSVC needs
Yeah, we could explicitly enforce
Yes, it still does. Having done a lot of builds recently and still playing around.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm under the impression we need clatest for msvc musttail as well.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've just verified again:
So atm we're stuck with this construct until MSVC introduces a new "fixed" option (or maybe accepts I agree with Steve #135927 (comment)
and we should revisit this once we know a fixed version option for it. |
||
| </ClCompile> | ||
| <ClCompile Include="..\Python\codecs.c" /> | ||
| <ClCompile Include="..\Python\codegen.c" /> | ||
| <ClCompile Include="..\Python\compile.c" /> | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the only difference the restrict annotation?
We could just add the annotation to
`PyMapping_GetOptionalItem`; the `result` pointer can never alias the other parameters.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. We can't because
`PyMapping_GetOptionalItem` is part of the public API, and we can't change the signature of it.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We aren't changing the signature, just telling the compiler that one of its parameters can't alias the others.
I'm surprised we need to do this anyway. I would have thought that strict aliasing already tells MSVC that they can't alias.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My understanding is, that strict aliasing can't help here, since in
all parameters are of type
`PyObject`. And by using `restrict` we tell the compiler that "we assure you that none of these `PyObject`s point to the same memory", and if we break that contract, UB kicks in.