Skip to content

Commit e0cd0f5

Browse files
committed
Merge commit '374874c3e6d8c4745a52b3bc22044db607b2c9e7' into msvc-tail-call-new
2 parents b8d3fdd + 374874c commit e0cd0f5

File tree

12 files changed

+484
-650
lines changed

12 files changed

+484
-650
lines changed

.github/workflows/tail-call.yml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,19 +79,17 @@ jobs:
7979
with:
8080
python-version: '3.11'
8181

82-
- name: Native Windows (debug)
82+
- name: Native Windows MSVC (PGO)
8383
if: runner.os == 'Windows' && matrix.architecture != 'ARM64'
8484
shell: cmd
8585
run: |
86-
choco install llvm --allow-downgrade --no-progress --version ${{ matrix.llvm }}.1.0
87-
set PlatformToolset=clangcl
88-
set LLVMToolsVersion=${{ matrix.llvm }}.1.0
89-
set LLVMInstallDir=C:\Program Files\LLVM
90-
call ./PCbuild/build.bat --tail-call-interp -d -p ${{ matrix.architecture }}
91-
call ./PCbuild/rt.bat -d -p ${{ matrix.architecture }} -q --multiprocess 0 --timeout 4500 --verbose2 --verbose3
86+
choco install visualstudio2026buildtools-preview --pre -allWorkloads
87+
set PATH=C:\Program Files\Microsoft Visual Studio\18\Insiders\MSBuild\Current\Bin\;%PATH%
88+
./PCbuild/build.bat --tail-call-interp --pgo -p ${{ matrix.architecture }} "/p:PlatformToolset=v145"
89+
./PCbuild/rt.bat -p ${{ matrix.architecture }} -q --multiprocess 0 --timeout 4500 --verbose2 --verbose3
9290
9391
# No tests (yet):
94-
- name: Emulated Windows (release)
92+
- name: Emulated Windows Clang (release)
9593
if: runner.os == 'Windows' && matrix.architecture == 'ARM64'
9694
shell: cmd
9795
run: |

Include/internal/pycore_ceval.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,17 @@ _Py_VectorCall_StackRefSteal(
415415
int total_args,
416416
_PyStackRef kwnames);
417417

418+
PyAPI_FUNC(PyObject*)
419+
_Py_VectorCallInstrumentation_StackRefSteal(
420+
_PyStackRef callable,
421+
_PyStackRef* arguments,
422+
int total_args,
423+
_PyStackRef kwnames,
424+
bool call_instrumentation,
425+
_PyInterpreterFrame* frame,
426+
_Py_CODEUNIT* this_instr,
427+
PyThreadState* tstate);
428+
418429
PyAPI_FUNC(PyObject *)
419430
_Py_BuiltinCallFast_StackRefSteal(
420431
_PyStackRef callable,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow building CPython with the tail-calling interpreter using Visual Studio 2026 MSVC. This provides a performance gain over the prior interpreter for MSVC. Patch by Ken Jin, Brandt Bucher, and Chris Eibl, with help from the MSVC team, including Hulon Jenkins.

Objects/abstract.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,14 @@ PyObject_GetItem(PyObject *o, PyObject *key)
205205
return type_error("'%.200s' object is not subscriptable", o);
206206
}
207207

208+
// Without Py_NO_INLINE here, MSVC fails a tail-call release build with many
209+
// instances of "error C4737: Unable to perform required tail call."
210+
// PGO builds work fine, so the attribute is skipped for them.
211+
#if defined(_MSC_VER) && !defined(__clang__) && _Py_TAIL_CALL_INTERP && !defined(_Py_USING_PGO)
212+
Py_NO_INLINE
213+
#endif
208214
int
209-
PyMapping_GetOptionalItem(PyObject *obj, PyObject *key, PyObject **result)
215+
PyMapping_GetOptionalItem(PyObject *obj, PyObject *key, PyObject **restrict result)
210216
{
211217
if (PyDict_CheckExact(obj)) {
212218
return PyDict_GetItemRef(obj, key, result);

PCbuild/pythoncore.vcxproj

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,9 @@
600600
<ClCompile Include="..\Python\bltinmodule.c" />
601601
<ClCompile Include="..\Python\bootstrap_hash.c" />
602602
<ClCompile Include="..\Python\brc.c" />
603-
<ClCompile Include="..\Python\ceval.c" />
603+
<ClCompile Include="..\Python\ceval.c">
604+
<AdditionalOptions Condition="'$(UseTailCallInterp)' == 'true' and $(PlatformToolset) != 'ClangCL'">/std:clatest %(AdditionalOptions)</AdditionalOptions>
605+
</ClCompile>
604606
<ClCompile Include="..\Python\codecs.c" />
605607
<ClCompile Include="..\Python\codegen.c" />
606608
<ClCompile Include="..\Python\compile.c" />

Python/bytecodes.c

Lines changed: 43 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -2185,8 +2185,11 @@ dummy_func(
21852185
}
21862186
// we make no attempt to optimize here; specializations should
21872187
// handle any case whose performance we care about
2188-
PyObject *stack[] = {class, self};
2189-
PyObject *super = PyObject_Vectorcall(global_super, stack, oparg & 2, NULL);
2188+
PyObject *super;
2189+
{
2190+
PyObject *stack[] = {class, self};
2191+
super = PyObject_Vectorcall(global_super, stack, oparg & 2, NULL);
2192+
}
21902193
if (opcode == INSTRUMENTED_LOAD_SUPER_ATTR) {
21912194
PyObject *arg = oparg & 2 ? class : &_PyInstrumentation_MISSING;
21922195
if (super == NULL) {
@@ -2245,8 +2248,12 @@ dummy_func(
22452248
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 2);
22462249
PyTypeObject *cls = (PyTypeObject *)class;
22472250
int method_found = 0;
2248-
PyObject *attr_o = _PySuper_Lookup(cls, self, name,
2249-
Py_TYPE(self)->tp_getattro == PyObject_GenericGetAttr ? &method_found : NULL);
2251+
PyObject *attr_o;
2252+
{
2253+
int *method_found_ptr = &method_found;
2254+
attr_o = _PySuper_Lookup(cls, self, name,
2255+
Py_TYPE(self)->tp_getattro == PyObject_GenericGetAttr ? method_found_ptr : NULL);
2256+
}
22502257
if (attr_o == NULL) {
22512258
ERROR_NO_POP();
22522259
}
@@ -3472,10 +3479,13 @@ dummy_func(
34723479
}
34733480
assert(PyStackRef_IsTaggedInt(lasti));
34743481
(void)lasti; // Shut up compiler warning if asserts are off
3475-
PyObject *stack[5] = {NULL, PyStackRef_AsPyObjectBorrow(exit_self), exc, val_o, tb};
3476-
int has_self = !PyStackRef_IsNull(exit_self);
3477-
PyObject *res_o = PyObject_Vectorcall(exit_func_o, stack + 2 - has_self,
3478-
(3 + has_self) | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
3482+
PyObject* res_o;
3483+
{
3484+
PyObject *stack[5] = {NULL, PyStackRef_AsPyObjectBorrow(exit_self), exc, val_o, tb};
3485+
int has_self = !PyStackRef_IsNull(exit_self);
3486+
res_o = PyObject_Vectorcall(exit_func_o, stack + 2 - has_self,
3487+
(3 + has_self) | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
3488+
}
34793489
Py_XDECREF(original_tb);
34803490
ERROR_IF(res_o == NULL);
34813491
res = PyStackRef_FromPyObjectSteal(res_o);
@@ -3707,36 +3717,18 @@ dummy_func(
37073717
frame->return_offset = INSTRUCTION_SIZE;
37083718
DISPATCH_INLINED(new_frame);
37093719
}
3710-
/* Callable is not a normal Python function */
3711-
STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
3712-
if (CONVERSION_FAILED(args_o)) {
3713-
DECREF_INPUTS();
3714-
ERROR_IF(true);
3715-
}
3716-
PyObject *res_o = PyObject_Vectorcall(
3717-
callable_o, args_o,
3718-
total_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
3719-
NULL);
3720-
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
3721-
if (opcode == INSTRUMENTED_CALL) {
3722-
PyObject *arg = total_args == 0 ?
3723-
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]);
3724-
if (res_o == NULL) {
3725-
_Py_call_instrumentation_exc2(
3726-
tstate, PY_MONITORING_EVENT_C_RAISE,
3727-
frame, this_instr, callable_o, arg);
3728-
}
3729-
else {
3730-
int err = _Py_call_instrumentation_2args(
3731-
tstate, PY_MONITORING_EVENT_C_RETURN,
3732-
frame, this_instr, callable_o, arg);
3733-
if (err < 0) {
3734-
Py_CLEAR(res_o);
3735-
}
3736-
}
3737-
}
3738-
assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
3739-
DECREF_INPUTS();
3720+
PyObject* res_o = _Py_VectorCallInstrumentation_StackRefSteal(
3721+
callable,
3722+
arguments,
3723+
total_args,
3724+
PyStackRef_NULL,
3725+
opcode == INSTRUMENTED_CALL,
3726+
frame,
3727+
this_instr,
3728+
tstate);
3729+
DEAD(args);
3730+
DEAD(self_or_null);
3731+
DEAD(callable);
37403732
ERROR_IF(res_o == NULL);
37413733
res = PyStackRef_FromPyObjectSteal(res_o);
37423734
}
@@ -4587,35 +4579,19 @@ dummy_func(
45874579
frame->return_offset = INSTRUCTION_SIZE;
45884580
DISPATCH_INLINED(new_frame);
45894581
}
4590-
/* Callable is not a normal Python function */
4591-
STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
4592-
if (CONVERSION_FAILED(args_o)) {
4593-
DECREF_INPUTS();
4594-
ERROR_IF(true);
4595-
}
4596-
PyObject *res_o = PyObject_Vectorcall(
4597-
callable_o, args_o,
4598-
positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
4599-
kwnames_o);
4600-
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
4601-
if (opcode == INSTRUMENTED_CALL_KW) {
4602-
PyObject *arg = total_args == 0 ?
4603-
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]);
4604-
if (res_o == NULL) {
4605-
_Py_call_instrumentation_exc2(
4606-
tstate, PY_MONITORING_EVENT_C_RAISE,
4607-
frame, this_instr, callable_o, arg);
4608-
}
4609-
else {
4610-
int err = _Py_call_instrumentation_2args(
4611-
tstate, PY_MONITORING_EVENT_C_RETURN,
4612-
frame, this_instr, callable_o, arg);
4613-
if (err < 0) {
4614-
Py_CLEAR(res_o);
4615-
}
4616-
}
4617-
}
4618-
DECREF_INPUTS();
4582+
PyObject* res_o = _Py_VectorCallInstrumentation_StackRefSteal(
4583+
callable,
4584+
arguments,
4585+
total_args,
4586+
kwnames,
4587+
opcode == INSTRUMENTED_CALL_KW,
4588+
frame,
4589+
this_instr,
4590+
tstate);
4591+
DEAD(kwnames);
4592+
DEAD(args);
4593+
DEAD(self_or_null);
4594+
DEAD(callable);
46194595
ERROR_IF(res_o == NULL);
46204596
res = PyStackRef_FromPyObjectSteal(res_o);
46214597
}

Python/ceval.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,65 @@ _Py_VectorCall_StackRefSteal(
10711071
return res;
10721072
}
10731073

1074+
PyObject*
1075+
_Py_VectorCallInstrumentation_StackRefSteal(
1076+
_PyStackRef callable,
1077+
_PyStackRef* arguments,
1078+
int total_args,
1079+
_PyStackRef kwnames,
1080+
bool call_instrumentation,
1081+
_PyInterpreterFrame* frame,
1082+
_Py_CODEUNIT* this_instr,
1083+
PyThreadState* tstate)
1084+
{
1085+
PyObject* res;
1086+
STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
1087+
if (CONVERSION_FAILED(args_o)) {
1088+
res = NULL;
1089+
goto cleanup;
1090+
}
1091+
PyObject* callable_o = PyStackRef_AsPyObjectBorrow(callable);
1092+
PyObject* kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames);
1093+
int positional_args = total_args;
1094+
if (kwnames_o != NULL) {
1095+
positional_args -= (int)PyTuple_GET_SIZE(kwnames_o);
1096+
}
1097+
res = PyObject_Vectorcall(
1098+
callable_o, args_o,
1099+
positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
1100+
kwnames_o);
1101+
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
1102+
if (call_instrumentation) {
1103+
PyObject* arg = total_args == 0 ?
1104+
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]);
1105+
if (res == NULL) {
1106+
_Py_call_instrumentation_exc2(
1107+
tstate, PY_MONITORING_EVENT_C_RAISE,
1108+
frame, this_instr, callable_o, arg);
1109+
}
1110+
else {
1111+
int err = _Py_call_instrumentation_2args(
1112+
tstate, PY_MONITORING_EVENT_C_RETURN,
1113+
frame, this_instr, callable_o, arg);
1114+
if (err < 0) {
1115+
Py_CLEAR(res);
1116+
}
1117+
}
1118+
}
1119+
assert((res != NULL) ^ (PyErr_Occurred() != NULL));
1120+
cleanup:
1121+
PyStackRef_XCLOSE(kwnames);
1122+
// arguments is a pointer into the GC visible stack,
1123+
// so we must NULL out values as we clear them.
1124+
for (int i = total_args - 1; i >= 0; i--) {
1125+
_PyStackRef tmp = arguments[i];
1126+
arguments[i] = PyStackRef_NULL;
1127+
PyStackRef_CLOSE(tmp);
1128+
}
1129+
PyStackRef_CLOSE(callable);
1130+
return res;
1131+
}
1132+
10741133
PyObject *
10751134
_Py_BuiltinCallFast_StackRefSteal(
10761135
_PyStackRef callable,

Python/ceval_macros.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,19 @@
8787
# elif defined(_MSC_VER) && (_MSC_VER < 1950)
8888
# error "You need at least VS 2026 / PlatformToolset v145 for tail calling."
8989
# endif
90-
91-
// Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
92-
# define Py_MUSTTAIL [[clang::musttail]]
93-
# define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
94-
Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);
90+
# if defined(_MSC_VER) && !defined(__clang__)
91+
# define Py_MUSTTAIL [[msvc::musttail]]
92+
# define Py_PRESERVE_NONE_CC __preserve_none
93+
# else
94+
# define Py_MUSTTAIL __attribute__((musttail))
95+
# define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
96+
# endif
97+
typedef PyObject *(Py_PRESERVE_NONE_CC *py_tail_call_funcptr)(TAIL_CALL_PARAMS);
9598

9699
# define DISPATCH_TABLE_VAR instruction_funcptr_table
97100
# define DISPATCH_TABLE instruction_funcptr_handler_table
98101
# define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table
99-
# define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
102+
# define TARGET(op) Py_NO_INLINE PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_##op(TAIL_CALL_PARAMS)
100103

101104
# define DISPATCH_GOTO() \
102105
do { \

Python/executor_cases.c.h

Lines changed: 27 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)