Skip to content

Commit 0cf3af1

Browse files
committed
[3.14] gh-145779: Improve classmethod/staticmethod scaling in free-threaded build (#145826)
Add special cases for classmethod and staticmethod descriptors in _PyObject_GetMethodStackRef() to avoid calling tp_descr_get, which avoids reference count contention on the bound method and underlying callable. This improves scaling when calling classmethods and staticmethods from multiple threads.

Also refactor method_vectorcall in classobject.c into a new _PyObject_VectorcallPrepend() helper so that it can be used by PyObject_VectorcallMethod() as well.

(cherry picked from commit e0f7c10)
1 parent b3c2ef5 commit 0cf3af1

File tree

14 files changed: 380 additions and 138 deletions.

Include/internal/pycore_call.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ _PyObject_CallMethodIdOneArg(PyObject *self, _Py_Identifier *name, PyObject *arg
9898
}
9999

100100

101+
extern PyObject *_PyObject_VectorcallPrepend(
102+
PyThreadState *tstate,
103+
PyObject *callable,
104+
PyObject *arg,
105+
PyObject *const *args,
106+
size_t nargsf,
107+
PyObject *kwnames);
108+
101109
/* === Vectorcall protocol (PEP 590) ============================= */
102110

103111
// Call callable using tp_call. Arguments are like PyObject_Vectorcall(),

Include/internal/pycore_function.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,17 @@ static inline PyObject* _PyFunction_GET_BUILTINS(PyObject *func) {
4747
#define _PyFunction_GET_BUILTINS(func) _PyFunction_GET_BUILTINS(_PyObject_CAST(func))
4848

4949

50+
/* Get the callable wrapped by a classmethod.
51+
Returns a borrowed reference.
52+
The caller must ensure 'cm' is a classmethod object. */
53+
extern PyObject *_PyClassMethod_GetFunc(PyObject *cm);
54+
55+
/* Get the callable wrapped by a staticmethod.
56+
Returns a borrowed reference.
57+
The caller must ensure 'sm' is a staticmethod object. */
58+
extern PyObject *_PyStaticMethod_GetFunc(PyObject *sm);
59+
60+
5061
#ifdef __cplusplus
5162
}
5263
#endif

Include/internal/pycore_object.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,9 @@ extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *,
897897
extern unsigned int
898898
_PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out);
899899

900+
extern int _PyObject_GetMethodStackRef(PyThreadState *ts, _PyStackRef *self,
901+
PyObject *name, _PyStackRef *method);
902+
900903
// Cache the provided init method in the specialization cache of type if the
901904
// provided type version matches the current version of the type.
902905
//

Include/internal/pycore_stackref.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,13 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj, const char *filename, int linenumbe
127127
}
128128
#define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj), __FILE__, __LINE__)
129129

130+
static inline _PyStackRef
131+
_PyStackRef_FromPyObjectBorrow(PyObject *obj, const char *filename, int linenumber)
132+
{
133+
return _Py_stackref_create(obj, filename, linenumber);
134+
}
135+
#define PyStackRef_FromPyObjectBorrow(obj) _PyStackRef_FromPyObjectBorrow(_PyObject_CAST(obj), __FILE__, __LINE__)
136+
130137
static inline _PyStackRef
131138
_PyStackRef_FromPyObjectImmortal(PyObject *obj, const char *filename, int linenumber)
132139
{
@@ -320,6 +327,14 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj)
320327
}
321328
# define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj))
322329

330+
static inline _PyStackRef
331+
PyStackRef_FromPyObjectBorrow(PyObject *obj)
332+
{
333+
assert(obj != NULL);
334+
assert(((uintptr_t)obj & Py_TAG_BITS) == 0);
335+
return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_DEFERRED };
336+
}
337+
323338
static inline bool
324339
PyStackRef_IsHeapSafe(_PyStackRef stackref)
325340
{
@@ -538,6 +553,13 @@ PyStackRef_FromPyObjectSteal(PyObject *obj)
538553
return ref;
539554
}
540555

556+
static inline _PyStackRef
557+
PyStackRef_FromPyObjectBorrow(PyObject *obj)
558+
{
559+
assert(obj != NULL);
560+
return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT };
561+
}
562+
541563
static inline _PyStackRef
542564
PyStackRef_FromPyObjectStealMortal(PyObject *obj)
543565
{
@@ -753,6 +775,17 @@ _PyThreadState_PopCStackRef(PyThreadState *tstate, _PyCStackRef *ref)
753775
PyStackRef_XCLOSE(ref->ref);
754776
}
755777

778+
static inline _PyStackRef
779+
_PyThreadState_PopCStackRefSteal(PyThreadState *tstate, _PyCStackRef *ref)
780+
{
781+
#ifdef Py_GIL_DISABLED
782+
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
783+
assert(tstate_impl->c_stack_refs == ref);
784+
tstate_impl->c_stack_refs = ref->next;
785+
#endif
786+
return ref->ref;
787+
}
788+
756789
#ifdef Py_GIL_DISABLED
757790

758791
static inline int
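(file path missing from this capture; the two added lines below read as a reStructuredText changelog entry)

Lines changed: 2 additions & 0 deletions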
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Improve scaling of :func:`classmethod` and :func:`staticmethod` calls in
2+
the free-threaded build by avoiding the descriptor ``__get__`` call.

Objects/call.c

Lines changed: 96 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,60 @@ object_vacall(PyThreadState *tstate, PyObject *base,
825825
return result;
826826
}
827827

828+
PyObject *
829+
_PyObject_VectorcallPrepend(PyThreadState *tstate, PyObject *callable,
830+
PyObject *arg, PyObject *const *args,
831+
size_t nargsf, PyObject *kwnames)
832+
{
833+
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
834+
assert(nargs == 0 || args[nargs-1]);
835+
836+
PyObject *result;
837+
if (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET) {
838+
/* PY_VECTORCALL_ARGUMENTS_OFFSET is set, so we are allowed to mutate the vector */
839+
PyObject **newargs = (PyObject**)args - 1;
840+
nargs += 1;
841+
PyObject *tmp = newargs[0];
842+
newargs[0] = arg;
843+
assert(newargs[nargs-1]);
844+
result = _PyObject_VectorcallTstate(tstate, callable, newargs,
845+
nargs, kwnames);
846+
newargs[0] = tmp;
847+
}
848+
else {
849+
Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
850+
Py_ssize_t totalargs = nargs + nkwargs;
851+
if (totalargs == 0) {
852+
return _PyObject_VectorcallTstate(tstate, callable, &arg, 1, NULL);
853+
}
854+
855+
PyObject *newargs_stack[_PY_FASTCALL_SMALL_STACK];
856+
PyObject **newargs;
857+
if (totalargs <= (Py_ssize_t)Py_ARRAY_LENGTH(newargs_stack) - 1) {
858+
newargs = newargs_stack;
859+
}
860+
else {
861+
newargs = PyMem_Malloc((totalargs+1) * sizeof(PyObject *));
862+
if (newargs == NULL) {
863+
_PyErr_NoMemory(tstate);
864+
return NULL;
865+
}
866+
}
867+
/* use borrowed references */
868+
newargs[0] = arg;
869+
/* bpo-37138: since totalargs > 0, it's impossible that args is NULL.
870+
* We need this, since calling memcpy() with a NULL pointer is
871+
* undefined behaviour. */
872+
assert(args != NULL);
873+
memcpy(newargs + 1, args, totalargs * sizeof(PyObject *));
874+
result = _PyObject_VectorcallTstate(tstate, callable,
875+
newargs, nargs+1, kwnames);
876+
if (newargs != newargs_stack) {
877+
PyMem_Free(newargs);
878+
}
879+
}
880+
return result;
881+
}
828882

829883
PyObject *
830884
PyObject_VectorcallMethod(PyObject *name, PyObject *const *args,
@@ -835,28 +889,44 @@ PyObject_VectorcallMethod(PyObject *name, PyObject *const *args,
835889
assert(PyVectorcall_NARGS(nargsf) >= 1);
836890

837891
PyThreadState *tstate = _PyThreadState_GET();
838-
PyObject *callable = NULL;
892+
_PyCStackRef self, method;
893+
_PyThreadState_PushCStackRef(tstate, &self);
894+
_PyThreadState_PushCStackRef(tstate, &method);
839895
/* Use args[0] as "self" argument */
840-
int unbound = _PyObject_GetMethod(args[0], name, &callable);
841-
if (callable == NULL) {
896+
self.ref = PyStackRef_FromPyObjectBorrow(args[0]);
897+
int unbound = _PyObject_GetMethodStackRef(tstate, &self.ref, name, &method.ref);
898+
if (unbound < 0) {
899+
_PyThreadState_PopCStackRef(tstate, &method);
900+
_PyThreadState_PopCStackRef(tstate, &self);
842901
return NULL;
843902
}
844903

845-
if (unbound) {
904+
PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref);
905+
PyObject *self_obj = PyStackRef_AsPyObjectBorrow(self.ref);
906+
PyObject *result;
907+
908+
EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_METHOD, callable);
909+
if (self_obj == NULL) {
910+
/* Skip "self". We can keep PY_VECTORCALL_ARGUMENTS_OFFSET since
911+
* args[-1] in the onward call is args[0] here. */
912+
result = _PyObject_VectorcallTstate(tstate, callable,
913+
args + 1, nargsf - 1, kwnames);
914+
}
915+
else if (self_obj == args[0]) {
846916
/* We must remove PY_VECTORCALL_ARGUMENTS_OFFSET since
847917
* that would be interpreted as allowing to change args[-1] */
848-
nargsf &= ~PY_VECTORCALL_ARGUMENTS_OFFSET;
918+
result = _PyObject_VectorcallTstate(tstate, callable, args,
919+
nargsf & ~PY_VECTORCALL_ARGUMENTS_OFFSET,
920+
kwnames);
849921
}
850922
else {
851-
/* Skip "self". We can keep PY_VECTORCALL_ARGUMENTS_OFFSET since
852-
* args[-1] in the onward call is args[0] here. */
853-
args++;
854-
nargsf--;
923+
/* classmethod: self_obj is the type, not args[0]. Replace
924+
* args[0] with self_obj and call the underlying callable. */
925+
result = _PyObject_VectorcallPrepend(tstate, callable, self_obj,
926+
args + 1, nargsf - 1, kwnames);
855927
}
856-
EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_METHOD, callable);
857-
PyObject *result = _PyObject_VectorcallTstate(tstate, callable,
858-
args, nargsf, kwnames);
859-
Py_DECREF(callable);
928+
_PyThreadState_PopCStackRef(tstate, &method);
929+
_PyThreadState_PopCStackRef(tstate, &self);
860930
return result;
861931
}
862932

@@ -869,19 +939,26 @@ PyObject_CallMethodObjArgs(PyObject *obj, PyObject *name, ...)
869939
return null_error(tstate);
870940
}
871941

872-
PyObject *callable = NULL;
873-
int is_method = _PyObject_GetMethod(obj, name, &callable);
874-
if (callable == NULL) {
942+
_PyCStackRef self, method;
943+
_PyThreadState_PushCStackRef(tstate, &self);
944+
_PyThreadState_PushCStackRef(tstate, &method);
945+
self.ref = PyStackRef_FromPyObjectBorrow(obj);
946+
int res = _PyObject_GetMethodStackRef(tstate, &self.ref, name, &method.ref);
947+
if (res < 0) {
948+
_PyThreadState_PopCStackRef(tstate, &method);
949+
_PyThreadState_PopCStackRef(tstate, &self);
875950
return NULL;
876951
}
877-
obj = is_method ? obj : NULL;
952+
PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref);
953+
PyObject *self_obj = PyStackRef_AsPyObjectBorrow(self.ref);
878954

879955
va_list vargs;
880956
va_start(vargs, name);
881-
PyObject *result = object_vacall(tstate, obj, callable, vargs);
957+
PyObject *result = object_vacall(tstate, self_obj, callable, vargs);
882958
va_end(vargs);
883959

884-
Py_DECREF(callable);
960+
_PyThreadState_PopCStackRef(tstate, &method);
961+
_PyThreadState_PopCStackRef(tstate, &self);
885962
return result;
886963
}
887964

Objects/classobject.c

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -51,54 +51,7 @@ method_vectorcall(PyObject *method, PyObject *const *args,
5151
PyThreadState *tstate = _PyThreadState_GET();
5252
PyObject *self = PyMethod_GET_SELF(method);
5353
PyObject *func = PyMethod_GET_FUNCTION(method);
54-
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
55-
assert(nargs == 0 || args[nargs-1]);
56-
57-
PyObject *result;
58-
if (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET) {
59-
/* PY_VECTORCALL_ARGUMENTS_OFFSET is set, so we are allowed to mutate the vector */
60-
PyObject **newargs = (PyObject**)args - 1;
61-
nargs += 1;
62-
PyObject *tmp = newargs[0];
63-
newargs[0] = self;
64-
assert(newargs[nargs-1]);
65-
result = _PyObject_VectorcallTstate(tstate, func, newargs,
66-
nargs, kwnames);
67-
newargs[0] = tmp;
68-
}
69-
else {
70-
Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
71-
Py_ssize_t totalargs = nargs + nkwargs;
72-
if (totalargs == 0) {
73-
return _PyObject_VectorcallTstate(tstate, func, &self, 1, NULL);
74-
}
75-
76-
PyObject *newargs_stack[_PY_FASTCALL_SMALL_STACK];
77-
PyObject **newargs;
78-
if (totalargs <= (Py_ssize_t)Py_ARRAY_LENGTH(newargs_stack) - 1) {
79-
newargs = newargs_stack;
80-
}
81-
else {
82-
newargs = PyMem_Malloc((totalargs+1) * sizeof(PyObject *));
83-
if (newargs == NULL) {
84-
_PyErr_NoMemory(tstate);
85-
return NULL;
86-
}
87-
}
88-
/* use borrowed references */
89-
newargs[0] = self;
90-
/* bpo-37138: since totalargs > 0, it's impossible that args is NULL.
91-
* We need this, since calling memcpy() with a NULL pointer is
92-
* undefined behaviour. */
93-
assert(args != NULL);
94-
memcpy(newargs + 1, args, totalargs * sizeof(PyObject *));
95-
result = _PyObject_VectorcallTstate(tstate, func,
96-
newargs, nargs+1, kwnames);
97-
if (newargs != newargs_stack) {
98-
PyMem_Free(newargs);
99-
}
100-
}
101-
return result;
54+
return _PyObject_VectorcallPrepend(tstate, func, self, args, nargsf, kwnames);
10255
}
10356

10457

Objects/funcobject.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,7 @@ cm_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
14791479
}
14801480
cm->cm_callable = Py_None;
14811481
cm->cm_dict = NULL;
1482+
_PyObject_SetDeferredRefcount((PyObject *)cm);
14821483
return (PyObject *)cm;
14831484
}
14841485

@@ -1722,6 +1723,7 @@ sm_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
17221723
}
17231724
sm->sm_callable = Py_None;
17241725
sm->sm_dict = NULL;
1726+
_PyObject_SetDeferredRefcount((PyObject *)sm);
17251727
return (PyObject *)sm;
17261728
}
17271729

@@ -1889,3 +1891,17 @@ PyStaticMethod_New(PyObject *callable)
18891891
}
18901892
return (PyObject *)sm;
18911893
}
1894+
1895+
PyObject *
1896+
_PyClassMethod_GetFunc(PyObject *self)
1897+
{
1898+
classmethod *cm = _PyClassMethod_CAST(self);
1899+
return cm->cm_callable;
1900+
}
1901+
1902+
PyObject *
1903+
_PyStaticMethod_GetFunc(PyObject *self)
1904+
{
1905+
staticmethod *sm = _PyStaticMethod_CAST(self);
1906+
return sm->sm_callable;
1907+
}

0 commit comments

Comments
 (0)