From 3153d7fe6f472e2151ffbbc03fffc64345d438b6 Mon Sep 17 00:00:00 2001
From: Sam Gross
Date: Mon, 9 Mar 2026 12:16:11 -0400
Subject: [PATCH 1/3] gh-145685: Avoid contention on TYPE_LOCK in super() lookups

---
 Objects/typeobject.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 5dc96bf251b384..340ae16d10312c 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -12360,18 +12360,17 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject *
     PyObject *mro, *res;
     Py_ssize_t i, n;
 
-    BEGIN_TYPE_LOCK();
     mro = lookup_tp_mro(su_obj_type);
-    /* keep a strong reference to mro because su_obj_type->tp_mro can be
-       replaced during PyDict_GetItemRef(dict, name, &res) and because
-       another thread can modify it after we end the critical section
-       below */
-    Py_XINCREF(mro);
-    END_TYPE_LOCK();
-
     if (mro == NULL)
         return NULL;
 
+    /* Keep a strong reference to mro because su_obj_type->tp_mro can be
+       replaced during PyDict_GetItemRef(dict, name, &res). */
+    PyThreadState *tstate = _PyThreadState_GET();
+    _PyCStackRef mro_ref;
+    _PyThreadState_PushCStackRef(tstate, &mro_ref);
+    mro_ref.ref = PyStackRef_FromPyObjectNew(mro);
+
     assert(PyTuple_Check(mro));
     n = PyTuple_GET_SIZE(mro);
 
@@ -12382,7 +12381,7 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject *
     }
     i++; /* skip su->type (if any) */
     if (i >= n) {
-        Py_DECREF(mro);
+        _PyThreadState_PopCStackRef(tstate, &mro_ref);
         return NULL;
     }
 
@@ -12393,13 +12392,13 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject *
 
         if (PyDict_GetItemRef(dict, name, &res) != 0) {
             // found or error
-            Py_DECREF(mro);
+            _PyThreadState_PopCStackRef(tstate, &mro_ref);
             return res;
         }
 
         i++;
     } while (i < n);
-    Py_DECREF(mro);
+    _PyThreadState_PopCStackRef(tstate, &mro_ref);
     return NULL;
 }
 

From f6546b56a2ddc473373c64334e5662bf4f7149f5 Mon Sep 17 00:00:00 2001
From: Sam Gross
Date: Tue, 10 Mar 2026 19:55:15 -0400
Subject: [PATCH 2/3] Refactor out _PyThreadState_PushCStackRefNew

---
 Include/internal/pycore_stackref.h | 7 +++++++
 Objects/typeobject.c               | 3 +--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h
index 69d667b4be47d2..188da775eb1cc7 100644
--- a/Include/internal/pycore_stackref.h
+++ b/Include/internal/pycore_stackref.h
@@ -770,6 +770,13 @@ _PyThreadState_PushCStackRef(PyThreadState *tstate, _PyCStackRef *ref)
     ref->ref = PyStackRef_NULL;
 }
 
+static inline void
+_PyThreadState_PushCStackRefNew(PyThreadState *tstate, _PyCStackRef *ref, PyObject *obj)
+{
+    _PyThreadState_PushCStackRef(tstate, ref);
+    ref->ref = PyStackRef_FromPyObjectNew(obj);
+}
+
 static inline void
 _PyThreadState_PopCStackRef(PyThreadState *tstate, _PyCStackRef *ref)
 {
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 340ae16d10312c..bb473dce68f65b 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -12368,8 +12368,7 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject *
        replaced during PyDict_GetItemRef(dict, name, &res). */
     PyThreadState *tstate = _PyThreadState_GET();
     _PyCStackRef mro_ref;
-    _PyThreadState_PushCStackRef(tstate, &mro_ref);
-    mro_ref.ref = PyStackRef_FromPyObjectNew(mro);
+    _PyThreadState_PushCStackRefNew(tstate, &mro_ref, mro);
 
     assert(PyTuple_Check(mro));
     n = PyTuple_GET_SIZE(mro);

From 5f50767e9f037e37eb548feb7953ab5491b6bcf8 Mon Sep 17 00:00:00 2001
From: Sam Gross
Date: Tue, 10 Mar 2026 20:21:50 -0400
Subject: [PATCH 3/3] Add scaling benchmark

---
 Tools/ftscalingbench/ftscalingbench.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py
index f60f5adba5c12c..8d8bbc88e7f30a 100644
--- a/Tools/ftscalingbench/ftscalingbench.py
+++ b/Tools/ftscalingbench/ftscalingbench.py
@@ -241,6 +241,22 @@ def instantiate_typing_namedtuple():
     for _ in range(1000 * WORK_SCALE):
         obj = MyTypingNamedTuple(x=1, y=2, z=3)
 
+@register_benchmark
+def super_call():
+    # TODO: super() on the same class from multiple threads still doesn't
+    # scale well, so use a class per-thread here for now.
+    class Base:
+        def method(self):
+            return 1
+
+    class Derived(Base):
+        def method(self):
+            return super().method()
+
+    obj = Derived()
+    for _ in range(1000 * WORK_SCALE):
+        obj.method()
+
 
 @register_benchmark
 def deepcopy():