@@ -629,6 +629,8 @@ init_interpreter(PyInterpreterState *interp,
     assert(next != NULL || (interp == runtime->interpreters.main));
     interp->next = next;
 
+    interp->threads.preallocated = &interp->_initial_thread;
+
     // We would call _PyObject_InitState() at this point
     // if interp->feature_flags were already set.
 
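The new threads.preallocated slot publishes the thread state that is embedded directly in the interpreter struct. A hedged, compilable sketch of the layout this relies on, with placeholder types standing in for the real definitions in pycore_interp.h and pycore_tstate.h (which have many more fields):

/* Sketch only: placeholder types, not CPython's real definitions. */
typedef struct _PyThreadStateImpl { int placeholder; } _PyThreadStateImpl;

typedef struct PyInterpreterState {
    struct {
        /* Points at _initial_thread while that embedded storage is
         * free; NULL while some thread is using it. */
        _PyThreadStateImpl *preallocated;
        /* ... head, next_unique_id, etc. ... */
    } threads;
    /* ... many other fields ... */
    _PyThreadStateImpl _initial_thread;   /* embedded, never heap-freed */
} PyInterpreterState;

static void
init_interpreter_sketch(PyInterpreterState *interp)
{
    /* Mirrors the added line above: mark the embedded tstate available. */
    interp->threads.preallocated = &interp->_initial_thread;
}
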
@@ -766,7 +768,6 @@ PyInterpreterState_New(void)
     return interp;
 }
 
-
 static void
 interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
 {
@@ -910,6 +911,9 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
     // XXX Once we have one allocator per interpreter (i.e.
     // per-interpreter GC) we must ensure that all of the interpreter's
     // objects have been cleaned up at that point.
+
+    // We could clear interp->threads.freelist here
+    // if it held more than just the initial thread state.
 }
 
 
@@ -1386,22 +1390,45 @@ allocate_chunk(int size_in_bytes, _PyStackChunk* previous)
     return res;
 }
 
+static void
+reset_threadstate(_PyThreadStateImpl *tstate)
+{
+    // Set to _PyThreadState_INIT directly?
+    memcpy(tstate,
+           &initial._main_interpreter._initial_thread,
+           sizeof(*tstate));
+}
+
 static _PyThreadStateImpl *
-alloc_threadstate(void)
+alloc_threadstate(PyInterpreterState *interp)
 {
-    return PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
+    _PyThreadStateImpl *tstate;
+
+    // Try the preallocated tstate first.
+    tstate = _Py_atomic_exchange_ptr(&interp->threads.preallocated, NULL);
+
+    // Fall back to the allocator.
+    if (tstate == NULL) {
+        tstate = PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
+        if (tstate == NULL) {
+            return NULL;
+        }
+        reset_threadstate(tstate);
+    }
+    return tstate;
 }
 
 static void
 free_threadstate(_PyThreadStateImpl *tstate)
 {
+    PyInterpreterState *interp = tstate->base.interp;
     // The initial thread state of the interpreter is allocated
     // as part of the interpreter state so should not be freed.
-    if (tstate == &tstate->base.interp->_initial_thread) {
-        // Restore to _PyThreadState_INIT.
-        memcpy(tstate,
-               &initial._main_interpreter._initial_thread,
-               sizeof(*tstate));
+    if (tstate == &interp->_initial_thread) {
+        // Make it available again.
+        reset_threadstate(tstate);
+        assert(interp->threads.preallocated == NULL);
+        _Py_atomic_store_ptr(&interp->threads.preallocated, tstate);
     }
     else {
         PyMem_RawFree(tstate);
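Together, alloc_threadstate() and free_threadstate() now form a lock-free single-slot cache: a taker claims the embedded thread state with an atomic exchange, and its unique owner hands it back with a plain atomic store. A standalone sketch of that pattern using C11 atomics rather than CPython's _Py_atomic_* wrappers (all names here are hypothetical):

#include <stdatomic.h>
#include <stdlib.h>

typedef struct { char payload[64]; } object_t;

static object_t embedded;                      /* like interp->_initial_thread */
static _Atomic(object_t *) slot = &embedded;   /* like threads.preallocated */

static object_t *
acquire(void)
{
    /* Atomically claim the preallocated object; NULL means it's taken. */
    object_t *obj = atomic_exchange(&slot, NULL);
    if (obj == NULL) {
        obj = calloc(1, sizeof(object_t));     /* fall back to the heap */
    }
    return obj;
}

static void
release(object_t *obj)
{
    if (obj == &embedded) {
        /* Only the single owner reaches here, so a plain store suffices;
         * the embedded object must never be passed to free(). */
        atomic_store(&slot, obj);
    }
    else {
        free(obj);
    }
}

int
main(void)
{
    object_t *a = acquire();   /* claims the embedded object */
    object_t *b = acquire();   /* slot is empty, falls back to calloc() */
    release(b);
    release(a);                /* embedded object returns to the slot */
    return 0;
}

This split is also why the error paths in new_threadstate() below change from PyMem_RawFree() to free_threadstate(): the pointer may now be the embedded storage, which must not reach the raw allocator.
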
@@ -1492,66 +1519,38 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
 static PyThreadState *
 new_threadstate(PyInterpreterState *interp, int whence)
 {
-    _PyThreadStateImpl *tstate;
-    _PyRuntimeState *runtime = interp->runtime;
-    // We don't need to allocate a thread state for the main interpreter
-    // (the common case), but doing it later for the other case revealed a
-    // reentrancy problem (deadlock). So for now we always allocate before
-    // taking the interpreters lock. See GH-96071.
-    _PyThreadStateImpl *new_tstate = alloc_threadstate();
-    int used_newtstate;
-    if (new_tstate == NULL) {
+    // Allocate the thread state.
+    _PyThreadStateImpl *tstate = alloc_threadstate(interp);
+    if (tstate == NULL) {
         return NULL;
     }
+
 #ifdef Py_GIL_DISABLED
     Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp);
     if (qsbr_idx < 0) {
-        PyMem_RawFree(new_tstate);
+        free_threadstate(tstate);
         return NULL;
     }
     int32_t tlbc_idx = _Py_ReserveTLBCIndex(interp);
     if (tlbc_idx < 0) {
-        PyMem_RawFree(new_tstate);
+        free_threadstate(tstate);
         return NULL;
     }
 #endif
 
     /* We serialize concurrent creation to protect global state. */
-    HEAD_LOCK(runtime);
+    HEAD_LOCK(interp->runtime);
 
+    // Initialize the new thread state.
     interp->threads.next_unique_id += 1;
     uint64_t id = interp->threads.next_unique_id;
+    init_threadstate(tstate, interp, id, whence);
 
-    // Allocate the thread state and add it to the interpreter.
+    // Add the new thread state to the interpreter.
     PyThreadState *old_head = interp->threads.head;
-    if (old_head == NULL) {
-        // It's the interpreter's initial thread state.
-        used_newtstate = 0;
-        tstate = &interp->_initial_thread;
-    }
-    // XXX Re-use interp->_initial_thread if not in use?
-    else {
-        // Every valid interpreter must have at least one thread.
-        assert(id > 1);
-        assert(old_head->prev == NULL);
-        used_newtstate = 1;
-        tstate = new_tstate;
-        // Set to _PyThreadState_INIT.
-        memcpy(tstate,
-               &initial._main_interpreter._initial_thread,
-               sizeof(*tstate));
-    }
-
-    init_threadstate(tstate, interp, id, whence);
     add_threadstate(interp, (PyThreadState *)tstate, old_head);
 
-    HEAD_UNLOCK(runtime);
-    if (!used_newtstate) {
-        // Must be called with lock unlocked to avoid re-entrancy deadlock.
-        PyMem_RawFree(new_tstate);
-    }
-    else {
-    }
+    HEAD_UNLOCK(interp->runtime);
 
 #ifdef Py_GIL_DISABLED
     // Must be called with lock unlocked to avoid lock ordering deadlocks.
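The reset_threadstate() helper introduced above relies on a common C idiom: instead of resetting fields one by one, it memcpy()s a statically initialized template (the _PyThreadState_INIT image kept in initial._main_interpreter._initial_thread) over the whole object. A tiny self-contained illustration of that idiom, with hypothetical names:

#include <stdio.h>
#include <string.h>

typedef struct {
    int id;
    int status;
} widget_t;

/* Statically initialized template, like _PyThreadState_INIT. */
static const widget_t widget_init = { .id = 0, .status = -1 };

static void
reset_widget(widget_t *w)
{
    /* One memcpy restores every field to its initial value. */
    memcpy(w, &widget_init, sizeof(*w));
}

int
main(void)
{
    widget_t w = { .id = 42, .status = 7 };
    reset_widget(&w);
    printf("%d %d\n", w.id, w.status);   /* prints: 0 -1 */
    return 0;
}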