diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 0cb57679df331d..a9d97e47e005df 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -105,7 +105,7 @@ struct _ts { # define _PyThreadState_WHENCE_INIT 1 # define _PyThreadState_WHENCE_FINI 2 # define _PyThreadState_WHENCE_THREADING 3 -# define _PyThreadState_WHENCE_GILSTATE 4 +# define _PyThreadState_WHENCE_C_API 4 # define _PyThreadState_WHENCE_EXEC 5 # define _PyThreadState_WHENCE_THREADING_DAEMON 6 #endif @@ -239,6 +239,20 @@ struct _ts { // structure and all share the same per-interpreter structure). PyStats *pystats; #endif + + struct { + /* Number of nested PyThreadState_Ensure() calls on this thread state */ + Py_ssize_t counter; + + /* Should this thread state be deleted upon calling + PyThreadState_Release() (with the counter at 1)? + + This is only true for thread states created by PyThreadState_Ensure() */ + int delete_on_release; + + /* The interpreter guard owned by PyThreadState_EnsureFromView(), if any. */ + PyInterpreterGuard *owned_guard; + } ensure; }; /* other API */ diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 2bfb84da36cbc8..6a0141a88bd526 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -1050,6 +1050,12 @@ struct _is { #endif #endif + struct { + _PyRWMutex lock; + Py_ssize_t countdown; + PyEvent done; + } finalization_guards; + /* the initial PyInterpreterState.threads.head */ _PyThreadStateImpl _initial_thread; // _initial_thread should be the last field of PyInterpreterState. diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 189a8dde9f09ed..317dea7c1a4a98 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -338,6 +338,21 @@ _Py_RecursionLimit_GetMargin(PyThreadState *tstate) #endif } +/* PEP 788 structures. 
*/ + +struct _PyInterpreterGuard { + PyInterpreterState *interp; +}; + +struct _PyInterpreterView { + int64_t id; + Py_ssize_t refcount; +}; + +// Exports for '_testinternalcapi' shared extension +PyAPI_FUNC(Py_ssize_t) _PyInterpreterState_GuardCountdown(PyInterpreterState *interp); +PyAPI_FUNC(PyInterpreterState *) _PyInterpreterGuard_GetInterpreter(PyInterpreterGuard *guard); + #ifdef __cplusplus } #endif diff --git a/Include/pystate.h b/Include/pystate.h index 727b8fbfffe0e6..254702d43e8728 100644 --- a/Include/pystate.h +++ b/Include/pystate.h @@ -120,6 +120,23 @@ PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE); PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void); +/* PEP 788 -- Interpreter guards and views. */ + +typedef struct _PyInterpreterGuard PyInterpreterGuard; +typedef struct _PyInterpreterView PyInterpreterView; + +PyAPI_FUNC(PyInterpreterGuard *) PyInterpreterGuard_FromCurrent(void); +PyAPI_FUNC(void) PyInterpreterGuard_Close(PyInterpreterGuard *guard); +PyAPI_FUNC(PyInterpreterGuard *) PyInterpreterGuard_FromView(PyInterpreterView *view); + +PyAPI_FUNC(PyInterpreterView *) PyInterpreterView_FromCurrent(void); +PyAPI_FUNC(void) PyInterpreterView_Close(PyInterpreterView *view); +PyAPI_FUNC(PyInterpreterView *) PyInterpreterView_FromMain(void); + +PyAPI_FUNC(PyThreadState *) PyThreadState_Ensure(PyInterpreterGuard *guard); +PyAPI_FUNC(PyThreadState *) PyThreadState_EnsureFromView(PyInterpreterView *view); +PyAPI_FUNC(void) PyThreadState_Release(PyThreadState *tstate); + #ifndef Py_LIMITED_API # define Py_CPYTHON_PYSTATE_H # include "cpython/pystate.h" diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 1087cbd0836fd8..831aae9f264519 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1993,10 +1993,18 @@ def test_audit_run_stdin(self): def test_get_incomplete_frame(self): self.run_embedded_interpreter("test_get_incomplete_frame") - def test_gilstate_after_finalization(self): 
self.run_embedded_interpreter("test_gilstate_after_finalization") + def test_thread_state_ensure(self): + self.run_embedded_interpreter("test_thread_state_ensure") + + def test_main_interpreter_view(self): + self.run_embedded_interpreter("test_main_interpreter_view") + + def test_thread_state_ensure_from_view(self): + self.run_embedded_interpreter("test_thread_state_ensure_from_view") + class MiscTests(EmbeddingTestsMixin, unittest.TestCase): def test_unicode_id_init(self): diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 3ebe4ceea6a72e..feb651e269b68b 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -2606,6 +2606,240 @@ create_managed_weakref_nogc_type(PyObject *self, PyObject *Py_UNUSED(args)) return PyType_FromSpec(&ManagedWeakrefNoGC_spec); } +static void +test_interp_guards_common(void) +{ + PyInterpreterGuard *guard = PyInterpreterGuard_FromCurrent(); + assert(guard != NULL); + + PyInterpreterGuard *guard_2 = PyInterpreterGuard_FromCurrent(); + assert(guard_2 != NULL); + + // We can close the guards in any order + PyInterpreterGuard_Close(guard_2); + PyInterpreterGuard_Close(guard); +} + +static PyObject * +test_interpreter_guards(PyObject *self, PyObject *unused) +{ + // Test the main interpreter + test_interp_guards_common(); + + // Test a (legacy) subinterpreter + PyThreadState *save_tstate = PyThreadState_Swap(NULL); + PyThreadState *interp_tstate = Py_NewInterpreter(); + test_interp_guards_common(); + Py_EndInterpreter(interp_tstate); + + // Test an isolated subinterpreter + PyInterpreterConfig config = { + .gil = PyInterpreterConfig_OWN_GIL, + .check_multi_interp_extensions = 1 + }; + + PyThreadState *isolated_interp_tstate; + PyStatus status = Py_NewInterpreterFromConfig(&isolated_interp_tstate, &config); + if (PyStatus_Exception(status)) { + PyErr_SetString(PyExc_RuntimeError, "interpreter creation failed"); + return NULL; + } + + test_interp_guards_common(); + Py_EndInterpreter(isolated_interp_tstate); 
+ PyThreadState_Swap(save_tstate); + Py_RETURN_NONE; +} + +static PyObject * +test_thread_state_ensure_nested(PyObject *self, PyObject *unused) +{ + PyInterpreterGuard *guard = PyInterpreterGuard_FromCurrent(); + if (guard == NULL) { + return NULL; + } + PyThreadState *save_tstate = PyThreadState_Swap(NULL); + assert(PyGILState_GetThisThreadState() == save_tstate); + PyThreadState *thread_states[10]; + + for (int i = 0; i < 10; ++i) { + // Test reactivation of the detached tstate. + thread_states[i] = PyThreadState_Ensure(guard); + if (thread_states[i] == 0) { + PyInterpreterGuard_Close(guard); + return PyErr_NoMemory(); + } + + // No new thread state should've been created. + assert(PyThreadState_Get() == save_tstate); + PyThreadState_Release(thread_states[i]); + } + + assert(PyThreadState_GetUnchecked() == NULL); + + // Similarly, test ensuring with deep nesting and *then* releasing. + // If the (detached) gilstate matches the interpreter, then it shouldn't + // create a new thread state. + for (int i = 0; i < 10; ++i) { + thread_states[i] = PyThreadState_Ensure(guard); + if (thread_states[i] == 0) { + // This will technically leak other thread states, but it doesn't + // matter because this is a test. 
+ PyInterpreterGuard_Close(guard); + return PyErr_NoMemory(); + } + + assert(PyThreadState_Get() == save_tstate); + } + + for (int i = 0; i < 10; ++i) { + assert(PyThreadState_Get() == save_tstate); + PyThreadState_Release(thread_states[i]); + } + + assert(PyThreadState_GetUnchecked() == NULL); + PyInterpreterGuard_Close(guard); + PyThreadState_Swap(save_tstate); + Py_RETURN_NONE; +} + +static PyObject * +test_thread_state_ensure_crossinterp(PyObject *self, PyObject *unused) +{ + PyInterpreterGuard *guard = PyInterpreterGuard_FromCurrent(); + PyThreadState *save_tstate = PyThreadState_Swap(NULL); + PyThreadState *interp_tstate = Py_NewInterpreter(); + assert(interp_tstate != NULL); + + /* This should create a new thread state for the calling interpreter, *not* + reactivate the old one. In a real-world scenario, this would arise in + something like this: + + def some_func(): + import something + # This re-enters the main interpreter, but we + # shouldn't have access to prior thread-locals. 
+ something.call_something() + + interp = interpreters.create() + interp.exec(some_func) + */ + PyThreadState *thread_state = PyThreadState_Ensure(guard); + assert(thread_state != NULL); + + PyThreadState *ensured_tstate = PyThreadState_Get(); + assert(ensured_tstate != save_tstate); + assert(PyGILState_GetThisThreadState() == ensured_tstate); + + // Now though, we should reactivate the thread state + PyThreadState *other_thread_state = PyThreadState_Ensure(guard); + assert(other_thread_state != NULL); + assert(PyThreadState_Get() == ensured_tstate); + + PyThreadState_Release(other_thread_state); + + // Ensure that we're restoring the prior thread state + PyThreadState_Release(thread_state); + assert(PyThreadState_Get() == interp_tstate); + assert(PyGILState_GetThisThreadState() == interp_tstate); + + PyThreadState_Swap(interp_tstate); + Py_EndInterpreter(interp_tstate); + + PyInterpreterGuard_Close(guard); + PyThreadState_Swap(save_tstate); + Py_RETURN_NONE; +} + +static PyObject * +test_interp_view_after_shutdown(PyObject *self, PyObject *unused) +{ + PyThreadState *save_tstate = PyThreadState_Swap(NULL); + PyThreadState *interp_tstate = Py_NewInterpreter(); + if (interp_tstate == NULL) { + PyThreadState_Swap(save_tstate); + return PyErr_NoMemory(); + } + + PyInterpreterView *view = PyInterpreterView_FromCurrent(); + if (view == NULL) { + Py_EndInterpreter(interp_tstate); + PyThreadState_Swap(save_tstate); + return PyErr_NoMemory(); + } + + // As a sanity check, ensure that the view actually works + PyInterpreterGuard *guard = PyInterpreterGuard_FromView(view); + PyInterpreterGuard_Close(guard); + + // Now, destroy the interpreter and try to acquire a lock from a view. + // It should fail. 
+ Py_EndInterpreter(interp_tstate); + guard = PyInterpreterGuard_FromView(view); + assert(guard == NULL); + + PyThreadState_Swap(save_tstate); + Py_RETURN_NONE; +} + +static PyObject * +test_thread_state_ensure_view(PyObject *self, PyObject *unused) +{ + // For simplicity's sake, we assume that functions won't fail due to being + // out of memory. + PyThreadState *save_tstate = PyThreadState_Swap(NULL); + PyThreadState *interp_tstate = Py_NewInterpreter(); + assert(interp_tstate != NULL); + assert(PyInterpreterState_Get() == PyThreadState_GetInterpreter(interp_tstate)); + + PyInterpreterView *main_view = PyInterpreterView_FromMain(); + assert(main_view != NULL); + + PyInterpreterView *view = PyInterpreterView_FromCurrent(); + assert(view != NULL); + + Py_BEGIN_ALLOW_THREADS; + PyThreadState *tstate = PyThreadState_EnsureFromView(view); + assert(tstate != NULL); + assert(PyThreadState_Get() == interp_tstate); + + // Test a nested call + PyThreadState *tstate2 = PyThreadState_EnsureFromView(view); + assert(PyThreadState_Get() == interp_tstate); + + // We're in a new interpreter now. PyThreadState_EnsureFromView() should + // now create a new thread state. + PyThreadState *main_tstate = PyThreadState_EnsureFromView(main_view); + assert(main_tstate == interp_tstate); // The old thread state + assert(PyInterpreterState_Get() == PyInterpreterState_Main()); + + // Going back to the old interpreter should create a new thread state again. + PyThreadState *tstate3 = PyThreadState_EnsureFromView(view); + assert(PyInterpreterState_Get() == PyThreadState_GetInterpreter(interp_tstate)); + assert(PyThreadState_Get() != interp_tstate); + PyThreadState_Release(tstate3); + PyThreadState_Release(main_tstate); + + // We're back in the original interpreter. PyThreadState_EnsureFromView() should + // no longer create a new thread state. 
+ assert(PyThreadState_Get() == interp_tstate); + PyThreadState *tstate4 = PyThreadState_EnsureFromView(view); + assert(PyThreadState_Get() == interp_tstate); + PyThreadState_Release(tstate4); + PyThreadState_Release(tstate2); + PyThreadState_Release(tstate); + assert(PyThreadState_GetUnchecked() == NULL); + Py_END_ALLOW_THREADS; + + assert(PyThreadState_Get() == interp_tstate); + PyInterpreterView_Close(view); + PyInterpreterView_Close(main_view); + Py_EndInterpreter(interp_tstate); + PyThreadState_Swap(save_tstate); + + Py_RETURN_NONE; +} + static PyObject* test_soft_deprecated_macros(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) @@ -2740,6 +2974,11 @@ static PyMethodDef TestMethods[] = { {"create_managed_weakref_nogc_type", create_managed_weakref_nogc_type, METH_NOARGS}, {"test_soft_deprecated_macros", test_soft_deprecated_macros, METH_NOARGS}, + {"test_interpreter_lock", test_interpreter_guards, METH_NOARGS}, + {"test_thread_state_ensure_nested", test_thread_state_ensure_nested, METH_NOARGS}, + {"test_thread_state_ensure_crossinterp", test_thread_state_ensure_crossinterp, METH_NOARGS}, + {"test_interp_view_after_shutdown", test_interp_view_after_shutdown, METH_NOARGS}, + {"test_thread_state_ensure_view", test_thread_state_ensure_view, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index deac8570fe3241..c882245ac7a214 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2876,6 +2876,57 @@ test_threadstate_set_stack_protection(PyObject *self, PyObject *Py_UNUSED(args)) Py_RETURN_NONE; } +#define NUM_GUARDS 100 + +static PyObject * +test_interp_guard_countdown(PyObject *self, PyObject *unused) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + assert(_PyInterpreterState_GuardCountdown(interp) == 0); + PyInterpreterGuard *guards[NUM_GUARDS]; + for (int i = 0; i < NUM_GUARDS; ++i) { + guards[i] = PyInterpreterGuard_FromCurrent(); + assert(guards[i] != 
NULL); + assert(_PyInterpreterState_GuardCountdown(interp) == i + 1); + } + + for (int i = 0; i < NUM_GUARDS; ++i) { + PyInterpreterGuard_Close(guards[i]); + assert(_PyInterpreterState_GuardCountdown(interp) == (NUM_GUARDS - i - 1)); + } + + Py_RETURN_NONE; +} + +static PyObject * +test_interp_view_countdown(PyObject *self, PyObject *unused) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + PyInterpreterView *view = PyInterpreterView_FromCurrent(); + if (view == NULL) { + return NULL; + } + assert(_PyInterpreterState_GuardCountdown(interp) == 0); + + PyInterpreterGuard *guards[NUM_GUARDS]; + + for (int i = 0; i < NUM_GUARDS; ++i) { + guards[i] = PyInterpreterGuard_FromView(view); + assert(guards[i] != NULL); + assert(_PyInterpreterGuard_GetInterpreter(guards[i]) == interp); + assert(_PyInterpreterState_GuardCountdown(interp) == i + 1); + } + + for (int i = 0; i < NUM_GUARDS; ++i) { + PyInterpreterGuard_Close(guards[i]); + assert(_PyInterpreterState_GuardCountdown(interp) == (NUM_GUARDS - i - 1)); + } + + PyInterpreterView_Close(view); + Py_RETURN_NONE; +} + +#undef NUM_GUARDS static PyObject * _pyerr_setkeyerror(PyObject *self, PyObject *arg) @@ -3014,6 +3065,8 @@ static PyMethodDef module_functions[] = { {"test_threadstate_set_stack_protection", test_threadstate_set_stack_protection, METH_NOARGS}, {"_pyerr_setkeyerror", _pyerr_setkeyerror, METH_O}, + {"test_interp_guard_countdown", test_interp_guard_countdown, METH_NOARGS}, + {"test_interp_view_countdown", test_interp_view_countdown, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 285f4f091b2f7a..8575fb30c95a8d 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -2670,6 +2670,132 @@ test_gilstate_after_finalization(void) return PyThread_detach_thread(handle); } + +const char *THREAD_CODE = \ + "import time\n" + "time.sleep(0.2)\n" + "def fib(n):\n" + " if n <= 1:\n" + " return n\n" + " else:\n" + " return fib(n - 1) + fib(n - 2)\n" + 
"fib(10)"; + +typedef struct { + void *argument; + int done; + PyEvent event; +} ThreadData; + +static void +do_tstate_ensure(void *arg) +{ + ThreadData *data = (ThreadData *)arg; + PyThreadState *tstates[4]; + PyInterpreterGuard *guard = data->argument; + tstates[0] = PyThreadState_Ensure(guard); + tstates[1] = PyThreadState_Ensure(guard); + tstates[2] = PyThreadState_Ensure(guard); + PyGILState_STATE gstate = PyGILState_Ensure(); + tstates[3] = PyThreadState_Ensure(guard); + assert(tstates[0] != NULL); + assert(tstates[1] != NULL); + assert(tstates[2] != NULL); + assert(tstates[3] != NULL); + int res = PyRun_SimpleString(THREAD_CODE); + assert(res == 0); + PyThreadState_Release(tstates[3]); + PyGILState_Release(gstate); + PyThreadState_Release(tstates[2]); + PyThreadState_Release(tstates[1]); + PyThreadState_Release(tstates[0]); + PyInterpreterGuard_Close(guard); + data->done = 1; +} + +static int +test_thread_state_ensure(void) +{ + _testembed_initialize(); + PyThread_handle_t handle; + PyThread_ident_t ident; + PyInterpreterGuard *guard = PyInterpreterGuard_FromCurrent(); + assert(guard != NULL); + ThreadData data = { guard }; + if (PyThread_start_joinable_thread(do_tstate_ensure, &data, + &ident, &handle) < 0) { + PyInterpreterGuard_Close(guard); + return -1; + } + // We hold an interpreter guard, so we don't + // have to worry about the interpreter shutting down before + // we finalize. + Py_Finalize(); + assert(data.done == 1); + return 0; +} + +static int +test_main_interpreter_view(void) +{ + _testembed_initialize(); + + // Main interpreter is initialized and ready. 
+ PyInterpreterView *view = PyInterpreterView_FromMain(); + assert(view != NULL); + + PyInterpreterGuard *guard = PyInterpreterGuard_FromView(view); + assert(guard != NULL); + PyInterpreterGuard_Close(guard); + + Py_Finalize(); + + // We shouldn't be able to get guards for the interpreter now + guard = PyInterpreterGuard_FromView(view); + assert(guard == NULL); + + PyInterpreterView_Close(view); + + return 0; +} + +static void +do_tstate_ensure_from_view(void *arg) +{ + ThreadData *data = (ThreadData *)arg; + PyInterpreterView *view = data->argument; + assert(view != NULL); + PyThreadState *tstate = PyThreadState_EnsureFromView(view); + assert(tstate != NULL); + _PyEvent_Notify(&data->event); + int res = PyRun_SimpleString(THREAD_CODE); + assert(res == 0); + data->done = 1; + PyThreadState_Release(tstate); +} + +static int +test_thread_state_ensure_from_view(void) +{ + _testembed_initialize(); + PyThread_handle_t handle; + PyThread_ident_t ident; + PyInterpreterView *view = PyInterpreterView_FromCurrent(); + assert(view != NULL); + + ThreadData data = { view }; + if (PyThread_start_joinable_thread(do_tstate_ensure_from_view, &data, + &ident, &handle) < 0) { + PyInterpreterView_Close(view); + return -1; + } + + PyEvent_Wait(&data.event); + Py_Finalize(); + assert(data.done == 1); + return 0; +} + /* ********************************************************* * List of test cases and the function that implements it. 
* @@ -2764,6 +2890,9 @@ static struct TestCase TestCases[] = { {"test_create_module_from_initfunc", test_create_module_from_initfunc}, {"test_inittab_submodule_multiphase", test_inittab_submodule_multiphase}, {"test_inittab_submodule_singlephase", test_inittab_submodule_singlephase}, + {"test_thread_state_ensure", test_thread_state_ensure}, + {"test_main_interpreter_view", test_main_interpreter_view}, + {"test_thread_state_ensure_from_view", test_thread_state_ensure_from_view}, {NULL, NULL} }; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 0232ed6c382c61..7adac8fb9da0ec 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2303,16 +2303,37 @@ make_pre_finalization_calls(PyThreadState *tstate, int subinterpreters) if (subinterpreters) { /* Clean up any lingering subinterpreters. - - Two preconditions need to be met here: - - - This has to happen before _PyRuntimeState_SetFinalizing is - called, or else threads might get prematurely blocked. - - The world must not be stopped, as finalizers can run. - */ + * Two preconditions need to be met here: + * 1. This has to happen before _PyRuntimeState_SetFinalizing is + * called, or else threads might get prematurely blocked. + * 2. The world must not be stopped, as finalizers can run. + */ finalize_subinterpreters(); } + /* Wait on finalization guards. + * + * To avoid eating CPU cycles, we use an event to signal when we reach + * zero remaining guards. But, this isn't atomic! This event can be reset + * later if another thread creates a new finalization guard. The actual + * atomic check is made below, when we hold the finalization guard lock. + * Again, this is purely an optimization to avoid overloading the CPU. 
+ */ + if (_Py_atomic_load_ssize_relaxed(&interp->finalization_guards.countdown) > 0) { + for (;;) { + PyTime_t wait_ns = 1000 * 1000; // 1ms + if (PyEvent_WaitTimed(&interp->finalization_guards.done, wait_ns, /*detach=*/1)) { + break; + } + + // For debugging purposes, we emit a fatal error if someone + // CTRL^C'ed the process. + if (PyErr_CheckSignals()) { + PyErr_FormatUnraisable("Exception ignored while waiting on finalization guards"); + Py_FatalError("Interrupted while waiting on finalization guard"); + } + } + } /* Stop the world to prevent other threads from creating threads or * atexit callbacks. On the default build, this is simply locked by @@ -2324,18 +2345,26 @@ make_pre_finalization_calls(PyThreadState *tstate, int subinterpreters) // XXX Why does _PyThreadState_DeleteList() rely on all interpreters // being stopped? _PyEval_StopTheWorldAll(interp->runtime); + _PyRWMutex_Lock(&interp->finalization_guards.lock); int has_subinterpreters = subinterpreters ? runtime_has_subinterpreters(interp->runtime) : 0; + // TODO: The interpreter guard countdown isn't very efficient. We should + // wait on an event or something like that. 
int should_continue = (interp_has_threads(interp) || interp_has_atexit_callbacks(interp) || interp_has_pending_calls(interp) - || has_subinterpreters); + || has_subinterpreters + || _Py_atomic_load_ssize_acquire(&interp->finalization_guards.countdown) > 0); if (!should_continue) { break; } + // Temporarily let other threads execute + _PyThreadState_Detach(tstate); + _PyRWMutex_Unlock(&interp->finalization_guards.lock); _PyEval_StartTheWorldAll(interp->runtime); PyMutex_Unlock(&interp->ceval.pending.mutex); + _PyThreadState_Attach(tstate); } assert(PyMutex_IsLocked(&interp->ceval.pending.mutex)); ASSERT_WORLD_STOPPED(interp); @@ -2396,6 +2425,7 @@ _Py_Finalize(_PyRuntimeState *runtime) for (PyThreadState *p = list; p != NULL; p = p->next) { _PyThreadState_SetShuttingDown(p); } + _PyRWMutex_Unlock(&tstate->interp->finalization_guards.lock); _PyEval_StartTheWorldAll(runtime); PyMutex_Unlock(&tstate->interp->ceval.pending.mutex); @@ -2783,6 +2813,7 @@ Py_EndInterpreter(PyThreadState *tstate) _PyThreadState_SetShuttingDown(p); } + _PyRWMutex_Unlock(&interp->finalization_guards.lock); _PyEval_StartTheWorldAll(interp->runtime); PyMutex_Unlock(&interp->ceval.pending.mutex); _PyThreadState_DeleteList(list, /*is_after_fork=*/0); diff --git a/Python/pystate.c b/Python/pystate.c index d6a26f3339b863..e8d7e23db7af0e 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1381,12 +1381,8 @@ interp_look_up_id(_PyRuntimeState *runtime, int64_t requested_id) return NULL; } -/* Return the interpreter state with the given ID. - - Fail with RuntimeError if the interpreter is not found. 
*/ - -PyInterpreterState * -_PyInterpreterState_LookUpID(int64_t requested_id) +static PyInterpreterState * +_PyInterpreterState_LookUpIDNoErr(int64_t requested_id) { PyInterpreterState *interp = NULL; if (requested_id >= 0) { @@ -1395,6 +1391,18 @@ _PyInterpreterState_LookUpID(int64_t requested_id) interp = interp_look_up_id(runtime, requested_id); HEAD_UNLOCK(runtime); } + return interp; +} + +/* Return the interpreter state with the given ID. + + Fail with RuntimeError if the interpreter is not found. */ + +PyInterpreterState * +_PyInterpreterState_LookUpID(int64_t requested_id) +{ + assert(_PyThreadState_GET() != NULL); + PyInterpreterState *interp = _PyInterpreterState_LookUpIDNoErr(requested_id); if (interp == NULL && !PyErr_Occurred()) { PyErr_Format(PyExc_InterpreterNotFoundError, "unrecognized interpreter ID %lld", requested_id); @@ -1610,6 +1618,7 @@ static void add_threadstate(PyInterpreterState *interp, PyThreadState *tstate, PyThreadState *next) { + assert(interp != NULL); assert(interp->threads.head != tstate); if (next != NULL) { assert(next->prev == NULL || next->prev == tstate); @@ -1641,6 +1650,8 @@ new_threadstate(PyInterpreterState *interp, int whence) return NULL; } #endif + + #ifdef Py_STATS // The PyStats structure is quite large and is allocated separated from tstate. if (!_PyStats_ThreadInit(interp, tstate)) { @@ -2889,34 +2900,43 @@ PyGILState_Check(void) return (tstate == tcur); } +/* Return a new guard for the main interpreter, or NULL if one cannot be + acquired (allocation failure, or the interpreter is gone or finalizing). */ +static PyInterpreterGuard * +get_main_interp_guard(void) +{ + PyInterpreterView *view = PyInterpreterView_FromMain(); + if (view == NULL) { + return NULL; + } + + // Only the ID is read from the view, so close it here to avoid a leak. + PyInterpreterGuard *guard = PyInterpreterGuard_FromView(view); + PyInterpreterView_Close(view); + return guard; +} + PyGILState_STATE PyGILState_Ensure(void) { - _PyRuntimeState *runtime = &_PyRuntime; - /* Note that we do not auto-init Python here - apart from potential races with 2 threads auto-initializing, pep-311 spells out other issues. Embedders are expected to have called Py_Initialize(). 
*/ - /* Ensure that _PyEval_InitThreads() and _PyGILState_Init() have been - called by Py_Initialize() - - TODO: This isn't thread-safe. There's no protection here against - concurrent finalization of the interpreter; it's simply a guard - for *after* the interpreter has finalized. - */ - if (!_PyEval_ThreadsInitialized() || runtime->gilstate.autoInterpreterState == NULL) { - PyThread_hang_thread(); - } - PyThreadState *tcur = gilstate_get(); int has_gil; if (tcur == NULL) { /* Create a new Python thread state for this thread */ - // XXX Use PyInterpreterState_EnsureThreadState()? - tcur = new_threadstate(runtime->gilstate.autoInterpreterState, - _PyThreadState_WHENCE_GILSTATE); + PyInterpreterGuard *guard = get_main_interp_guard(); + if (guard == NULL) { + // The main interpreter has finished, so we don't have + // any interpreter to make a thread state for. Hang the + // thread to act as failure. + PyThread_hang_thread(); + } + tcur = new_threadstate(guard->interp, + _PyThreadState_WHENCE_C_API); if (tcur == NULL) { Py_FatalError("Couldn't create thread-state for new thread"); } @@ -2928,12 +2948,14 @@ PyGILState_Ensure(void) assert(tcur->gilstate_counter == 1); tcur->gilstate_counter = 0; has_gil = 0; /* new thread state is never current */ + PyInterpreterGuard_Close(guard); } else { has_gil = holds_gil(tcur); } if (!has_gil) { + // XXX Do we need to protect this against finalization? 
PyEval_RestoreThread(tcur); } @@ -3309,3 +3331,287 @@ _Py_GetMainConfig(void) } return _PyInterpreterState_GetConfig(interp); } + +Py_ssize_t +_PyInterpreterState_GuardCountdown(PyInterpreterState *interp) +{ + assert(interp != NULL); + Py_ssize_t count = _Py_atomic_load_ssize_relaxed(&interp->finalization_guards.countdown); + assert(count >= 0); + return count; +} + +PyInterpreterState * +_PyInterpreterGuard_GetInterpreter(PyInterpreterGuard *guard) +{ + assert(guard != NULL); + assert(guard->interp != NULL); + return guard->interp; +} + +static int +try_acquire_interp_guard(PyInterpreterState *interp, PyInterpreterGuard *guard) +{ + assert(interp != NULL); + _PyRWMutex_RLock(&interp->finalization_guards.lock); + + if (_PyInterpreterState_GetFinalizing(interp) != NULL) { + _PyRWMutex_RUnlock(&interp->finalization_guards.lock); + assert(_Py_atomic_load_ssize_relaxed(&interp->finalization_guards.countdown) == 0); + return -1; + } + + Py_ssize_t old_value = _Py_atomic_add_ssize(&interp->finalization_guards.countdown, 1); + if (old_value == 0) { + // Reset the event. + // We first have to notify the finalization thread if it's waiting on us, but + // it will get trapped waiting on the RW lock. When it goes to check + // again after we release the lock, it will see that the countdown is + // non-zero and begin waiting again (hence why we need to reset the + // event). 
+ _PyEvent_Notify(&interp->finalization_guards.done); + memset(&interp->finalization_guards.done, 0, sizeof(PyEvent)); + } + _PyRWMutex_RUnlock(&interp->finalization_guards.lock); + + guard->interp = interp; + return 0; +} + +PyInterpreterGuard * +PyInterpreterGuard_FromCurrent(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp != NULL); + + PyInterpreterGuard *guard = PyMem_RawMalloc(sizeof(PyInterpreterGuard)); + if (guard == NULL) { + PyErr_NoMemory(); + return NULL; + } + + if (try_acquire_interp_guard(interp, guard) < 0) { + PyMem_RawFree(guard); + PyErr_SetString(PyExc_PythonFinalizationError, + "cannot acquire finalization guard anymore"); + return NULL; + } + + return guard; +} + +void +PyInterpreterGuard_Close(PyInterpreterGuard *guard) +{ + PyInterpreterState *interp = guard->interp; + assert(interp != NULL); + + _PyRWMutex_RLock(&interp->finalization_guards.lock); + Py_ssize_t old = _Py_atomic_add_ssize(&interp->finalization_guards.countdown, -1); + if (old == 1) { + _PyEvent_Notify(&interp->finalization_guards.done); + } + _PyRWMutex_RUnlock(&interp->finalization_guards.lock); + + assert(old > 0); + PyMem_RawFree(guard); +} + +PyInterpreterView * +PyInterpreterView_FromCurrent(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp != NULL); + + // PyInterpreterView_Close() can be called without an attached thread + // state, so we have to use the raw allocator. 
+ PyInterpreterView *view = PyMem_RawMalloc(sizeof(PyInterpreterView)); + if (view == NULL) { + PyErr_NoMemory(); + return NULL; + } + + view->refcount = 1; + view->id = interp->id; + return view; +} + +void +PyInterpreterView_Close(PyInterpreterView *view) +{ + assert(view != NULL); + assert(view->refcount > 0); + if (--view->refcount == 0) { + PyMem_RawFree(view); + } +} + +PyInterpreterGuard * +PyInterpreterGuard_FromView(PyInterpreterView *view) +{ + assert(view != NULL); + int64_t interp_id = view->id; + assert(interp_id >= 0); + + // This allocation has to happen before we acquire the runtime lock, because + // PyMem_RawMalloc() might call some weird callback (such as tracemalloc) + // that tries to re-entrantly acquire the lock. + PyInterpreterGuard *guard = PyMem_RawMalloc(sizeof(PyInterpreterGuard)); + if (guard == NULL) { + return NULL; + } + + // Interpreters cannot be deleted while we hold the runtime lock. + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + PyInterpreterState *interp = interp_look_up_id(runtime, interp_id); + if (interp == NULL) { + HEAD_UNLOCK(runtime); + PyMem_RawFree(guard); + return NULL; + } + + int result = try_acquire_interp_guard(interp, guard); + HEAD_UNLOCK(runtime); + + if (result < 0) { + PyMem_RawFree(guard); + return NULL; + } + + assert(guard == NULL || guard->interp != NULL); + return guard; +} + +PyInterpreterView * +PyInterpreterView_FromMain(void) +{ + PyInterpreterView *view = PyMem_RawMalloc(sizeof(PyInterpreterView)); + if (view == NULL) { + return NULL; + } + + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + view->id = runtime->_main_interpreter.id; + view->refcount = 1; + HEAD_UNLOCK(runtime); + + return view; +} + +// This is a bit of a hack -- since NULL is reserved for failure, we need +// to have our own sentinel for when we want to indicate that no prior +// thread state was attached. 
+// To do this, we just use the memory address of a global variable and +// cast it to a PyThreadState *. +static int NO_TSTATE_SENTINEL = 0; + +PyThreadState * +PyThreadState_Ensure(PyInterpreterGuard *guard) +{ + assert(guard != NULL); + PyInterpreterState *interp = guard->interp; + assert(interp != NULL); + PyThreadState *attached_tstate = current_fast_get(); + if (attached_tstate != NULL && attached_tstate->interp == interp) { + /* Yay! We already have an attached thread state that matches. */ + ++attached_tstate->ensure.counter; + return (PyThreadState *)&NO_TSTATE_SENTINEL; + } + + PyThreadState *detached_gilstate = gilstate_get(); + if (detached_gilstate != NULL && detached_gilstate->interp == interp) { + /* There's a detached thread state that works. */ + assert(attached_tstate == NULL); + ++detached_gilstate->ensure.counter; + _PyThreadState_Attach(detached_gilstate); + return (PyThreadState *)&NO_TSTATE_SENTINEL; + } + + PyThreadState *fresh_tstate = _PyThreadState_NewBound(interp, + _PyThreadState_WHENCE_C_API); + if (fresh_tstate == NULL) { + return NULL; + } + fresh_tstate->ensure.counter = 1; + fresh_tstate->ensure.delete_on_release = 1; + + if (attached_tstate != NULL) { + return PyThreadState_Swap(fresh_tstate); + } else { + _PyThreadState_Attach(fresh_tstate); + } + + return (PyThreadState *)&NO_TSTATE_SENTINEL; +} + +PyThreadState * +PyThreadState_EnsureFromView(PyInterpreterView *view) +{ + assert(view != NULL); + PyInterpreterGuard *guard = PyInterpreterGuard_FromView(view); + if (guard == NULL) { + return NULL; + } + + PyThreadState *result_tstate = PyThreadState_Ensure(guard); + if (result_tstate == NULL) { + PyInterpreterGuard_Close(guard); + return NULL; + } + + PyThreadState *tstate = current_fast_get(); + assert(tstate != NULL); + + if (tstate->ensure.owned_guard != NULL) { + assert(tstate->ensure.owned_guard->interp == guard->interp); + PyInterpreterGuard_Close(guard); + } else { + assert(tstate->ensure.owned_guard == NULL); + 
tstate->ensure.owned_guard = guard; + } + + return result_tstate; +} + +void +PyThreadState_Release(PyThreadState *old_tstate) +{ + PyThreadState *tstate = current_fast_get(); + _Py_EnsureTstateNotNULL(tstate); + Py_ssize_t remaining = --tstate->ensure.counter; + if (remaining < 0) { + Py_FatalError("PyThreadState_Release() called more times than PyThreadState_Ensure()"); + } + + if (remaining != 0) { + return; + } + + PyThreadState *to_restore; + if (old_tstate == (PyThreadState *)&NO_TSTATE_SENTINEL) { + to_restore = NULL; + } + else { + to_restore = old_tstate; + } + + assert(tstate->ensure.delete_on_release == 1 || tstate->ensure.delete_on_release == 0); + if (tstate->ensure.delete_on_release) { + // Clear while tstate is still the attached thread state. + PyThreadState_Clear(tstate); + } + + // Detach tstate exactly once, restoring the previous thread state (if any). + PyThreadState_Swap(to_restore); + + if (tstate->ensure.owned_guard != NULL) { + PyInterpreterGuard_Close(tstate->ensure.owned_guard); + tstate->ensure.owned_guard = NULL; + } + + if (tstate->ensure.delete_on_release) { + PyThreadState_Delete(tstate); + } +} diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index d2489387f46caa..2b9a5ee68de713 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -198,6 +198,9 @@ Python/pystate.c - _Py_tss_tstate - Python/pystate.c - _Py_tss_gilstate - Python/pystate.c - _Py_tss_interp - +# Global sentinel that is fine to share across interpreters +Python/pystate.c - NO_TSTATE_SENTINEL - + ##----------------------- ## should be const # XXX Make them const.