From 4a82ac9d678fccb486b113e2a25462e13a5996bf Mon Sep 17 00:00:00 2001 From: Sofia Donato Ferreira Date: Mon, 16 Mar 2026 22:49:03 -0300 Subject: [PATCH 1/3] profiling(Gecko): Properly obtain main thread identifier Since running a profiler via CLI (python -m profiling.sampling run) spawns a new subprocess where the actual user-specified code will run, a call to threading.main_thread() in the collector's process will not return the profiled process's main thread. To combat this, we rely on the fact that thread objects are inserted in such a way that the first object in the list represents the oldest ThreadState object [1], which corresponds to a ThreadState associated with the main thread. [1] - https://github.com/python/cpython/blob/1b118353bb0a9d816de6ef673f3b11775de5bec5/Include/internal/pycore_interp_structs.h#L831 Signed-off-by: Sofia Donato Ferreira --- Lib/profiling/sampling/gecko_collector.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 28ef9b69bf7968..752b57571f6ec8 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -170,6 +170,7 @@ def collect(self, stack_frames, timestamps_us=None): self.last_sample_time = times[-1] # Process threads + main_tid = None for interpreter_info in stack_frames: for thread_info in interpreter_info.threads: frames = filter_internal_frames(thread_info.frame_info) @@ -177,7 +178,11 @@ def collect(self, stack_frames, timestamps_us=None): # Initialize thread if needed if tid not in self.threads: - self.threads[tid] = self._create_thread(tid) + # Since 'threads' is in order from oldest to newest, + # we know the first thread must be the main thread. 
+ if len(self.threads) == 0: + main_tid = tid + self.threads[tid] = self._create_thread(tid, main_tid) thread_data = self.threads[tid] @@ -288,14 +293,10 @@ def collect(self, stack_frames, timestamps_us=None): self.sample_count += len(times) - def _create_thread(self, tid): + def _create_thread(self, tid, main_tid): """Create a new thread structure with processed profile format.""" - # Determine if this is the main thread - try: - is_main = tid == threading.main_thread().ident - except (RuntimeError, AttributeError): - is_main = False + is_main = tid == main_tid thread = { "name": f"Thread-{tid}", From 8920f2f062bf3c93090c5ed2d1fab079baa6645f Mon Sep 17 00:00:00 2001 From: Sofia Donato Ferreira Date: Tue, 17 Mar 2026 15:41:52 -0300 Subject: [PATCH 2/3] profiling(gecko): take last thread as main thread instead of first one The ordering is actually newest -> oldest, since the _remote_debugging code traverses the ThreadState linked list in the interpreter state, appending to a list of threads in order. Signed-off-by: Sofia Donato Ferreira --- Lib/profiling/sampling/gecko_collector.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 752b57571f6ec8..3e39715dce2dec 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -170,18 +170,16 @@ def collect(self, stack_frames, timestamps_us=None): self.last_sample_time = times[-1] # Process threads - main_tid = None for interpreter_info in stack_frames: + # Since 'threads' is in order from newest to oldest, + # we know the first thread must be the main thread. 
+ main_tid = interpreter_info.threads[-1].thread_id for thread_info in interpreter_info.threads: frames = filter_internal_frames(thread_info.frame_info) tid = thread_info.thread_id # Initialize thread if needed if tid not in self.threads: - # Since 'threads' is in order from oldest to newest, - # we know the first thread must be the main thread. - if len(self.threads) == 0: - main_tid = tid self.threads[tid] = self._create_thread(tid, main_tid) thread_data = self.threads[tid] From d9b0a79ba7a170cae4404ad8ddea0a32c102fae4 Mon Sep 17 00:00:00 2001 From: Sofia Donato Ferreira Date: Tue, 17 Mar 2026 16:50:08 -0300 Subject: [PATCH 3/3] profiling(Gecko): check if thread list is not empty before accessing it It seems that some samples (possibly at the very start / end) sometimes do not have any active threads in the interpreter state. It is OK for main_tid to be None here: that only happens when the threads list is empty, in which case the for loop will not execute anyway. Signed-off-by: Sofia Donato Ferreira --- Lib/profiling/sampling/gecko_collector.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 3e39715dce2dec..7b42253e50f636 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -173,7 +173,9 @@ def collect(self, stack_frames, timestamps_us=None): for interpreter_info in stack_frames: # Since 'threads' is in order from newest to oldest, # we know the first thread must be the main thread. - main_tid = interpreter_info.threads[-1].thread_id + main_tid = None + if len(interpreter_info.threads) != 0: + main_tid = interpreter_info.threads[-1].thread_id for thread_info in interpreter_info.threads: frames = filter_internal_frames(thread_info.frame_info) tid = thread_info.thread_id