Skip to content

Commit 578dbfd

Browse files
authored
Merge branch 'main' into lazy
2 parents 9715124 + 25397f9 commit 578dbfd

File tree

20 files changed

+351
-167
lines changed

20 files changed

+351
-167
lines changed

Doc/library/asyncio-queue.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ Queue
107107
The queue can no longer grow.
108108
Future calls to :meth:`~Queue.put` raise :exc:`QueueShutDown`.
109109
Currently blocked callers of :meth:`~Queue.put` will be unblocked
110-
and will raise :exc:`QueueShutDown` in the formerly blocked thread.
110+
and will raise :exc:`QueueShutDown` in the formerly awaiting task.
111111

112112
If *immediate* is false (the default), the queue can be wound
113113
down normally with :meth:`~Queue.get` calls to extract tasks

Lib/test/_test_atexit.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,19 @@ def func():
135135
finally:
136136
atexit.unregister(func)
137137

138+
def test_eq_unregister_clear(self):
139+
# Issue #112127: callback's __eq__ may call unregister or _clear
140+
class Evil:
141+
def __eq__(self, other):
142+
action(other)
143+
return NotImplemented
144+
145+
for action in atexit.unregister, lambda o: atexit._clear():
146+
with self.subTest(action=action):
147+
atexit.register(lambda: None)
148+
atexit.unregister(Evil())
149+
atexit._clear()
150+
138151

139152
if __name__ == "__main__":
140153
unittest.main()

Lib/test/test_generators.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,33 @@ def __iter__(self):
290290

291291
self.assertEqual([1,2], list(i for i in C()))
292292

293+
def test_close_clears_frame(self):
294+
# gh-142766: Test that closing a generator clears its frame
295+
class DetectDelete:
296+
def __init__(self):
297+
DetectDelete.deleted = False
298+
299+
def __del__(self):
300+
DetectDelete.deleted = True
301+
302+
def generator(arg):
303+
yield
304+
305+
# Test a freshly created generator (not suspended)
306+
g = generator(DetectDelete())
307+
g.close()
308+
self.assertTrue(DetectDelete.deleted)
309+
310+
# Test a suspended generator
311+
g = generator(DetectDelete())
312+
next(g)
313+
g.close()
314+
self.assertTrue(DetectDelete.deleted)
315+
316+
# Clear via gi_frame.clear()
317+
g = generator(DetectDelete())
318+
g.gi_frame.clear()
319+
self.assertTrue(DetectDelete.deleted)
293320

294321
class ModifyUnderlyingIterableTest(unittest.TestCase):
295322
iterables = [

Lib/test/test_profiling/test_sampling_profiler/test_integration.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,3 +863,98 @@ def test_async_aware_running_sees_only_cpu_task(self):
863863
self.assertGreater(cpu_percentage, 90.0,
864864
f"cpu_leaf should dominate samples in 'running' mode, "
865865
f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})")
866+
867+
868+
def _generate_deep_generators_script(chain_depth=20, recurse_depth=150):
869+
"""Generate a script with deeply nested generators for stress testing."""
870+
lines = [
871+
'import sys',
872+
'sys.setrecursionlimit(5000)',
873+
'',
874+
]
875+
# Generate chain of yield-from functions
876+
for i in range(chain_depth - 1):
877+
lines.extend([
878+
f'def deep_yield_chain_{i}(n):',
879+
f' yield ("L{i}", n)',
880+
f' yield from deep_yield_chain_{i + 1}(n)',
881+
'',
882+
])
883+
# Last chain function calls recursive_diver
884+
lines.extend([
885+
f'def deep_yield_chain_{chain_depth - 1}(n):',
886+
f' yield ("L{chain_depth - 1}", n)',
887+
f' yield from recursive_diver(n, {chain_depth})',
888+
'',
889+
'def recursive_diver(n, depth):',
890+
' yield (f"DIVE_{depth}", n)',
891+
f' if depth < {recurse_depth}:',
892+
' yield from recursive_diver(n, depth + 1)',
893+
' else:',
894+
' for i in range(5):',
895+
' yield (f"BOTTOM_{depth}", i)',
896+
'',
897+
'def oscillating_generator(iterations=1000):',
898+
' for i in range(iterations):',
899+
' yield ("OSCILLATE", i)',
900+
' yield from deep_yield_chain_0(i)',
901+
'',
902+
'def run_forever():',
903+
' while True:',
904+
' for _ in oscillating_generator(10):',
905+
' pass',
906+
'',
907+
'_test_sock.sendall(b"working")',
908+
'run_forever()',
909+
])
910+
return '\n'.join(lines)
911+
912+
913+
@requires_remote_subprocess_debugging()
914+
class TestDeepGeneratorFrameCache(unittest.TestCase):
915+
"""Test frame cache consistency with deep oscillating generator stacks."""
916+
917+
def test_all_stacks_share_same_base_frame(self):
918+
"""Verify all sampled stacks reach the entry point function.
919+
920+
When profiling deep generators that oscillate up and down the call
921+
stack, every sample should include the entry point function
922+
(run_forever) in its call chain. If the frame cache stores
923+
incomplete stacks, some samples will be missing this base function,
924+
causing broken flamegraphs.
925+
"""
926+
script = _generate_deep_generators_script()
927+
with test_subprocess(script, wait_for_working=True) as subproc:
928+
collector = CollapsedStackCollector(sample_interval_usec=1, skip_idle=False)
929+
930+
with (
931+
io.StringIO() as captured_output,
932+
mock.patch("sys.stdout", captured_output),
933+
):
934+
profiling.sampling.sample.sample(
935+
subproc.process.pid,
936+
collector,
937+
duration_sec=2,
938+
)
939+
940+
samples_with_entry_point = 0
941+
samples_without_entry_point = 0
942+
total_samples = 0
943+
944+
for (call_tree, _thread_id), count in collector.stack_counter.items():
945+
total_samples += count
946+
if call_tree:
947+
has_entry_point = call_tree and call_tree[0][2] == "<module>"
948+
if has_entry_point:
949+
samples_with_entry_point += count
950+
else:
951+
samples_without_entry_point += count
952+
953+
self.assertGreater(total_samples, 100,
954+
f"Expected at least 100 samples, got {total_samples}")
955+
956+
self.assertEqual(samples_without_entry_point, 0,
957+
f"Found {samples_without_entry_point}/{total_samples} samples "
958+
f"missing the entry point function 'run_forever'. This indicates "
959+
f"incomplete stacks are being returned, likely due to frame cache "
960+
f"storing partial stack traces.")

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,7 @@ Jim Jewett
908908
Pedro Diaz Jimenez
909909
Orjan Johansen
910910
Fredrik Johansson
911+
Benjamin Johnson
911912
Benjamin K. Johnson
912913
Gregory K. Johnson
913914
Kent Johnson
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Clear the frame of a generator when :meth:`generator.close` is called.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix incomplete stack traces in the Tachyon profiler's frame cache when
2+
profiling code with deeply nested generators. The frame cache now validates
3+
that stack traces reach the base frame before caching, preventing broken
4+
flamegraphs. Patch by Pablo Galindo.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix possible use-after-free in :func:`atexit.unregister` when the callback
2+
is unregistered during comparison.

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,35 @@ typedef struct {
279279
size_t count;
280280
} StackChunkList;
281281

282+
/*
283+
* Context for frame chain traversal operations.
284+
*/
285+
typedef struct {
286+
/* Inputs */
287+
uintptr_t frame_addr; // Starting frame address
288+
uintptr_t base_frame_addr; // Sentinel at bottom (for validation)
289+
uintptr_t gc_frame; // GC frame address (0 if not tracking)
290+
uintptr_t last_profiled_frame; // Last cached frame (0 if no cache)
291+
StackChunkList *chunks; // Pre-copied stack chunks
292+
293+
/* Outputs */
294+
PyObject *frame_info; // List to append FrameInfo objects
295+
uintptr_t *frame_addrs; // Array of visited frame addresses
296+
Py_ssize_t num_addrs; // Count of addresses collected
297+
Py_ssize_t max_addrs; // Capacity of frame_addrs array
298+
uintptr_t last_frame_visited; // Last frame address visited
299+
int stopped_at_cached_frame; // Whether we stopped at cached frame
300+
} FrameWalkContext;
301+
302+
/*
303+
* Context for code object parsing.
304+
*/
305+
typedef struct {
306+
uintptr_t code_addr; // Code object address in remote process
307+
uintptr_t instruction_pointer; // Current instruction pointer
308+
int32_t tlbc_index; // Thread-local bytecode index (free-threading)
309+
} CodeObjectContext;
310+
282311
/* Function pointer types for iteration callbacks */
283312
typedef int (*thread_processor_func)(
284313
RemoteUnwinderObject *unwinder,
@@ -343,10 +372,7 @@ extern long read_py_long(RemoteUnwinderObject *unwinder, uintptr_t address);
343372
extern int parse_code_object(
344373
RemoteUnwinderObject *unwinder,
345374
PyObject **result,
346-
uintptr_t address,
347-
uintptr_t instruction_pointer,
348-
uintptr_t *previous_frame,
349-
int32_t tlbc_index
375+
const CodeObjectContext *ctx
350376
);
351377

352378
extern PyObject *make_location_info(
@@ -420,16 +446,7 @@ extern void *find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr);
420446

421447
extern int process_frame_chain(
422448
RemoteUnwinderObject *unwinder,
423-
uintptr_t initial_frame_addr,
424-
StackChunkList *chunks,
425-
PyObject *frame_info,
426-
uintptr_t base_frame_addr,
427-
uintptr_t gc_frame,
428-
uintptr_t last_profiled_frame,
429-
int *stopped_at_cached_frame,
430-
uintptr_t *frame_addrs,
431-
Py_ssize_t *num_addrs,
432-
Py_ssize_t max_addrs
449+
FrameWalkContext *ctx
433450
);
434451

435452
/* Frame cache functions */
@@ -447,20 +464,19 @@ extern int frame_cache_lookup_and_extend(
447464
Py_ssize_t *num_addrs,
448465
Py_ssize_t max_addrs);
449466
// Returns: 1 = stored, 0 = not stored (graceful), -1 = error
467+
// Only stores complete stacks that reach base_frame_addr
450468
extern int frame_cache_store(
451469
RemoteUnwinderObject *unwinder,
452470
uint64_t thread_id,
453471
PyObject *frame_list,
454472
const uintptr_t *addrs,
455-
Py_ssize_t num_addrs);
473+
Py_ssize_t num_addrs,
474+
uintptr_t base_frame_addr,
475+
uintptr_t last_frame_visited);
456476

457477
extern int collect_frames_with_cache(
458478
RemoteUnwinderObject *unwinder,
459-
uintptr_t frame_addr,
460-
StackChunkList *chunks,
461-
PyObject *frame_info,
462-
uintptr_t gc_frame,
463-
uintptr_t last_profiled_frame,
479+
FrameWalkContext *ctx,
464480
uint64_t thread_id);
465481

466482
/* ============================================================================

Modules/_remote_debugging/code_objects.c

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
7676
PyErr_SetString(PyExc_RuntimeError, "TLBC array size exceeds maximum limit");
7777
return 0; // Invalid size
7878
}
79+
assert(tlbc_size > 0 && tlbc_size <= MAX_TLBC_SIZE);
7980

8081
// Allocate and read the entire TLBC array
8182
size_t array_data_size = tlbc_size * sizeof(void*);
@@ -156,8 +157,11 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
156157
const uint8_t* ptr = (const uint8_t*)(linetable);
157158
uintptr_t addr = 0;
158159
int computed_line = firstlineno; // Running accumulator, separate from output
160+
const size_t MAX_LINETABLE_ENTRIES = 65536;
161+
size_t entry_count = 0;
159162

160-
while (*ptr != '\0') {
163+
while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
164+
entry_count++;
161165
uint8_t first_byte = *(ptr++);
162166
uint8_t code = (first_byte >> 3) & 15;
163167
size_t length = (first_byte & 7) + 1;
@@ -277,12 +281,9 @@ make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *locati
277281
int
278282
parse_code_object(RemoteUnwinderObject *unwinder,
279283
PyObject **result,
280-
uintptr_t address,
281-
uintptr_t instruction_pointer,
282-
uintptr_t *previous_frame,
283-
int32_t tlbc_index)
284+
const CodeObjectContext *ctx)
284285
{
285-
void *key = (void *)address;
286+
void *key = (void *)ctx->code_addr;
286287
CachedCodeMetadata *meta = NULL;
287288
PyObject *func = NULL;
288289
PyObject *file = NULL;
@@ -291,9 +292,9 @@ parse_code_object(RemoteUnwinderObject *unwinder,
291292
#ifdef Py_GIL_DISABLED
292293
// In free threading builds, code object addresses might have the low bit set
293294
// as a flag, so we need to mask it off to get the real address
294-
uintptr_t real_address = address & (~1);
295+
uintptr_t real_address = ctx->code_addr & (~1);
295296
#else
296-
uintptr_t real_address = address;
297+
uintptr_t real_address = ctx->code_addr;
297298
#endif
298299

299300
if (unwinder && unwinder->code_object_cache != NULL) {
@@ -360,12 +361,12 @@ parse_code_object(RemoteUnwinderObject *unwinder,
360361
linetable = NULL;
361362
}
362363

363-
uintptr_t ip = instruction_pointer;
364+
uintptr_t ip = ctx->instruction_pointer;
364365
ptrdiff_t addrq;
365366

366367
#ifdef Py_GIL_DISABLED
367368
// Handle thread-local bytecode (TLBC) in free threading builds
368-
if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) {
369+
if (ctx->tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) {
369370
// No TLBC or no unwinder - use main bytecode directly
370371
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
371372
goto done_tlbc;
@@ -383,10 +384,12 @@ parse_code_object(RemoteUnwinderObject *unwinder,
383384
tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
384385
}
385386

386-
if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) {
387+
if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) {
388+
assert(ctx->tlbc_index >= 0);
389+
assert(tlbc_entry->tlbc_array_size > 0);
387390
// Use cached TLBC data
388391
uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
389-
uintptr_t tlbc_bytecode_addr = entries[tlbc_index];
392+
uintptr_t tlbc_bytecode_addr = entries[ctx->tlbc_index];
390393

391394
if (tlbc_bytecode_addr != 0) {
392395
// Calculate offset from TLBC bytecode
@@ -401,8 +404,6 @@ parse_code_object(RemoteUnwinderObject *unwinder,
401404
done_tlbc:
402405
#else
403406
// Non-free-threaded build, always use the main bytecode
404-
(void)tlbc_index; // Suppress unused parameter warning
405-
(void)unwinder; // Suppress unused parameter warning
406407
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
407408
#endif
408409
; // Empty statement to avoid C23 extension warning

0 commit comments

Comments
 (0)