From 0776a59d7cfb3945009abeca126b629470b0484f Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Aug 2025 14:44:00 +0900 Subject: [PATCH 1/8] gh-137838: Fix JIT trace buffer overrun by pre-reserving exit stub space --- Lib/test/test_sys_settrace.py | 3 +++ Lib/test/test_trace.py | 6 ++++++ Python/optimizer.c | 24 ++++++++++-------------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_sys_settrace.py b/Lib/test/test_sys_settrace.py index b3685a91c57ee7..27fcac50ffa7c9 100644 --- a/Lib/test/test_sys_settrace.py +++ b/Lib/test/test_sys_settrace.py @@ -13,6 +13,7 @@ import textwrap import subprocess import warnings + try: import _testinternalcapi except ImportError: @@ -360,6 +361,8 @@ class TraceTestCase(unittest.TestCase): # Disable gc collection when tracing, otherwise the # deallocators may be traced as well. def setUp(self): + if os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0': + self.skipTest("Line tracing behavior differs when JIT optimizer is disabled") self.using_gc = gc.isenabled() gc.disable() self.addCleanup(sys.settrace, sys.gettrace()) diff --git a/Lib/test/test_trace.py b/Lib/test/test_trace.py index bf54c9995376d6..19eee19bdea6d5 100644 --- a/Lib/test/test_trace.py +++ b/Lib/test/test_trace.py @@ -142,6 +142,8 @@ def test_traced_func_linear(self): self.assertEqual(self.tracer.results().counts, expected) + @unittest.skipIf(os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0', + "Line counts differ when JIT optimizer is disabled") def test_traced_func_loop(self): self.tracer.runfunc(traced_func_loop, 2, 3) @@ -166,6 +168,8 @@ def test_traced_func_importing(self): self.assertEqual(self.tracer.results().counts, expected) + @unittest.skipIf(os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0', + "Line counts differ when JIT optimizer is disabled") def test_trace_func_generator(self): self.tracer.runfunc(traced_func_calling_generator) @@ -236,6 +240,8 @@ def setUp(self): self.my_py_filename = fix_ext_py(__file__) self.addCleanup(sys.settrace, sys.gettrace()) + @unittest.skipIf(os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0', + "Line counts differ when JIT optimizer is disabled") def test_exec_counts(self): self.tracer = Trace(count=1, trace=0, countfuncs=0, countcallers=0) code = r'''traced_func_loop(2, 5)''' diff --git a/Python/optimizer.c b/Python/optimizer.c index bae5cfa50ead58..1a99eef0cc2c2f 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -540,6 +540,7 @@ add_to_trace( assert(func == NULL || func->func_code == (PyObject *)code); \ instr = trace_stack[trace_stack_depth].instr; + /* Returns the length of the trace on success, * 0 if it failed to produce a worthwhile trace, * and -1 on an error. 
@@ -560,8 +561,10 @@ translate_bytecode_to_trace( _Py_BloomFilter_Add(dependencies, initial_code); _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; - // Leave space for possible trailing _EXIT_TRACE - int max_length = buffer_size-2; + // Leave space for possible trailing _EXIT_TRACE and estimated exit stubs + // Reserve 20% of buffer space for exit stubs (empirically sufficient) + int max_exit_stubs = (buffer_size * 20) / 100; // 20% for exit stubs + int max_length = buffer_size - 2 - max_exit_stubs; struct { PyFunctionObject *func; PyCodeObject *code; @@ -647,16 +650,7 @@ translate_bytecode_to_trace( assert(!OPCODE_HAS_DEOPT(opcode)); } - if (OPCODE_HAS_EXIT(opcode)) { - // Make space for side exit and final _EXIT_TRACE: - RESERVE_RAW(2, "_EXIT_TRACE"); - max_length--; - } - if (OPCODE_HAS_ERROR(opcode)) { - // Make space for error stub and final _EXIT_TRACE: - RESERVE_RAW(2, "_ERROR_POP_N"); - max_length--; - } + // Note: Exit stub space is pre-reserved in max_length calculation above switch (opcode) { case POP_JUMP_IF_NONE: case POP_JUMP_IF_NOT_NONE: @@ -731,9 +725,11 @@ translate_bytecode_to_trace( { const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; if (expansion->nuops > 0) { - // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE) + // Reserve space for nuops int nuops = expansion->nuops; - RESERVE(nuops + 1); /* One extra for exit */ + + // Reserve space for nuops (exit stub space already pre-reserved) + RESERVE(nuops); int16_t last_op = expansion->uops[nuops-1].uop; if (last_op == _RETURN_VALUE || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) { // Check for trace stack underflow now: From eff78b4c5bd0690c48ad7a0c325c8c14e790fa52 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Aug 2025 14:58:33 +0900 Subject: [PATCH 2/8] Add NEWS.d --- .../2025-08-27-14-58-26.gh-issue-137838.lK6T0j.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-14-58-26.gh-issue-137838.lK6T0j.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-14-58-26.gh-issue-137838.lK6T0j.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-14-58-26.gh-issue-137838.lK6T0j.rst new file mode 100644 index 00000000000000..3850e7f51583ef --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-27-14-58-26.gh-issue-137838.lK6T0j.rst @@ -0,0 +1,2 @@ +Fix JIT trace buffer overrun by pre-reserving exit stub space. Patch By +Donghee Na. From dbc5d9abb39e1e8b34b320ab1acb6dc1ca5ac363 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Aug 2025 15:50:55 +0900 Subject: [PATCH 3/8] Revert "gh-137838: Fix JIT trace buffer overrun by pre-reserving exit stub space" This reverts commit 0776a59d7cfb3945009abeca126b629470b0484f. --- Lib/test/test_sys_settrace.py | 3 --- Lib/test/test_trace.py | 6 ------ Python/optimizer.c | 24 ++++++++++++++---------- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_sys_settrace.py b/Lib/test/test_sys_settrace.py index 27fcac50ffa7c9..b3685a91c57ee7 100644 --- a/Lib/test/test_sys_settrace.py +++ b/Lib/test/test_sys_settrace.py @@ -13,7 +13,6 @@ import textwrap import subprocess import warnings - try: import _testinternalcapi except ImportError: @@ -361,8 +360,6 @@ class TraceTestCase(unittest.TestCase): # Disable gc collection when tracing, otherwise the # deallocators may be traced as well. 
def setUp(self): - if os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0': - self.skipTest("Line tracing behavior differs when JIT optimizer is disabled") self.using_gc = gc.isenabled() gc.disable() self.addCleanup(sys.settrace, sys.gettrace()) diff --git a/Lib/test/test_trace.py b/Lib/test/test_trace.py index 19eee19bdea6d5..bf54c9995376d6 100644 --- a/Lib/test/test_trace.py +++ b/Lib/test/test_trace.py @@ -142,8 +142,6 @@ def test_traced_func_linear(self): self.assertEqual(self.tracer.results().counts, expected) - @unittest.skipIf(os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0', - "Line counts differ when JIT optimizer is disabled") def test_traced_func_loop(self): self.tracer.runfunc(traced_func_loop, 2, 3) @@ -168,8 +166,6 @@ def test_traced_func_importing(self): self.assertEqual(self.tracer.results().counts, expected) - @unittest.skipIf(os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0', - "Line counts differ when JIT optimizer is disabled") def test_trace_func_generator(self): self.tracer.runfunc(traced_func_calling_generator) @@ -240,8 +236,6 @@ def setUp(self): self.my_py_filename = fix_ext_py(__file__) self.addCleanup(sys.settrace, sys.gettrace()) - @unittest.skipIf(os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0', - "Line counts differ when JIT optimizer is disabled") def test_exec_counts(self): self.tracer = Trace(count=1, trace=0, countfuncs=0, countcallers=0) code = r'''traced_func_loop(2, 5)''' diff --git a/Python/optimizer.c b/Python/optimizer.c index 1a99eef0cc2c2f..bae5cfa50ead58 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -540,7 +540,6 @@ add_to_trace( assert(func == NULL || func->func_code == (PyObject *)code); \ instr = trace_stack[trace_stack_depth].instr; - /* Returns the length of the trace on success, * 0 if it failed to produce a worthwhile trace, * and -1 on an error. 
@@ -561,10 +560,8 @@ translate_bytecode_to_trace( _Py_BloomFilter_Add(dependencies, initial_code); _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; - // Leave space for possible trailing _EXIT_TRACE and estimated exit stubs - // Reserve 20% of buffer space for exit stubs (empirically sufficient) - int max_exit_stubs = (buffer_size * 20) / 100; // 20% for exit stubs - int max_length = buffer_size - 2 - max_exit_stubs; + // Leave space for possible trailing _EXIT_TRACE + int max_length = buffer_size-2; struct { PyFunctionObject *func; PyCodeObject *code; @@ -650,7 +647,16 @@ translate_bytecode_to_trace( assert(!OPCODE_HAS_DEOPT(opcode)); } - // Note: Exit stub space is pre-reserved in max_length calculation above + if (OPCODE_HAS_EXIT(opcode)) { + // Make space for side exit and final _EXIT_TRACE: + RESERVE_RAW(2, "_EXIT_TRACE"); + max_length--; + } + if (OPCODE_HAS_ERROR(opcode)) { + // Make space for error stub and final _EXIT_TRACE: + RESERVE_RAW(2, "_ERROR_POP_N"); + max_length--; + } switch (opcode) { case POP_JUMP_IF_NONE: case POP_JUMP_IF_NOT_NONE: @@ -725,11 +731,9 @@ translate_bytecode_to_trace( { const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; if (expansion->nuops > 0) { - // Reserve space for nuops + // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE) int nuops = expansion->nuops; - - // Reserve space for nuops (exit stub space already pre-reserved) - RESERVE(nuops); + RESERVE(nuops + 1); /* One extra for exit */ int16_t last_op = expansion->uops[nuops-1].uop; if (last_op == _RETURN_VALUE || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) { // Check for trace stack underflow now: From 76eec2d18248491e78cd0f8215604a980f40bb98 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Aug 2025 16:49:49 +0900 Subject: [PATCH 4/8] update --- Lib/test/test_sys_settrace.py | 2 ++ Lib/test/test_trace.py | 6 ++++++ Python/optimizer.c | 13 +------------ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_sys_settrace.py b/Lib/test/test_sys_settrace.py index b3685a91c57ee7..199a9087dfe3bc 100644 --- a/Lib/test/test_sys_settrace.py +++ b/Lib/test/test_sys_settrace.py @@ -360,6 +360,8 @@ class TraceTestCase(unittest.TestCase): # Disable gc collection when tracing, otherwise the # deallocators may be traced as well. 
def setUp(self): + if os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0': + self.skipTest("Line tracing behavior differs when JIT optimizer is disabled") self.using_gc = gc.isenabled() gc.disable() self.addCleanup(sys.settrace, sys.gettrace()) diff --git a/Lib/test/test_trace.py b/Lib/test/test_trace.py index bf54c9995376d6..19eee19bdea6d5 100644 --- a/Lib/test/test_trace.py +++ b/Lib/test/test_trace.py @@ -142,6 +142,8 @@ def test_traced_func_linear(self): self.assertEqual(self.tracer.results().counts, expected) + @unittest.skipIf(os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0', + "Line counts differ when JIT optimizer is disabled") def test_traced_func_loop(self): self.tracer.runfunc(traced_func_loop, 2, 3) @@ -166,6 +168,8 @@ def test_traced_func_importing(self): self.assertEqual(self.tracer.results().counts, expected) + @unittest.skipIf(os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0', + "Line counts differ when JIT optimizer is disabled") def test_trace_func_generator(self): self.tracer.runfunc(traced_func_calling_generator) @@ -236,6 +240,8 @@ def setUp(self): self.my_py_filename = fix_ext_py(__file__) self.addCleanup(sys.settrace, sys.gettrace()) + @unittest.skipIf(os.environ.get('PYTHON_UOPS_OPTIMIZE') == '0', + "Line counts differ when JIT optimizer is disabled") def test_exec_counts(self): self.tracer = Trace(count=1, trace=0, countfuncs=0, countcallers=0) code = r'''traced_func_loop(2, 5)''' diff --git a/Python/optimizer.c b/Python/optimizer.c index bae5cfa50ead58..473f2a36f86579 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -560,8 +560,7 @@ translate_bytecode_to_trace( _Py_BloomFilter_Add(dependencies, initial_code); _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; - // Leave space for possible trailing _EXIT_TRACE - int max_length = buffer_size-2; + int max_length = (buffer_size * 2) / 3; // 67% for trace, 33% for exit stubs struct { PyFunctionObject *func; PyCodeObject *code; @@ -647,16 +646,6 @@ translate_bytecode_to_trace( assert(!OPCODE_HAS_DEOPT(opcode)); } - if (OPCODE_HAS_EXIT(opcode)) { - // Make space for side exit and final _EXIT_TRACE: - RESERVE_RAW(2, "_EXIT_TRACE"); - max_length--; - } - if (OPCODE_HAS_ERROR(opcode)) { - // Make space for error stub and final _EXIT_TRACE: - RESERVE_RAW(2, "_ERROR_POP_N"); - max_length--; - } switch (opcode) { case POP_JUMP_IF_NONE: case POP_JUMP_IF_NOT_NONE: From 2a77ad0e09dfabacf6116a8d64fe929cf30c4ffa Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Aug 2025 16:55:13 +0900 Subject: [PATCH 5/8] Update comment --- Python/optimizer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 473f2a36f86579..55c64b2500c471 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -560,7 +560,11 @@ translate_bytecode_to_trace( _Py_BloomFilter_Add(dependencies, initial_code); _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; - int max_length = (buffer_size * 2) / 3; // 67% for trace, 33% for exit stubs + /* + * Assumption: 67% reserved for trace, 33% for exit stubs + * TODO: Compute the required number of exit stubs dynamically + */ + int max_length = (buffer_size * 2) / 3; struct { PyFunctionObject *func; PyCodeObject *code; From 18a8161d5289ff83905607fec5fb967d81a577a0 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Aug 2025 16:57:44 +0900 Subject: [PATCH 6/8] lint --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 55c64b2500c471..ed6e0acbc16d9a 100644 
--- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -560,7 +560,7 @@ translate_bytecode_to_trace( _Py_BloomFilter_Add(dependencies, initial_code); _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; - /* + /* * Assumption: 67% reserved for trace, 33% for exit stubs * TODO: Compute the required number of exit stubs dynamically */ From 1d54d8701a460d26e1aebe45a8dba6a514dd3c9d Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Aug 2025 17:20:36 +0900 Subject: [PATCH 7/8] fix --- Python/optimizer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index ed6e0acbc16d9a..339972194b980a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -564,7 +564,8 @@ translate_bytecode_to_trace( * Assumption: 67% reserved for trace, 33% for exit stubs * TODO: Compute the required number of exit stubs dynamically */ - int max_length = (buffer_size * 2) / 3; + int max_exit_stubs = (buffer_size * 33) / 100; // 33% for exit stubs + int max_length = buffer_size - 2 - max_exit_stubs; struct { PyFunctionObject *func; PyCodeObject *code; @@ -724,9 +725,9 @@ translate_bytecode_to_trace( { const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; if (expansion->nuops > 0) { - // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE) + // Reserve space for nuops (exit stub space already pre-reserved) int nuops = expansion->nuops; - RESERVE(nuops + 1); /* One extra for exit */ + RESERVE(nuops); int16_t last_op = expansion->uops[nuops-1].uop; if (last_op == _RETURN_VALUE || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) { // Check for trace stack underflow now: From a4e666d5b717d44d7ca924bc178c03e70c586707 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Aug 2025 17:32:12 +0900 Subject: [PATCH 8/8] revert to 20%.... --- Python/optimizer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 339972194b980a..df0db5b6472f98 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -561,10 +561,10 @@ translate_bytecode_to_trace( _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; /* - * Assumption: 67% reserved for trace, 33% for exit stubs + * Assumption: 80% reserved for trace, 20% for exit stubs * TODO: Compute the required number of exit stubs dynamically */ - int max_exit_stubs = (buffer_size * 33) / 100; // 33% for exit stubs + int max_exit_stubs = (buffer_size * 20) / 100; // 20% for exit stubs int max_length = buffer_size - 2 - max_exit_stubs; struct { PyFunctionObject *func;
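
---

For reference, a minimal standalone sketch (not part of the patch series) of the reservation arithmetic that the final commit (PATCH 8/8) settles on: roughly 20% of the trace buffer is carved out up front for exit stubs, plus two slots for the trailing _EXIT_TRACE, instead of decrementing max_length per exit- or error-bearing opcode as the pre-patch code did. The example buffer size and the printing harness below are illustrative assumptions, not values or code taken from CPython.

```c
#include <stdio.h>

/* Illustrative sketch of the pre-reservation arithmetic from PATCH 8/8.
 * buffer_size here is an assumed example value; CPython's real trace
 * buffer length is defined elsewhere in the interpreter. */
int main(void)
{
    int buffer_size = 800;                              /* assumed example size */

    /* Assumption stated in the patch comment: 80% reserved for the trace,
     * 20% for exit stubs (with a TODO to compute this dynamically). */
    int max_exit_stubs = (buffer_size * 20) / 100;      /* 20% for exit stubs */
    int max_length = buffer_size - 2 - max_exit_stubs;  /* -2 for trailing _EXIT_TRACE */

    printf("buffer_size=%d max_exit_stubs=%d max_length=%d\n",
           buffer_size, max_exit_stubs, max_length);
    /* With buffer_size=800: max_exit_stubs=160, max_length=638.
     * The per-opcode RESERVE_RAW(2, ...) bookkeeping for exit/error stubs
     * is no longer needed, since stub space is reserved up front. */
    return 0;
}
```

Compared with the earlier per-opcode reservation, this trades an up-front fixed estimate for simpler bookkeeping in the translation loop; the comment added in PATCH 5/8 notes that the required number of exit stubs could eventually be computed dynamically instead.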