Skip to content

Commit 4f29dd3

Browse files
Partially address review
1 parent 3f212a4 commit 4f29dd3

File tree

10 files changed

+69
-62
lines changed

10 files changed

+69
-62
lines changed

Include/internal/pycore_interp_structs.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -768,15 +768,14 @@ typedef struct _PyJitTracerState {
768768
int prev_instr_stacklevel;
769769
int specialize_counter;
770770
_PyUOpInstruction *code_buffer;
771-
_Py_CODEUNIT *insert_exec_instr;
771+
_Py_CODEUNIT *start_instr;
772772
_Py_CODEUNIT *close_loop_instr;
773+
_Py_CODEUNIT *jump_backward_instr;
773774
PyCodeObject *initial_code; // Strong
774775
PyFunctionObject *initial_func; // Strong
775776
_Py_CODEUNIT *prev_instr;
776777
PyCodeObject *prev_instr_code; // Strong
777778
struct _PyExitData *prev_exit;
778-
struct _PyExecutorObject *prev_executor; // Strong
779-
_Py_CODEUNIT *jump_backward_instr;
780779
_PyInterpreterFrame *prev_instr_frame;
781780
_PyBloomFilter dependencies;
782781
} _PyJitTracerState;

Include/internal/pycore_optimizer.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ typedef struct {
3636

3737
typedef struct _PyExitData {
3838
uint32_t target;
39-
uint16_t index;
40-
char is_dynamic:4;
41-
char is_control_flow:4;
39+
uint16_t index:14;
40+
uint16_t is_dynamic:1;
41+
uint16_t is_control_flow:1;
4242
_Py_BackoffCounter temperature;
4343
struct _PyExecutorObject *executor;
4444
} _PyExitData;
@@ -351,6 +351,7 @@ static inline int is_terminator(const _PyUOpInstruction *uop)
351351
int opcode = uop->opcode;
352352
return (
353353
opcode == _EXIT_TRACE ||
354+
opcode == _DEOPT ||
354355
opcode == _JUMP_TO_TOP ||
355356
opcode == _DYNAMIC_EXIT
356357
);
@@ -367,9 +368,9 @@ int _PyJit_translate_single_bytecode_to_trace(PyThreadState *tstate, _PyInterpre
367368

368369
int
369370
_PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame,
370-
_Py_CODEUNIT *curr_instr, _Py_CODEUNIT *insert_exec_instr,
371+
_Py_CODEUNIT *curr_instr, _Py_CODEUNIT *start_instr,
371372
_Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth, _PyExitData *exit,
372-
_PyExecutorObject *prev_exec, int oparg);
373+
int oparg);
373374

374375
void _PyJit_FinalizeTracing(PyThreadState *tstate);
375376

Python/bytecodes.c

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,7 +1220,7 @@ dummy_func(
12201220
PyObject *result = PyStackRef_AsPyObjectSteal(retval);
12211221
if (IS_JIT_TRACING()) {
12221222
#if _Py_TIER2
1223-
_PyJit_translate_single_bytecode_to_trace(tstate, frame, next_instr);
1223+
_PyJit_translate_single_bytecode_to_trace(tstate, frame, NULL);
12241224
LEAVE_TRACING();
12251225
int err = bail_tracing_and_jit(tstate, frame);
12261226
if (err < 0) {
@@ -2983,7 +2983,7 @@ dummy_func(
29832983
oparg >>= 8;
29842984
insert_exec_at--;
29852985
}
2986-
int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, NULL, oparg);
2986+
int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, oparg);
29872987
if (succ) {
29882988
ENTER_TRACING();
29892989
}
@@ -5268,7 +5268,7 @@ dummy_func(
52685268
tier2 op(_EXIT_TRACE, (exit_p/4 --)) {
52695269
_PyExitData *exit = (_PyExitData *)exit_p;
52705270
#if defined(Py_DEBUG) && !defined(_Py_JIT)
5271-
const _Py_CODEUNIT *target = ((frame->owner >= FRAME_OWNED_BY_INTERPRETER)
5271+
const _Py_CODEUNIT *target = ((frame->owner == FRAME_OWNED_BY_INTERPRETER)
52725272
? _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame))
52735273
+ exit->target;
52745274
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
@@ -5285,8 +5285,6 @@ dummy_func(
52855285
TIER2_TO_TIER2(exit->executor);
52865286
}
52875287

5288-
// Note: this is different than _COLD_EXIT/_EXIT_TRACE, as it may lead to multiple executors
5289-
// from a single exit!
52905288
tier2 op(_DYNAMIC_EXIT, (exit_p/4 --)) {
52915289
#if defined(Py_DEBUG) && !defined(_Py_JIT)
52925290
_PyExitData *exit = (_PyExitData *)exit_p;
@@ -5415,7 +5413,8 @@ dummy_func(
54155413
}
54165414

54175415
tier2 op(_DEOPT, (--)) {
5418-
GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET());
5416+
GOTO_TIER_ONE((frame->owner == FRAME_OWNED_BY_INTERPRETER)
5417+
? _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame) + CURRENT_TARGET());
54195418
}
54205419

54215420
tier2 op(_HANDLE_PENDING_AND_DEOPT, (--)) {
@@ -5445,8 +5444,8 @@ dummy_func(
54455444
tier2 op(_COLD_EXIT, ( -- )) {
54465445
_PyExitData *exit = tstate->jit_exit;
54475446
assert(exit != NULL);
5448-
_Py_CODEUNIT *target = ((frame->owner >= FRAME_OWNED_BY_INTERPRETER)
5449-
? (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame)) + exit->target;
5447+
assert(frame->owner < FRAME_OWNED_BY_INTERPRETER);
5448+
_Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target;
54505449
_Py_BackoffCounter temperature = exit->temperature;
54515450
_PyExecutorObject *executor;
54525451
if (target->op.code == ENTER_EXECUTOR) {
@@ -5458,9 +5457,6 @@ dummy_func(
54585457
TIER2_TO_TIER2(exit->executor);
54595458
}
54605459
else {
5461-
if (frame->owner >= FRAME_OWNED_BY_INTERPRETER) {
5462-
GOTO_TIER_ONE(target);
5463-
}
54645460
if (!backoff_counter_triggers(temperature)) {
54655461
exit->temperature = advance_backoff_counter(temperature);
54665462
GOTO_TIER_ONE(target);
@@ -5473,7 +5469,7 @@ dummy_func(
54735469
// Note: it's safe to use target->op.arg here instead of the oparg given by EXTENDED_ARG.
54745470
// The invariant in the optimizer is the deopt target always points back to the first EXTENDED_ARG.
54755471
// So setting it to anything else is wrong.
5476-
int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg);
5472+
int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, target->op.arg);
54775473
exit->temperature = restart_backoff_counter(exit->temperature);
54785474
if (succ) {
54795475
GOTO_TIER_ONE_CONTINUE_TRACING(target);
@@ -5483,6 +5479,7 @@ dummy_func(
54835479
}
54845480

54855481
tier2 op(_COLD_DYNAMIC_EXIT, ( -- )) {
5482+
// TODO (gh-139109): This should be similar to _COLD_EXIT in the future.
54865483
_Py_CODEUNIT *target = frame->instr_ptr;
54875484
GOTO_TIER_ONE(target);
54885485
}

Python/ceval.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,9 +1105,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
11051105
_Py_CODEUNIT *next_instr;
11061106
_PyStackRef *stack_pointer;
11071107
entry.stack[0] = PyStackRef_NULL;
1108-
entry.frame.f_funcobj = PyStackRef_NULL;
1109-
#if defined(Py_DEBUG)
1108+
#ifdef Py_STACKREF_DEBUG
1109+
entry.frame.f_funcobj = PyStackRef_None;
1110+
#elif defined(Py_DEBUG)
11101111
/* Set these to invalid but identifiable values for debugging. */
1112+
entry.frame.f_funcobj = (_PyStackRef){.bits = 0xaaa0};
11111113
entry.frame.f_locals = (PyObject*)0xaaa1;
11121114
entry.frame.frame_obj = (PyFrameObject*)0xaaa2;
11131115
entry.frame.f_globals = (PyObject*)0xaaa3;

Python/executor_cases.c.h

Lines changed: 6 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/generated_cases.c.h

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ _PyOptimizer_Optimize(
136136
chain_depth %= MAX_CHAIN_DEPTH;
137137
bool progress_needed = chain_depth == 0;
138138
PyCodeObject *code = (PyCodeObject *)tstate->interp->jit_state.initial_code;
139-
_Py_CODEUNIT *start = tstate->interp->jit_state.insert_exec_instr;
139+
_Py_CODEUNIT *start = tstate->interp->jit_state.start_instr;
140140
if (progress_needed && !has_space_for_executor(code, start)) {
141141
interp->compiling = false;
142142
return 0;
@@ -608,10 +608,6 @@ _PyJit_translate_single_bytecode_to_trace(
608608
// Strange control-flow
609609
bool has_dynamic_jump_taken = OPCODE_HAS_UNPREDICTABLE_JUMP(opcode) &&
610610
(next_instr != this_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]);
611-
if (has_dynamic_jump_taken) {
612-
DPRINTF(2, "Unsupported: dynamic jump taken\n");
613-
goto unsupported;
614-
}
615611

616612
/* Special case the first instruction,
617613
* so that we can guarantee forward progress */
@@ -624,6 +620,10 @@ _PyJit_translate_single_bytecode_to_trace(
624620
}
625621

626622
bool needs_guard_ip = _PyOpcode_NeedsGuardIp[opcode];
623+
if (has_dynamic_jump_taken && !needs_guard_ip) {
624+
DPRINTF(2, "Unsupported: dynamic jump taken\n");
625+
goto unsupported;
626+
}
627627
DPRINTF(2, "%p %d: %s(%d) %d %d\n", old_code, target, _PyOpcode_OpName[opcode], oparg, needs_guard_ip, old_stack_level);
628628

629629
#ifdef Py_DEBUG
@@ -749,7 +749,7 @@ _PyJit_translate_single_bytecode_to_trace(
749749
case JUMP_BACKWARD_NO_INTERRUPT:
750750
{
751751
if ((next_instr != tstate->interp->jit_state.close_loop_instr) &&
752-
(next_instr != tstate->interp->jit_state.insert_exec_instr) &&
752+
(next_instr != tstate->interp->jit_state.start_instr) &&
753753
tstate->interp->jit_state.code_curr_size > 5 &&
754754
// These are coroutines, and we want to unroll those usually.
755755
opcode != JUMP_BACKWARD_NO_INTERRUPT) {
@@ -760,7 +760,7 @@ _PyJit_translate_single_bytecode_to_trace(
760760
OPT_STAT_INC(inner_loop);
761761
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
762762
trace[trace_length-1].operand1 = true; // is_control_flow
763-
DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr, tstate->interp->jit_state.close_loop_instr, tstate->interp->jit_state.insert_exec_instr);
763+
DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr, tstate->interp->jit_state.close_loop_instr, tstate->interp->jit_state.start_instr);
764764
goto done;
765765
}
766766
break;
@@ -772,7 +772,9 @@ _PyJit_translate_single_bytecode_to_trace(
772772
* start with RESUME_CHECK */
773773
ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target);
774774
break;
775-
775+
case INTERPRETER_EXIT:
776+
ADD_TO_TRACE(_DEOPT, 0, 0, target);
777+
goto done;;
776778
default:
777779
{
778780
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -862,18 +864,18 @@ _PyJit_translate_single_bytecode_to_trace(
862864
PyCodeObject *new_code = (PyCodeObject *)PyStackRef_AsPyObjectBorrow(frame->f_executable);
863865
PyFunctionObject *new_func = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
864866

865-
if (new_func != NULL) {
866-
operand = (uintptr_t)new_func;
867-
DPRINTF(2, "Adding %p func to op\n", (void *)operand);
868-
_Py_BloomFilter_Add(dependencies, new_func);
869-
}
870-
else if (new_code != NULL && !Py_IsNone((PyObject*)new_code)) {
871-
operand = (uintptr_t)new_code | 1;
872-
DPRINTF(2, "Adding %p code to op\n", (void *)operand);
873-
_Py_BloomFilter_Add(dependencies, new_code);
874-
}
875-
else {
876-
operand = 0;
867+
operand = 0;
868+
if (frame->owner < FRAME_OWNED_BY_INTERPRETER) {
869+
if (new_func != NULL) {
870+
operand = (uintptr_t)new_func;
871+
DPRINTF(2, "Adding %p func to op\n", (void *)operand);
872+
_Py_BloomFilter_Add(dependencies, new_func);
873+
}
874+
else if (new_code != NULL && !Py_IsNone((PyObject*)new_code)) {
875+
operand = (uintptr_t)new_code | 1;
876+
DPRINTF(2, "Adding %p code to op\n", (void *)operand);
877+
_Py_BloomFilter_Add(dependencies, new_code);
878+
}
877879
}
878880
ADD_TO_TRACE(uop, oparg, operand, target);
879881
trace[trace_length - 1].operand1 = PyStackRef_IsNone(frame->f_executable) ? 2 : ((int)(frame->stackpointer - _PyFrame_Stackbase(frame)));
@@ -913,8 +915,11 @@ _PyJit_translate_single_bytecode_to_trace(
913915
}
914916
}
915917
// Loop back to the start
916-
int is_first_instr = tstate->interp->jit_state.close_loop_instr == next_instr || tstate->interp->jit_state.insert_exec_instr == next_instr;
918+
int is_first_instr = tstate->interp->jit_state.close_loop_instr == next_instr || tstate->interp->jit_state.start_instr == next_instr;
917919
if (is_first_instr && tstate->interp->jit_state.code_curr_size > 5) {
920+
if (needs_guard_ip) {
921+
ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)next_instr, 0);
922+
}
918923
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
919924
goto done;
920925
}
@@ -945,7 +950,10 @@ _PyJit_translate_single_bytecode_to_trace(
945950

946951
// Returns 0 for do not enter tracing, 1 on enter tracing.
947952
int
948-
_PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *curr_instr, _Py_CODEUNIT *insert_exec_instr, _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth, _PyExitData *exit, _PyExecutorObject *prev_exec, int oparg)
953+
_PyJit_TryInitializeTracing(
954+
PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *curr_instr,
955+
_Py_CODEUNIT *start_instr, _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth,
956+
_PyExitData *exit, int oparg)
949957
{
950958
// A recursive trace.
951959
// Don't trace into the inner call because it will stomp on the previous trace, causing endless retraces.
@@ -972,12 +980,12 @@ _PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, _
972980
chain_depth);
973981
#endif
974982

975-
add_to_trace(tstate->interp->jit_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)insert_exec_instr, INSTR_IP(insert_exec_instr, code));
983+
add_to_trace(tstate->interp->jit_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
976984
add_to_trace(tstate->interp->jit_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0);
977985
tstate->interp->jit_state.code_curr_size = 2;
978986

979987
tstate->interp->jit_state.code_max_size = UOP_MAX_TRACE_LENGTH;
980-
tstate->interp->jit_state.insert_exec_instr = insert_exec_instr;
988+
tstate->interp->jit_state.start_instr = start_instr;
981989
tstate->interp->jit_state.close_loop_instr = close_loop_instr;
982990
tstate->interp->jit_state.initial_code = (PyCodeObject *)Py_NewRef(code);
983991
tstate->interp->jit_state.initial_func = (PyFunctionObject *)Py_XNewRef(PyStackRef_AsPyObjectBorrow(frame->f_funcobj));
@@ -993,9 +1001,9 @@ _PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, _
9931001
tstate->interp->jit_state.prev_instr_oparg = oparg;
9941002
tstate->interp->jit_state.prev_instr_stacklevel = curr_stackdepth;
9951003
tstate->interp->jit_state.prev_instr_is_super = false;
996-
assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (prev_exec != NULL && exit != NULL));
1004+
assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (exit != NULL));
9971005
tstate->interp->jit_state.jump_backward_instr = curr_instr;
998-
tstate->interp->jit_state.prev_executor = (_PyExecutorObject *)Py_XNewRef(prev_exec);
1006+
assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (exit != NULL));
9991007
_Py_BloomFilter_Init(&tstate->interp->jit_state.dependencies);
10001008
return 1;
10011009
}
@@ -1006,7 +1014,6 @@ _PyJit_FinalizeTracing(PyThreadState *tstate)
10061014
Py_CLEAR(tstate->interp->jit_state.initial_code);
10071015
Py_CLEAR(tstate->interp->jit_state.initial_func);
10081016
Py_CLEAR(tstate->interp->jit_state.prev_instr_code);
1009-
Py_CLEAR(tstate->interp->jit_state.prev_executor);
10101017
tstate->interp->jit_state.code_curr_size = 2;
10111018
tstate->interp->jit_state.code_max_size = UOP_MAX_TRACE_LENGTH - 1;
10121019
}
@@ -1077,9 +1084,6 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
10771084
for (int i = 0; i < length; i++) {
10781085
_PyUOpInstruction *inst = &buffer[i];
10791086
int opcode = inst->opcode;
1080-
if (inst->format != UOP_FORMAT_TARGET) {
1081-
fprintf(stdout, "I: %d\n", i);
1082-
}
10831087
int32_t target = (int32_t)uop_get_target(inst);
10841088
uint16_t exit_flags = _PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG | HAS_PERIODIC_FLAG);
10851089
if (exit_flags) {

Python/optimizer_analysis.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
499499
}
500500
case _JUMP_TO_TOP:
501501
case _DYNAMIC_EXIT:
502+
case _DEOPT:
502503
return pc + 1;
503504
}
504505
}

Python/optimizer_bytecodes.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,10 @@ dummy_func(void) {
10221022
ctx->done = true;
10231023
}
10241024

1025+
op(_DEOPT, (--)) {
1026+
ctx->done = true;
1027+
}
1028+
10251029
op(_REPLACE_WITH_TRUE, (value -- res)) {
10261030
REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_True);
10271031
res = sym_new_const(ctx, Py_True);

0 commit comments

Comments
 (0)