Skip to content

Commit 18c78b1

Browse files
committed
Support limited scalar replacement for replicated uops in the code generator. Use it to support efficient specializations of COPY and SWAP in the JIT.
1 parent b102f09 commit 18c78b1

File tree

9 files changed

+313
-198
lines changed

9 files changed

+313
-198
lines changed

Include/internal/pycore_uop_ids.h

Lines changed: 176 additions & 171 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_metadata.h

Lines changed: 30 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/bytecodes.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4946,8 +4946,7 @@ dummy_func(
49464946
res = PyStackRef_FromPyObjectSteal(res_o);
49474947
}
49484948

4949-
pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
4950-
assert(oparg > 0);
4949+
pure replicate(1:4) inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
49514950
top = PyStackRef_DUP(bottom);
49524951
}
49534952

@@ -4980,12 +4979,11 @@ dummy_func(
49804979

49814980
macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP;
49824981

4983-
pure inst(SWAP, (bottom, unused[oparg-2], top --
4982+
pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top --
49844983
bottom, unused[oparg-2], top)) {
49854984
_PyStackRef temp = bottom;
49864985
bottom = top;
49874986
top = temp;
4988-
assert(oparg >= 2);
49894987
}
49904988

49914989
inst(INSTRUMENTED_LINE, ( -- )) {

Python/executor_cases.c.h

Lines changed: 59 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/generated_cases.c.h

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1292,8 +1292,8 @@ uop_optimize(
12921292
for (int pc = 0; pc < length; pc++) {
12931293
int opcode = buffer[pc].opcode;
12941294
int oparg = buffer[pc].oparg;
1295-
if (oparg < _PyUop_Replication[opcode]) {
1296-
buffer[pc].opcode = opcode + oparg + 1;
1295+
if (oparg < _PyUop_Replication[opcode].stop && oparg >= _PyUop_Replication[opcode].start) {
1296+
buffer[pc].opcode = opcode + oparg + 1 - _PyUop_Replication[opcode].start;
12971297
assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
12981298
}
12991299
else if (is_terminator(&buffer[pc])) {

Tools/cases_generator/analyzer.py

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,7 @@ class Uop:
180180
properties: Properties
181181
_size: int = -1
182182
implicitly_created: bool = False
183-
replicated = 0
183+
replicated = range(0)
184184
replicates: "Uop | None" = None
185185
# Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro
186186
instruction_size: int | None = None
@@ -868,6 +868,28 @@ def compute_properties(op: parser.CodeDef) -> Properties:
868868
needs_prev=variable_used(op, "prev_instr"),
869869
)
870870

871+
def expand(items: list[StackItem], oparg: int) -> list[StackItem]:
    """Replace the single oparg-sized array item in *items* with scalar items.

    An item counts as an array when its ``size`` expression mentions
    ``oparg``.  If exactly one such item exists, its size is evaluated with
    the concrete *oparg* substituted in, and the item is replaced by that
    many scalar items named ``<name>_0``, ``<name>_1``, ... which carry an
    empty size and the original item's ``peek`` and ``used`` values.

    Args:
        items: Stack items to process; this list is never mutated.
        oparg: Constant oparg value substituted into the size expression.

    Returns:
        A new list with the array item expanded, or *items* itself when
        there is no array item, more than one array item, or the size
        expression cannot be evaluated to an integer.
    """
    array_positions = [
        pos for pos, item in enumerate(items) if "oparg" in item.size
    ]
    # Only scalarize when there is exactly one array item.
    if len(array_positions) != 1:
        return items
    index = array_positions[0]
    array = items[index]
    try:
        # NOTE: eval() runs on a size expression taken from the instruction
        # definition being analyzed; it must never see untrusted text.
        count = int(eval(array.size.replace("oparg", str(oparg))))
    except (ValueError, TypeError, NameError, SyntaxError):
        # The size refers to something other than oparg, or is not a valid
        # Python expression: keep the array form instead of crashing.
        return items
    scalars = [
        StackItem(f"{array.name}_{n}", "", array.peek, array.used)
        for n in range(count)
    ]
    return items[:index] + scalars + items[index + 1:]
887+
888+
def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect:
    """Scalarize the inputs and outputs of *stack* for a constant *oparg*.

    Each side is passed through ``expand`` independently, inputs first and
    then outputs, and the result is stored back on *stack*.  The stack
    effect is modified in place and the same object is returned.
    """
    # Only scalarize if no more than one input or output is array
    for side in ("inputs", "outputs"):
        setattr(stack, side, expand(getattr(stack, side), oparg))
    return stack
871893

872894
def make_uop(
873895
name: str,
@@ -887,20 +909,26 @@ def make_uop(
887909
)
888910
for anno in op.annotations:
889911
if anno.startswith("replicate"):
890-
result.replicated = int(anno[10:-1])
912+
text = anno[10:-1]
913+
start, stop = text.split(":")
914+
result.replicated = range(int(start), int(stop))
891915
break
892916
else:
893917
return result
894-
for oparg in range(result.replicated):
918+
for oparg in result.replicated:
895919
name_x = name + "_" + str(oparg)
896920
properties = compute_properties(op)
897921
properties.oparg = False
898-
properties.const_oparg = oparg
922+
stack = analyze_stack(op)
923+
if not variable_used(op, "oparg"):
924+
stack = scalarize_stack(stack, oparg)
925+
else:
926+
properties.const_oparg = oparg
899927
rep = Uop(
900928
name=name_x,
901929
context=op.context,
902930
annotations=op.annotations,
903-
stack=analyze_stack(op),
931+
stack=stack,
904932
caches=analyze_caches(inputs),
905933
local_stores=find_variable_stores(op),
906934
body=op.block,

Tools/cases_generator/parsing.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -379,9 +379,13 @@ def inst_header(self) -> InstHeader | None:
379379
while anno := self.expect(lx.ANNOTATION):
380380
if anno.text == "replicate":
381381
self.require(lx.LPAREN)
382-
times = self.require(lx.NUMBER)
382+
stop = self.require(lx.NUMBER)
383+
start_text = "0"
384+
if self.expect(lx.COLON):
385+
start_text = stop.text
386+
stop = self.require(lx.NUMBER)
383387
self.require(lx.RPAREN)
384-
annotations.append(f"replicate({times.text})")
388+
annotations.append(f"replicate({start_text}:{stop.text})")
385389
else:
386390
annotations.append(anno.text)
387391
tkn = self.expect(lx.INST)

Tools/cases_generator/uop_metadata_generator.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,8 @@
2424

2525
def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
2626
out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n")
27-
out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n")
27+
out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n")
28+
out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n")
2829
out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n")
2930
out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n")
3031
out.emit("#ifdef NEED_OPCODE_METADATA\n")
@@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
3435
out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n")
3536

3637
out.emit("};\n\n")
37-
out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n")
38+
out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n")
3839
for uop in analysis.uops.values():
3940
if uop.replicated:
40-
out.emit(f"[{uop.name}] = {uop.replicated},\n")
41+
assert(uop.replicated.step == 1)
42+
out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, {uop.replicated.stop} }},\n")
4143

4244
out.emit("};\n\n")
4345
out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n")

0 commit comments

Comments
 (0)