diff --git a/doc/string/partition.rdoc b/doc/string/partition.rdoc
index ece034ee66225e..ee445bd21f32ba 100644
--- a/doc/string/partition.rdoc
+++ b/doc/string/partition.rdoc
@@ -29,7 +29,7 @@ If +pattern+ is a Regexp, performs the equivalent of self.match(pattern) ["hello", "", ""]
If +pattern+ is not a Regexp, converts it to a string (if it is not already one),
-then performs the equivalet of self.index(pattern)
+then performs the equivalent of self.index(pattern)
(and does _not_ set {pattern-matching global variables}[rdoc-ref:globals.md@Pattern+Matching]):
'hello'.partition('h') # => ["", "h", "ello"]
diff --git a/insns.def b/insns.def
index eef0d3f5dc1124..69a8210d7d6f99 100644
--- a/insns.def
+++ b/insns.def
@@ -1470,6 +1470,7 @@ opt_ltlt
* string. Then what happens if that codepoint does not exist in the
* string's encoding? Of course an exception. That's not a leaf. */
// attr bool leaf = false; /* has "invalid codepoint" exception */
+// attr bool zjit_profile = true;
{
val = vm_opt_ltlt(recv, obj);
@@ -1537,6 +1538,7 @@ opt_aset
/* This is another story than opt_aref. When vm_opt_aset() resorts
* to rb_hash_aset(), which should call #hash for `obj`. */
// attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */
+// attr bool zjit_profile = true;
{
val = vm_opt_aset(recv, obj, set);
@@ -1551,6 +1553,7 @@ opt_length
(CALL_DATA cd)
(VALUE recv)
(VALUE val)
+// attr bool zjit_profile = true;
{
val = vm_opt_length(recv, BOP_LENGTH);
@@ -1565,6 +1568,7 @@ opt_size
(CALL_DATA cd)
(VALUE recv)
(VALUE val)
+// attr bool zjit_profile = true;
{
val = vm_opt_length(recv, BOP_SIZE);
@@ -1624,6 +1628,7 @@ opt_regexpmatch2
(VALUE obj2, VALUE obj1)
(VALUE val)
// attr bool leaf = false; /* match_at() has rb_thread_check_ints() */
+// attr bool zjit_profile = true;
{
val = vm_opt_regexpmatch2(obj2, obj1);
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index 40df3ae4d5830b..8205d6de76bd2f 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -1602,7 +1602,7 @@ impl Assembler
if c_args.len() > 0 {
// Resolve C argument dependencies
let c_args_len = c_args.len() as isize;
- let moves = Self::reorder_reg_moves(&c_args.drain(..).collect());
+ let moves = Self::reorder_reg_moves(&std::mem::take(&mut c_args));
shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len);
// Push batched C arguments
diff --git a/zjit/src/asm/mod.rs b/zjit/src/asm/mod.rs
index aeb429382d88c0..32dc633a2941ce 100644
--- a/zjit/src/asm/mod.rs
+++ b/zjit/src/asm/mod.rs
@@ -20,7 +20,6 @@ pub mod arm64;
pub struct Label(pub usize);
/// Reference to an ASM label
-#[derive(Clone)]
pub struct LabelRef {
// Position in the code block where the label reference exists
pos: usize,
@@ -34,7 +33,7 @@ pub struct LabelRef {
num_bytes: usize,
/// The object that knows how to encode the branch instruction.
- encode: fn(&mut CodeBlock, i64, i64)
+ encode: Box<dyn Fn(&mut CodeBlock, i64, i64)>,
}
/// Block of memory into which instructions can be assembled
@@ -223,11 +222,11 @@ impl CodeBlock {
}
// Add a label reference at the current write position
- pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: fn(&mut CodeBlock, i64, i64)) {
+ pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: impl Fn(&mut CodeBlock, i64, i64) + 'static) {
assert!(label.0 < self.label_addrs.len());
// Keep track of the reference
- self.label_refs.push(LabelRef { pos: self.write_pos, label, num_bytes, encode });
+ self.label_refs.push(LabelRef { pos: self.write_pos, label, num_bytes, encode: Box::new(encode) });
// Move past however many bytes the instruction takes up
if self.write_pos + num_bytes < self.mem_size {
@@ -251,7 +250,7 @@ impl CodeBlock {
assert!(label_addr < self.mem_size);
self.write_pos = ref_pos;
- (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
+ (label_ref.encode.as_ref())(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
// Assert that we've written the same number of bytes that we
// expected to have written.
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index 5ac62c059986d5..6750926b35daa1 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -113,8 +113,8 @@ fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
1
} else {
- let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
- br(cb, Assembler::SCRATCH0);
+ let num_insns = emit_load_value(cb, Assembler::EMIT0_OPND, dst_addr as u64);
+ br(cb, Assembler::EMIT0_OPND);
num_insns + 1
};
@@ -181,7 +181,7 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
/// List of registers that can be used for register allocation.
/// This has the same number of registers for x86_64 and arm64.
-/// SCRATCH0 and SCRATCH1 are excluded.
+/// SCRATCH_OPND, EMIT0_OPND, and EMIT1_OPND are excluded.
pub const ALLOC_REGS: &[Reg] = &[
X0_REG,
X1_REG,
@@ -193,17 +193,33 @@ pub const ALLOC_REGS: &[Reg] = &[
X12_REG,
];
-impl Assembler
-{
- /// Special scratch registers for intermediate processing.
- /// This register is call-clobbered (so we don't have to save it before using it).
- /// Avoid using if you can since this is used to lower [Insn] internally and
- /// so conflicts are possible.
- pub const SCRATCH_REG: Reg = X16_REG;
- const SCRATCH0_REG: Reg = Self::SCRATCH_REG;
- const SCRATCH1_REG: Reg = X17_REG;
- const SCRATCH0: A64Opnd = A64Opnd::Reg(Self::SCRATCH0_REG);
- const SCRATCH1: A64Opnd = A64Opnd::Reg(Self::SCRATCH1_REG);
+/// Special scratch register for intermediate processing. It should be used only by
+/// [`Assembler::arm64_split_with_scratch_reg`] or [`Assembler::new_with_scratch_reg`].
+const SCRATCH_OPND: Opnd = Opnd::Reg(X15_REG);
+
+impl Assembler {
+ /// Special registers for intermediate processing in arm64_emit. They should be used only by arm64_emit.
+ /// TODO: Remove the use of these registers by splitting instructions in arm64_split_with_scratch_reg.
+ const EMIT0_REG: Reg = X16_REG;
+ const EMIT1_REG: Reg = X17_REG;
+ const EMIT0_OPND: A64Opnd = A64Opnd::Reg(Self::EMIT0_REG);
+ const EMIT1_OPND: A64Opnd = A64Opnd::Reg(Self::EMIT1_REG);
+
+ /// Return an Assembler with scratch registers disabled in the backend, and a scratch register.
+ pub fn new_with_scratch_reg() -> (Self, Opnd) {
+ (Self::new_with_label_names(Vec::default(), 0, true), SCRATCH_OPND)
+ }
+
+ /// Return true if opnd contains a scratch reg
+ pub fn has_scratch_reg(opnd: Opnd) -> bool {
+ match opnd {
+ Opnd::Reg(_) => opnd == SCRATCH_OPND,
+ Opnd::Mem(Mem { base: MemBase::Reg(reg_no), .. }) => {
+ reg_no == SCRATCH_OPND.unwrap_reg().reg_no
+ }
+ _ => false,
+ }
+ }
/// Get the list of registers from which we will allocate on this platform
pub fn get_alloc_regs() -> Vec<Reg> {
@@ -372,7 +388,7 @@ impl Assembler
let live_ranges: Vec<LiveRange> = take(&mut self.live_ranges);
let mut iterator = self.insns.into_iter().enumerate().peekable();
- let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len());
+ let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len(), self.accept_scratch_reg);
let asm = &mut asm_local;
while let Some((index, mut insn)) = iterator.next() {
@@ -555,14 +571,6 @@ impl Assembler
}
}
},
- Insn::IncrCounter { mem, value } => {
- let counter_addr = match mem {
- Opnd::Mem(_) => asm.lea(*mem),
- _ => *mem
- };
-
- asm.incr_counter(counter_addr, *value);
- },
Insn::JmpOpnd(opnd) => {
if let Opnd::Mem(_) = opnd {
let opnd0 = split_load_operand(asm, *opnd);
@@ -679,6 +687,58 @@ impl Assembler
asm_local
}
+ /// Split instructions using scratch registers. To maximize the use of the register pool for
+ /// VRegs, most splits should happen in [`Self::arm64_split`]. However, some instructions
+ /// need to be split with registers after `alloc_regs`, e.g. for `compile_side_exits`, so this
+ /// splits them and uses scratch registers for it.
+ fn arm64_split_with_scratch_reg(mut self) -> Assembler {
+ let mut iterator = self.insns.into_iter().enumerate().peekable();
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), self.live_ranges.len(), true);
+
+ while let Some((_, mut insn)) = iterator.next() {
+ match &mut insn {
+ // For compile_side_exits, support splitting simple C arguments here
+ Insn::CCall { opnds, .. } if !opnds.is_empty() => {
+ for (i, opnd) in opnds.iter().enumerate() {
+ asm.load_into(C_ARG_OPNDS[i], *opnd);
+ }
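+ // The arguments now live in C_ARG_OPNDS, so drop them from the CCall before emitting it.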
+ *opnds = vec![];
+ asm.push_insn(insn);
+ }
+ &mut Insn::Lea { opnd, out } => {
+ match (opnd, out) {
+ // Split here for compile_side_exits
+ (Opnd::Mem(_), Opnd::Mem(_)) => {
+ asm.lea_into(SCRATCH_OPND, opnd);
+ asm.store(out, SCRATCH_OPND);
+ }
+ _ => {
+ asm.push_insn(insn);
+ }
+ }
+ }
+ // Convert Opnd::const_ptr into Opnd::Mem. It's split here for compile_side_exits.
+ &mut Insn::IncrCounter { mem, value } => {
+ assert!(matches!(mem, Opnd::UImm(_)));
+ asm.load_into(SCRATCH_OPND, mem);
+ asm.lea_into(SCRATCH_OPND, Opnd::mem(64, SCRATCH_OPND, 0));
+ asm.incr_counter(SCRATCH_OPND, value);
+ }
+ // Resolve ParallelMov that couldn't be handled without a scratch register.
+ Insn::ParallelMov { moves } => {
+ for (reg, opnd) in Self::resolve_parallel_moves(moves, Some(SCRATCH_OPND)).unwrap() {
+ asm.load_into(Opnd::Reg(reg), opnd);
+ }
+ }
+ _ => {
+ asm.push_insn(insn);
+ }
+ }
+ }
+
+ asm
+ }
+
/// Emit platform-specific machine code
/// Returns a list of GC offsets. Can return failure to signal caller to retry.
fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Option<Vec<CodePtr>> {
@@ -739,8 +799,8 @@ impl Assembler
// that if it doesn't match it will skip over the
// instructions used for branching.
bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into());
- emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
- br(cb, Assembler::SCRATCH0);
+ emit_load_value(cb, Assembler::EMIT0_OPND, dst_addr as u64);
+ br(cb, Assembler::EMIT0_OPND);
// Here we'll return the number of instructions that it
// took to write out the destination address + 1 for the
@@ -806,8 +866,8 @@ impl Assembler
} else {
cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
}
- emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
- br(cb, Assembler::SCRATCH0);
+ emit_load_value(cb, Assembler::EMIT0_OPND, dst_addr);
+ br(cb, Assembler::EMIT0_OPND);
}
*/
} else {
@@ -979,7 +1039,7 @@ impl Assembler
(Some(Insn::JoMul(_)), _) |
(Some(Insn::PosMarker(_)), Some(Insn::JoMul(_))) => {
// Compute the high 64 bits
- smulh(cb, Self::SCRATCH0, left.into(), right.into());
+ smulh(cb, Self::EMIT0_OPND, left.into(), right.into());
// Compute the low 64 bits
// This may clobber one of the input registers,
@@ -988,11 +1048,11 @@ impl Assembler
// Produce a register that is all zeros or all ones
// Based on the sign bit of the 64-bit mul result
- asr(cb, Self::SCRATCH1, out.into(), A64Opnd::UImm(63));
+ asr(cb, Self::EMIT1_OPND, out.into(), A64Opnd::UImm(63));
// If the high 64-bits are not all zeros or all ones,
// matching the sign bit, then we have an overflow
- cmp(cb, Self::SCRATCH0, Self::SCRATCH1);
+ cmp(cb, Self::EMIT0_OPND, Self::EMIT1_OPND);
// Insn::JoMul will emit_conditional_jump::<{Condition::NE}>
}
_ => {
@@ -1021,77 +1081,57 @@ impl Assembler
Insn::LShift { opnd, shift, out } => {
lsl(cb, out.into(), opnd.into(), shift.into());
},
- store_insn @ Insn::Store { dest, src } => {
- // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are
- // accepted. As a rule of thumb, avoid using Assembler::SCRATCH as a memory
- // base register to gurantee things will work.
+ Insn::Store { dest, src } => {
+ // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are accepted.
let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else {
panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}");
};
- // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG,
- // so we panic to get the author to change their code.
- #[track_caller]
- fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) {
- assert_ne!(
- backend_use.reg_no,
- user_use,
- "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics"
- );
- }
-
- // Split src into SCRATCH0 if necessary
+ // Split src into EMIT0_OPND if necessary
let src_reg: A64Reg = match src {
Opnd::Reg(reg) => *reg,
// Use zero register when possible
Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG,
// Immediates
&Opnd::Imm(imm) => {
- assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
- emit_load_value(cb, Self::SCRATCH0, imm as u64);
- Self::SCRATCH0_REG
+ emit_load_value(cb, Self::EMIT0_OPND, imm as u64);
+ Self::EMIT0_REG
}
&Opnd::UImm(imm) => {
- assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
- emit_load_value(cb, Self::SCRATCH0, imm);
- Self::SCRATCH0_REG
+ emit_load_value(cb, Self::EMIT0_OPND, imm);
+ Self::EMIT0_REG
}
&Opnd::Value(value) => {
- assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
- emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value);
- Self::SCRATCH0_REG
+ emit_load_gc_value(cb, &mut gc_offsets, Self::EMIT0_OPND, value);
+ Self::EMIT0_REG
}
src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => {
- // For mem-to-mem store, load the source into SCRATCH0
- assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
+ // For mem-to-mem store, load the source into EMIT0_OPND
let src_mem = if mem_disp_fits_bits(src_disp) {
src_mem.into()
} else {
- // Split the load address into SCRATCH0 first if necessary
- assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG);
- load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp);
- A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0)
+ // Split the load address into EMIT0_OPND first if necessary
+ load_effective_address(cb, Self::EMIT0_OPND, src_base_reg_no, src_disp);
+ A64Opnd::new_mem(dest_num_bits, Self::EMIT0_OPND, 0)
};
match src_num_bits {
- 64 | 32 => ldur(cb, Self::SCRATCH0, src_mem),
- 16 => ldurh(cb, Self::SCRATCH0, src_mem),
- 8 => ldurb(cb, Self::SCRATCH0, src_mem),
+ 64 | 32 => ldur(cb, Self::EMIT0_OPND, src_mem),
+ 16 => ldurh(cb, Self::EMIT0_OPND, src_mem),
+ 8 => ldurb(cb, Self::EMIT0_OPND, src_mem),
num_bits => panic!("unexpected num_bits: {num_bits}")
};
- Self::SCRATCH0_REG
+ Self::EMIT0_REG
}
src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}")
};
let src = A64Opnd::Reg(src_reg);
- // Split dest into SCRATCH1 if necessary.
+ // Split dest into EMIT1_OPND if necessary.
let dest = if mem_disp_fits_bits(disp) {
dest.into()
} else {
- assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG);
- assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG);
- load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp);
- A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0)
+ load_effective_address(cb, Self::EMIT1_OPND, base_reg_no, disp);
+ A64Opnd::new_mem(dest_num_bits, Self::EMIT1_OPND, 0)
};
// This order may be surprising but it is correct. The way
@@ -1169,10 +1209,10 @@ impl Assembler
if let Target::Label(label_idx) = target {
// Set output to the raw address of the label
cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| {
- adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
+ adr(cb, Self::EMIT0_OPND, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
});
- mov(cb, out.into(), Self::SCRATCH0);
+ mov(cb, out.into(), Self::EMIT0_OPND);
} else {
// Set output to the jump target's raw address
let target_code = target.unwrap_code_ptr();
@@ -1197,15 +1237,15 @@ impl Assembler
}
// Push the flags/state register
- mrs(cb, Self::SCRATCH0, SystemRegister::NZCV);
- emit_push(cb, Self::SCRATCH0);
+ mrs(cb, Self::EMIT0_OPND, SystemRegister::NZCV);
+ emit_push(cb, Self::EMIT0_OPND);
},
Insn::CPopAll => {
let regs = Assembler::get_caller_save_regs();
// Pop the state/flags register
- msr(cb, SystemRegister::NZCV, Self::SCRATCH0);
- emit_pop(cb, Self::SCRATCH0);
+ msr(cb, SystemRegister::NZCV, Self::EMIT0_OPND);
+ emit_pop(cb, Self::EMIT0_OPND);
for reg in regs.into_iter().rev() {
emit_pop(cb, A64Opnd::Reg(reg));
@@ -1221,8 +1261,8 @@ impl Assembler
if b_offset_fits_bits((dst_addr - src_addr) / 4) {
bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
} else {
- emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
- blr(cb, Self::SCRATCH0);
+ emit_load_value(cb, Self::EMIT0_OPND, dst_addr as u64);
+ blr(cb, Self::EMIT0_OPND);
}
},
Insn::CRet { .. } => {
@@ -1302,17 +1342,17 @@ impl Assembler
let label = cb.new_label("incr_counter_loop".to_string());
cb.write_label(label);
- ldaxr(cb, Self::SCRATCH0, mem.into());
- add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into());
+ ldaxr(cb, Self::EMIT0_OPND, mem.into());
+ add(cb, Self::EMIT0_OPND, Self::EMIT0_OPND, value.into());
// The status register that gets used to track whether or
// not the store was successful must be 32 bytes. Since we
- // store the SCRATCH registers as their 64-bit versions, we
+ // store the EMIT registers as their 64-bit versions, we
// need to rewrap it here.
- let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32));
- stlxr(cb, status, Self::SCRATCH0, mem.into());
+ let status = A64Opnd::Reg(Self::EMIT1_REG.with_num_bits(32));
+ stlxr(cb, status, Self::EMIT0_OPND, mem.into());
- cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0));
+ cmp(cb, Self::EMIT1_OPND, A64Opnd::new_uimm(0));
emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label));
},
Insn::Breakpoint => {
@@ -1363,9 +1403,16 @@ impl Assembler
/// Optimize and compile the stored instructions
pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Result<(CodePtr, Vec<CodePtr>), CompileError> {
+ // The backend is allowed to use scratch registers only if it has not accepted them so far.
+ let use_scratch_reg = !self.accept_scratch_reg;
+
let asm = self.arm64_split();
let mut asm = asm.alloc_regs(regs)?;
+ // We put compile_side_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits.
asm.compile_side_exits();
+ if use_scratch_reg {
+ asm = asm.arm64_split_with_scratch_reg();
+ }
// Create label instances in the code block
for (idx, name) in asm.label_names.iter().enumerate() {
@@ -1809,12 +1856,39 @@ mod tests {
assert_snapshot!(cb.hexdump(), @"50000058030000140010000000000000b00200f8");
}
+ #[test]
+ fn test_store_with_valid_scratch_reg() {
+ let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg();
+ let mut cb = CodeBlock::new_dummy();
+ asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into());
+
+ asm.compile_with_num_regs(&mut cb, 0);
+ assert_disasm_snapshot!(cb.disasm(), @r"
+ 0x0: mov x16, #0x3902
+ 0x4: movk x16, #8, lsl #16
+ 0x8: stur x16, [x15]
+ ");
+ assert_snapshot!(cb.hexdump(), @"502087d21001a0f2f00100f8");
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_store_with_invalid_scratch_reg() {
+ let (_, scratch_reg) = Assembler::new_with_scratch_reg();
+ let (mut asm, mut cb) = setup_asm();
+ // This would put the source into scratch_reg, messing up the destination
+ asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into());
+
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
#[test]
#[should_panic]
- fn test_store_unserviceable() {
+ fn test_load_into_with_invalid_scratch_reg() {
+ let (_, scratch_reg) = Assembler::new_with_scratch_reg();
let (mut asm, mut cb) = setup_asm();
- // This would put the source into SCRATCH_REG, messing up the destination
- asm.store(Opnd::mem(64, SCRATCH_OPND, 0), 0x83902.into());
+ // This would put the source into scratch_reg, messing up the destination
+ asm.load_into(scratch_reg, 0x83902.into());
asm.compile_with_num_regs(&mut cb, 0);
}
@@ -2300,13 +2374,13 @@ mod tests {
asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());
assert_disasm_snapshot!(cb.disasm(), @"
- 0x0: mov x16, x0
+ 0x0: mov x15, x0
0x4: mov x0, x1
- 0x8: mov x1, x16
+ 0x8: mov x1, x15
0xc: mov x16, #0
0x10: blr x16
");
- assert_snapshot!(cb.hexdump(), @"f00300aae00301aae10310aa100080d200023fd6");
+ assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae1030faa100080d200023fd6");
}
#[test]
@@ -2324,16 +2398,16 @@ mod tests {
asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());
assert_disasm_snapshot!(cb.disasm(), @"
- 0x0: mov x16, x2
+ 0x0: mov x15, x2
0x4: mov x2, x3
- 0x8: mov x3, x16
- 0xc: mov x16, x0
+ 0x8: mov x3, x15
+ 0xc: mov x15, x0
0x10: mov x0, x1
- 0x14: mov x1, x16
+ 0x14: mov x1, x15
0x18: mov x16, #0
0x1c: blr x16
");
- assert_snapshot!(cb.hexdump(), @"f00302aae20303aae30310aaf00300aae00301aae10310aa100080d200023fd6");
+ assert_snapshot!(cb.hexdump(), @"ef0302aae20303aae3030faaef0300aae00301aae1030faa100080d200023fd6");
}
#[test]
@@ -2350,13 +2424,13 @@ mod tests {
asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());
assert_disasm_snapshot!(cb.disasm(), @"
- 0x0: mov x16, x0
+ 0x0: mov x15, x0
0x4: mov x0, x1
0x8: mov x1, x2
- 0xc: mov x2, x16
+ 0xc: mov x2, x15
0x10: mov x16, #0
0x14: blr x16
");
- assert_snapshot!(cb.hexdump(), @"f00300aae00301aae10302aae20310aa100080d200023fd6");
+ assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae10302aae2030faa100080d200023fd6");
}
}
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index 76a53c66d6b652..aad5600f569767 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -17,7 +17,6 @@ pub use crate::backend::current::{
NATIVE_STACK_PTR, NATIVE_BASE_PTR,
C_ARG_OPNDS, C_RET_REG, C_RET_OPND,
};
-pub const SCRATCH_OPND: Opnd = Opnd::Reg(Assembler::SCRATCH_REG);
pub static JIT_PRESERVED_REGS: &[Opnd] = &[CFP, SP, EC];
@@ -1173,6 +1172,10 @@ pub struct Assembler {
/// Names of labels
pub(super) label_names: Vec<String>,
+ /// If true, `push_insn` is allowed to use scratch registers.
+ /// On `compile`, it also disables the backend's use of them.
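+ /// Trampolines get such an Assembler from `new_with_scratch_reg`, which reserves the scratch register for them.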
+ pub(super) accept_scratch_reg: bool,
+
/// If Some, the next ccall should verify its leafness
leaf_ccall_stack_size: Option
}
@@ -1181,12 +1184,12 @@ impl Assembler
{
/// Create an Assembler
pub fn new() -> Self {
- Self::new_with_label_names(Vec::default(), 0)
+ Self::new_with_label_names(Vec::default(), 0, false)
}
/// Create an Assembler with parameters that are populated by another Assembler instance.
/// This API is used for copying an Assembler for the next compiler pass.
- pub fn new_with_label_names(label_names: Vec<String>, num_vregs: usize) -> Self {
+ pub fn new_with_label_names(label_names: Vec<String>, num_vregs: usize, accept_scratch_reg: bool) -> Self {
let mut live_ranges = Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY);
live_ranges.resize(num_vregs, LiveRange { start: None, end: None });
@@ -1194,6 +1197,7 @@ impl Assembler
insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
live_ranges,
label_names,
+ accept_scratch_reg,
leaf_ccall_stack_size: None,
}
}
@@ -1255,6 +1259,14 @@ impl Assembler
}
}
+ // If this Assembler should not accept scratch registers, assert no use of them.
+ if !self.accept_scratch_reg {
+ let opnd_iter = insn.opnd_iter();
+ for opnd in opnd_iter {
+ assert!(!Self::has_scratch_reg(*opnd), "should not use scratch register: {opnd:?}");
+ }
+ }
+
self.insns.push(insn);
}
@@ -1268,9 +1280,9 @@ impl Assembler
Target::Label(label)
}
- // Shuffle register moves, sometimes adding extra moves using SCRATCH_REG,
+ // Shuffle register moves, sometimes adding extra moves using scratch_reg,
// so that they will not rewrite each other before they are used.
- pub fn resolve_parallel_moves(old_moves: &[(Reg, Opnd)]) -> Vec<(Reg, Opnd)> {
+ pub fn resolve_parallel_moves(old_moves: &[(Reg, Opnd)], scratch_reg: Option<Opnd>) -> Option<Vec<(Reg, Opnd)>> {
// Return the index of a move whose destination is not used as a source if any.
fn find_safe_move(moves: &[(Reg, Opnd)]) -> Option {
moves.iter().enumerate().find(|&(_, &(dest_reg, _))| {
@@ -1289,19 +1301,21 @@ impl Assembler
new_moves.push(old_moves.remove(index));
}
- // No safe move. Load the source of one move into SCRATCH_REG, and
- // then load SCRATCH_REG into the destination when it's safe.
+ // No safe move. Load the source of one move into scratch_reg, and
+ // then load scratch_reg into the destination when it's safe.
if !old_moves.is_empty() {
- // Make sure it's safe to use SCRATCH_REG
- assert!(old_moves.iter().all(|&(_, opnd)| opnd != SCRATCH_OPND));
+ // If scratch_reg is None, return None and leave it to *_split_with_scratch_reg to resolve it.
+ let scratch_reg = scratch_reg?.unwrap_reg();
+ // Make sure it's safe to use scratch_reg
+ assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(scratch_reg)));
- // Move SCRATCH <- opnd, and delay reg <- SCRATCH
+ // Move scratch_reg <- opnd, and delay reg <- scratch_reg
let (reg, opnd) = old_moves.remove(0);
- new_moves.push((Assembler::SCRATCH_REG, opnd));
- old_moves.push((reg, SCRATCH_OPND));
+ new_moves.push((scratch_reg, opnd));
+ old_moves.push((reg, Opnd::Reg(scratch_reg)));
}
}
- new_moves
+ Some(new_moves)
}
/// Sets the out field on the various instructions that require allocated
@@ -1345,7 +1359,7 @@ impl Assembler
// live_ranges is indexed by original `index` given by the iterator.
let live_ranges: Vec<LiveRange> = take(&mut self.live_ranges);
let mut iterator = self.insns.into_iter().enumerate().peekable();
- let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len());
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len(), self.accept_scratch_reg);
while let Some((index, mut insn)) = iterator.next() {
let before_ccall = match (&insn, iterator.peek().map(|(_, insn)| insn)) {
@@ -1510,9 +1524,14 @@ impl Assembler
let is_ccall = matches!(insn, Insn::CCall { .. });
match insn {
Insn::ParallelMov { moves } => {
- // Now that register allocation is done, it's ready to resolve parallel moves.
- for (reg, opnd) in Self::resolve_parallel_moves(&moves) {
- asm.load_into(Opnd::Reg(reg), opnd);
+ // For trampolines that use scratch registers, attempt to lower ParallelMov without scratch_reg.
+ if let Some(moves) = Self::resolve_parallel_moves(&moves, None) {
+ for (reg, opnd) in moves {
+ asm.load_into(Opnd::Reg(reg), opnd);
+ }
+ } else {
+ // If it needs a scratch_reg, leave it to *_split_with_scratch_reg to handle it.
+ asm.push_insn(Insn::ParallelMov { moves });
}
}
Insn::CCall { opnds, fptr, start_marker, end_marker, out } => {
@@ -1586,7 +1605,7 @@ impl Assembler
for (idx, target) in targets {
// Compile a side exit. Note that this is past the split pass and alloc_regs(),
- // so you can't use a VReg or an instruction that needs to be split.
+ // so you can't use an instruction that returns a VReg.
if let Target::SideExit { pc, stack, locals, reason, label } = target {
asm_comment!(self, "Exit: {reason}");
let side_exit_label = if let Some(label) = label {
@@ -1609,35 +1628,24 @@ impl Assembler
}
asm_comment!(self, "save cfp->pc");
- self.load_into(SCRATCH_OPND, Opnd::const_ptr(pc));
- self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), SCRATCH_OPND);
+ self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(pc));
asm_comment!(self, "save cfp->sp");
- self.lea_into(SCRATCH_OPND, Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32));
- let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP);
- self.store(cfp_sp, SCRATCH_OPND);
+ self.lea_into(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32));
// Using C_RET_OPND as an additional scratch register, which is no longer used
if get_option!(stats) {
asm_comment!(self, "increment a side exit counter");
- self.load_into(SCRATCH_OPND, Opnd::const_ptr(exit_counter_ptr(reason)));
- self.incr_counter_with_reg(Opnd::mem(64, SCRATCH_OPND, 0), 1.into(), C_RET_OPND);
+ self.incr_counter(Opnd::const_ptr(exit_counter_ptr(reason)), 1.into());
if let SideExitReason::UnhandledYARVInsn(opcode) = reason {
asm_comment!(self, "increment an unhandled YARV insn counter");
- self.load_into(SCRATCH_OPND, Opnd::const_ptr(exit_counter_ptr_for_opcode(opcode)));
- self.incr_counter_with_reg(Opnd::mem(64, SCRATCH_OPND, 0), 1.into(), C_RET_OPND);
+ self.incr_counter(Opnd::const_ptr(exit_counter_ptr_for_opcode(opcode)), 1.into());
}
}
if get_option!(trace_side_exits) {
- // Use `load_into` with `C_ARG_OPNDS` instead of `opnds` argument for ccall, since `compile_side_exits`
- // is after the split pass, which would allow use of `opnds`.
- self.load_into(C_ARG_OPNDS[0], Opnd::const_ptr(pc as *const u8));
- self.ccall(
- rb_zjit_record_exit_stack as *const u8,
- vec![]
- );
+ asm_ccall!(self, rb_zjit_record_exit_stack, Opnd::const_ptr(pc as *const u8));
}
asm_comment!(self, "exit to the interpreter");
@@ -1823,19 +1831,6 @@ impl Assembler {
self.push_insn(Insn::IncrCounter { mem, value });
}
- /// incr_counter() but uses a specific register to split Insn::Lea
- pub fn incr_counter_with_reg(&mut self, mem: Opnd, value: Opnd, reg: Opnd) {
- assert!(matches!(reg, Opnd::Reg(_)), "incr_counter_with_reg should take a register, got: {reg:?}");
- let counter_opnd = if cfg!(target_arch = "aarch64") { // See arm64_split()
- assert_ne!(reg, SCRATCH_OPND, "SCRATCH_REG should be reserved for IncrCounter");
- self.lea_into(reg, mem);
- reg
- } else { // x86_emit() expects Opnd::Mem
- mem
- };
- self.incr_counter(counter_opnd, value);
- }
-
pub fn jbe(&mut self, target: Target) {
self.push_insn(Insn::Jbe(target));
}
@@ -1898,7 +1893,7 @@ impl Assembler {
}
pub fn lea_into(&mut self, out: Opnd, opnd: Opnd) {
- assert!(matches!(out, Opnd::Reg(_)), "Destination of lea_into must be a register, got: {out:?}");
+ assert!(matches!(out, Opnd::Reg(_) | Opnd::Mem(_)), "Destination of lea_into must be a register or memory, got: {out:?}");
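+ // A memory destination is later split into a register lea plus a store by the *_split_with_scratch_reg passes.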
self.push_insn(Insn::Lea { opnd, out });
}
diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs
index 2edd15380871e1..b6c4658463048a 100644
--- a/zjit/src/backend/x86_64/mod.rs
+++ b/zjit/src/backend/x86_64/mod.rs
@@ -83,7 +83,7 @@ impl From<&Opnd> for X86Opnd {
/// List of registers that can be used for register allocation.
/// This has the same number of registers for x86_64 and arm64.
-/// SCRATCH_REG is excluded.
+/// SCRATCH_OPND is excluded.
pub const ALLOC_REGS: &[Reg] = &[
RDI_REG,
RSI_REG,
@@ -95,14 +95,26 @@ pub const ALLOC_REGS: &[Reg] = &[
RAX_REG,
];
-impl Assembler
-{
- /// Special scratch registers for intermediate processing.
- /// This register is call-clobbered (so we don't have to save it before using it).
- /// Avoid using if you can since this is used to lower [Insn] internally and
- /// so conflicts are possible.
- pub const SCRATCH_REG: Reg = R11_REG;
- const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);
+/// Special scratch register for intermediate processing. It should be used only by
+/// [`Assembler::x86_split_with_scratch_reg`] or [`Assembler::new_with_scratch_reg`].
+const SCRATCH_OPND: Opnd = Opnd::Reg(R11_REG);
+
+impl Assembler {
+ /// Return an Assembler with scratch registers disabled in the backend, and a scratch register.
+ pub fn new_with_scratch_reg() -> (Self, Opnd) {
+ (Self::new_with_label_names(Vec::default(), 0, true), SCRATCH_OPND)
+ }
+
+ /// Return true if opnd contains a scratch reg
+ pub fn has_scratch_reg(opnd: Opnd) -> bool {
+ match opnd {
+ Opnd::Reg(_) => opnd == SCRATCH_OPND,
+ Opnd::Mem(Mem { base: MemBase::Reg(reg_no), .. }) => {
+ reg_no == SCRATCH_OPND.unwrap_reg().reg_no
+ }
+ _ => false,
+ }
+ }
/// Get the list of registers from which we can allocate on this platform
pub fn get_alloc_regs() -> Vec<Reg> {
@@ -127,7 +139,7 @@ impl Assembler
{
let live_ranges: Vec<LiveRange> = take(&mut self.live_ranges);
let mut iterator = self.insns.into_iter().enumerate().peekable();
- let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len());
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len(), self.accept_scratch_reg);
while let Some((index, mut insn)) = iterator.next() {
let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. });
@@ -374,36 +386,165 @@ impl Assembler
asm
}
- /// Emit platform-specific machine code
- pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Option<Vec<CodePtr>> {
+ /// Split instructions using scratch registers. To maximize the use of the register pool
+ /// for VRegs, most splits should happen in [`Self::x86_split`]. However, some instructions
+ /// need to be split with registers after `alloc_regs`, e.g. for `compile_side_exits`, so
+ /// this splits them and uses scratch registers for it.
+ pub fn x86_split_with_scratch_reg(mut self) -> Assembler {
/// For some instructions, we want to be able to lower a 64-bit operand
/// without requiring more registers to be available in the register
- /// allocator. So we just use the SCRATCH0 register temporarily to hold
+ /// allocator. So we just use the SCRATCH_OPND register temporarily to hold
/// the value before we immediately use it.
- fn emit_64bit_immediate(cb: &mut CodeBlock, opnd: &Opnd) -> X86Opnd {
+ fn split_64bit_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
match opnd {
Opnd::Imm(value) => {
// 32-bit values will be sign-extended
- if imm_num_bits(*value) > 32 {
- mov(cb, Assembler::SCRATCH0, opnd.into());
- Assembler::SCRATCH0
+ if imm_num_bits(value) > 32 {
+ asm.mov(SCRATCH_OPND, opnd);
+ SCRATCH_OPND
} else {
- opnd.into()
+ opnd
}
},
Opnd::UImm(value) => {
// 32-bit values will be sign-extended
- if imm_num_bits(*value as i64) > 32 {
- mov(cb, Assembler::SCRATCH0, opnd.into());
- Assembler::SCRATCH0
+ if imm_num_bits(value as i64) > 32 {
+ asm.mov(SCRATCH_OPND, opnd);
+ SCRATCH_OPND
} else {
- imm_opnd(*value as i64)
+ Opnd::Imm(value as i64)
}
},
- _ => opnd.into()
+ _ => opnd
}
}
+ let mut iterator = self.insns.into_iter().enumerate().peekable();
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), self.live_ranges.len(), true);
+
+ while let Some((_, mut insn)) = iterator.next() {
+ match &mut insn {
+ Insn::Add { right, .. } |
+ Insn::Sub { right, .. } |
+ Insn::Mul { right, .. } |
+ Insn::And { right, .. } |
+ Insn::Or { right, .. } |
+ Insn::Xor { right, .. } |
+ Insn::Test { right, .. } => {
+ *right = split_64bit_immediate(&mut asm, *right);
+ asm.push_insn(insn);
+ }
+ Insn::Cmp { left, right } => {
+ let num_bits = match right {
+ Opnd::Imm(value) => Some(imm_num_bits(*value)),
+ Opnd::UImm(value) => Some(uimm_num_bits(*value)),
+ _ => None
+ };
+
+ // If the immediate is less than 64 bits (like 32, 16, 8), and the operand
+ // sizes match, then we can represent it as an immediate in the instruction
+ // without moving it to a register first.
+ // IOW, 64 bit immediates must always be moved to a register
+ // before comparisons, where other sizes may be encoded
+ // directly in the instruction.
+ let use_imm = num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64;
+ if !use_imm {
+ *right = split_64bit_immediate(&mut asm, *right);
+ }
+ asm.push_insn(insn);
+ }
+ // For compile_side_exits, support splitting simple C arguments here
+ Insn::CCall { opnds, .. } if !opnds.is_empty() => {
+ for (i, opnd) in opnds.iter().enumerate() {
+ asm.load_into(C_ARG_OPNDS[i], *opnd);
+ }
+ *opnds = vec![];
+ asm.push_insn(insn);
+ }
+ &mut Insn::Lea { opnd, out } => {
+ match (opnd, out) {
+ // Split here for compile_side_exits
+ (Opnd::Mem(_), Opnd::Mem(_)) => {
+ asm.lea_into(SCRATCH_OPND, opnd);
+ asm.store(out, SCRATCH_OPND);
+ }
+ _ => {
+ asm.push_insn(insn);
+ }
+ }
+ }
+ Insn::LeaJumpTarget { target, out } => {
+ if let Target::Label(_) = target {
+ asm.push_insn(Insn::LeaJumpTarget { out: SCRATCH_OPND, target: target.clone() });
+ asm.mov(*out, SCRATCH_OPND);
+ }
+ }
+ // Convert Opnd::const_ptr into Opnd::Mem. This split is done here to give
+ // a register for compile_side_exits.
+ &mut Insn::IncrCounter { mem, value } => {
+ assert!(matches!(mem, Opnd::UImm(_)));
+ asm.load_into(SCRATCH_OPND, mem);
+ asm.incr_counter(Opnd::mem(64, SCRATCH_OPND, 0), value);
+ }
+ // Resolve ParallelMov that couldn't be handled without a scratch register.
+ Insn::ParallelMov { moves } => {
+ for (reg, opnd) in Self::resolve_parallel_moves(&moves, Some(SCRATCH_OPND)).unwrap() {
+ asm.load_into(Opnd::Reg(reg), opnd);
+ }
+ }
+ // Handle various operand combinations for spills on compile_side_exits.
+ &mut Insn::Store { dest, src } => {
+ let Opnd::Mem(Mem { num_bits, .. }) = dest else {
+ panic!("Unexpected Insn::Store destination in x86_split_with_scratch_reg: {dest:?}");
+ };
+
+ let src = match src {
+ Opnd::Reg(_) => src,
+ Opnd::Mem(_) => {
+ asm.mov(SCRATCH_OPND, src);
+ SCRATCH_OPND
+ }
+ Opnd::Imm(imm) => {
+ // For 64 bit destinations, 32-bit values will be sign-extended
+ if num_bits == 64 && imm_num_bits(imm) > 32 {
+ asm.mov(SCRATCH_OPND, src);
+ SCRATCH_OPND
+ } else if uimm_num_bits(imm as u64) <= num_bits {
+ // If the bit string is short enough for the destination, use the unsigned representation.
+ // Note that 64-bit and negative values are ruled out.
+ Opnd::UImm(imm as u64)
+ } else {
+ src
+ }
+ }
+ Opnd::UImm(imm) => {
+ // For 64 bit destinations, 32-bit values will be sign-extended
+ if num_bits == 64 && imm_num_bits(imm as i64) > 32 {
+ asm.mov(SCRATCH_OPND, src);
+ SCRATCH_OPND
+ } else {
+ src.into()
+ }
+ }
+ Opnd::Value(_) => {
+ asm.load_into(SCRATCH_OPND, src);
+ SCRATCH_OPND
+ }
+ src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_split_with_scratch_reg: {src:?}"),
+ };
+ asm.store(dest, src);
+ }
+ _ => {
+ asm.push_insn(insn);
+ }
+ }
+ }
+
+ asm
+ }
+
+ /// Emit platform-specific machine code
+ pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Option<Vec<CodePtr>> {
fn emit_csel(
cb: &mut CodeBlock,
truthy: Opnd,
@@ -506,33 +647,27 @@ impl Assembler
}
Insn::Add { left, right, .. } => {
- let opnd1 = emit_64bit_immediate(cb, right);
- add(cb, left.into(), opnd1);
+ add(cb, left.into(), right.into());
},
Insn::Sub { left, right, .. } => {
- let opnd1 = emit_64bit_immediate(cb, right);
- sub(cb, left.into(), opnd1);
+ sub(cb, left.into(), right.into());
},
Insn::Mul { left, right, .. } => {
- let opnd1 = emit_64bit_immediate(cb, right);
- imul(cb, left.into(), opnd1);
+ imul(cb, left.into(), right.into());
},
Insn::And { left, right, .. } => {
- let opnd1 = emit_64bit_immediate(cb, right);
- and(cb, left.into(), opnd1);
+ and(cb, left.into(), right.into());
},
Insn::Or { left, right, .. } => {
- let opnd1 = emit_64bit_immediate(cb, right);
- or(cb, left.into(), opnd1);
+ or(cb, left.into(), right.into());
},
Insn::Xor { left, right, .. } => {
- let opnd1 = emit_64bit_immediate(cb, right);
- xor(cb, left.into(), opnd1);
+ xor(cb, left.into(), right.into());
},
Insn::Not { opnd, .. } => {
@@ -551,64 +686,6 @@ impl Assembler
shr(cb, opnd.into(), shift.into())
},
- store_insn @ Insn::Store { dest, src } => {
- let &Opnd::Mem(Mem { num_bits, base: MemBase::Reg(base_reg_no), disp: _ }) = dest else {
- panic!("Unexpected Insn::Store destination in x64_emit: {dest:?}");
- };
-
- // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG,
- // so we panic to get the author to change their code.
- #[track_caller]
- fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) {
- assert_ne!(
- backend_use.reg_no,
- user_use,
- "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics"
- );
- }
-
- let scratch = X86Opnd::Reg(Self::SCRATCH_REG);
- let src = match src {
- Opnd::Reg(_) => src.into(),
- &Opnd::Mem(_) => {
- assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
- mov(cb, scratch, src.into());
- scratch
- }
- &Opnd::Imm(imm) => {
- // For 64 bit destinations, 32-bit values will be sign-extended
- if num_bits == 64 && imm_num_bits(imm) > 32 {
- assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
- mov(cb, scratch, src.into());
- scratch
- } else if uimm_num_bits(imm as u64) <= num_bits {
- // If the bit string is short enough for the destination, use the unsigned representation.
- // Note that 64-bit and negative values are ruled out.
- uimm_opnd(imm as u64)
- } else {
- src.into()
- }
- }
- &Opnd::UImm(imm) => {
- // For 64 bit destinations, 32-bit values will be sign-extended
- if num_bits == 64 && imm_num_bits(imm as i64) > 32 {
- assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
- mov(cb, scratch, src.into());
- scratch
- } else {
- src.into()
- }
- }
- &Opnd::Value(value) => {
- assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
- emit_load_gc_value(cb, &mut gc_offsets, scratch, value);
- scratch
- }
- src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_emit: {src:?}")
- };
- mov(cb, dest.into(), src);
- }
-
// This assumes only load instructions can contain references to GC'd Value operands
Insn::Load { opnd, out } |
Insn::LoadInto { dest: out, opnd } => {
@@ -626,6 +703,7 @@ impl Assembler
Insn::ParallelMov { .. } => unreachable!("{insn:?} should have been lowered at alloc_regs()"),
+ Insn::Store { dest, src } |
Insn::Mov { dest, src } => {
mov(cb, dest.into(), src.into());
},
@@ -638,13 +716,12 @@ impl Assembler
// Load address of jump target
Insn::LeaJumpTarget { target, out } => {
if let Target::Label(label) = target {
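+ // Copy `out` by value so the 'static closure below can capture it and lea straight into the destination, with no extra mov through a scratch register.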
+ let out = *out;
// Set output to the raw address of the label
- cb.label_ref(*label, 7, |cb, src_addr, dst_addr| {
+ cb.label_ref(*label, 7, move |cb, src_addr, dst_addr| {
let disp = dst_addr - src_addr;
- lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap()));
+ lea(cb, out.into(), mem_opnd(8, RIP, disp.try_into().unwrap()));
});
-
- mov(cb, out.into(), Self::SCRATCH0);
} else {
// Set output to the jump target's raw address
let target_code = target.unwrap_code_ptr();
@@ -700,30 +777,12 @@ impl Assembler
// Compare
Insn::Cmp { left, right } => {
- let num_bits = match right {
- Opnd::Imm(value) => Some(imm_num_bits(*value)),
- Opnd::UImm(value) => Some(uimm_num_bits(*value)),
- _ => None
- };
-
- // If the immediate is less than 64 bits (like 32, 16, 8), and the operand
- // sizes match, then we can represent it as an immediate in the instruction
- // without moving it to a register first.
- // IOW, 64 bit immediates must always be moved to a register
- // before comparisons, where other sizes may be encoded
- // directly in the instruction.
- if num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64 {
- cmp(cb, left.into(), right.into());
- } else {
- let emitted = emit_64bit_immediate(cb, right);
- cmp(cb, left.into(), emitted);
- }
+ cmp(cb, left.into(), right.into());
}
// Test and set flags
Insn::Test { left, right } => {
- let emitted = emit_64bit_immediate(cb, right);
- test(cb, left.into(), emitted);
+ test(cb, left.into(), right.into());
}
Insn::JmpOpnd(opnd) => {
@@ -893,9 +952,16 @@ impl Assembler
/// Optimize and compile the stored instructions
pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Result<(CodePtr, Vec<CodePtr>), CompileError> {
+ // The backend is allowed to use scratch registers only if it has not accepted them so far.
+ let use_scratch_regs = !self.accept_scratch_reg;
+
let asm = self.x86_split();
let mut asm = asm.alloc_regs(regs)?;
+ // We put compile_side_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits.
asm.compile_side_exits();
+ if use_scratch_regs {
+ asm = asm.x86_split_with_scratch_reg();
+ }
// Create label instances in the code block
for (idx, name) in asm.label_names.iter().enumerate() {
@@ -1527,6 +1593,7 @@ mod tests {
assert!(imitation_heap_value.heap_object_p());
asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into());
+ asm = asm.x86_split_with_scratch_reg();
let gc_offsets = asm.x86_emit(&mut cb).unwrap();
assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset");
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 262a0361b4a74e..c5bdbcfe0a5a83 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -18,7 +18,7 @@ use crate::state::ZJITState;
use crate::stats::{send_fallback_counter, exit_counter_for_compile_error, incr_counter, incr_counter_by, send_fallback_counter_for_method_type, send_without_block_fallback_counter_for_method_type, send_fallback_counter_ptr_for_opcode, CompileError};
use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}};
use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
-use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SCRATCH_OPND, SP};
+use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP};
use crate::hir::{iseq_to_hir, BlockId, BranchEdge, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType};
use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason};
use crate::hir_type::{types, Type};
@@ -1592,9 +1592,7 @@ fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd,
/// Generate code that records unoptimized C functions if --zjit-stats is enabled
fn gen_incr_counter_ptr(asm: &mut Assembler, counter_ptr: *mut u64) {
if get_option!(stats) {
- let ptr_reg = asm.load(Opnd::const_ptr(counter_ptr as *const u8));
- let counter_opnd = Opnd::mem(64, ptr_reg, 0);
- asm.incr_counter(counter_opnd, Opnd::UImm(1));
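+ // Passing the raw counter pointer is fine here: the backend's *_split_with_scratch_reg pass loads it into a scratch register before incrementing.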
+ asm.incr_counter(Opnd::const_ptr(counter_ptr as *const u8), Opnd::UImm(1));
}
}
@@ -1963,12 +1961,12 @@ fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result
/// Compile a stub for an ISEQ called by SendWithoutBlockDirect
fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result<CodePtr, CompileError> {
- let mut asm = Assembler::new();
+ let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg();
asm_comment!(asm, "Stub: {}", iseq_get_location(iseq_call.iseq.get(), 0));
// Call function_stub_hit using the shared trampoline. See `gen_function_stub_hit_trampoline`.
// Use load_into instead of mov, which is split on arm64, to avoid clobbering ALLOC_REGS.
- asm.load_into(SCRATCH_OPND, Opnd::const_ptr(Rc::into_raw(iseq_call)));
+ asm.load_into(scratch_reg, Opnd::const_ptr(Rc::into_raw(iseq_call)));
asm.jmp(ZJITState::get_function_stub_hit_trampoline().into());
asm.compile(cb).map(|(code_ptr, gc_offsets)| {
@@ -1979,7 +1977,7 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result Result {
- let mut asm = Assembler::new();
+ let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg();
asm_comment!(asm, "function_stub_hit trampoline");
// Maintain alignment for x86_64, and set up a frame for arm64 properly
@@ -1992,8 +1990,8 @@ pub fn gen_function_stub_hit_trampoline(cb: &mut CodeBlock) -> Result<CodePtr, CompileError> {
- let insns = fun.blocks[tmp_block.0].insns.drain(..).collect::<Vec<_>>();
+ let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns);
fun.blocks[block.0].insns.extend(insns);
fun.make_equal_to(send_insn_id, replacement);
fun.remove_block(tmp_block);
@@ -2453,7 +2453,7 @@ impl Function {
if let Some(replacement) = (props.inline)(fun, tmp_block, recv, &args, state) {
// Copy contents of tmp_block to block
assert_ne!(block, tmp_block);
- let insns = fun.blocks[tmp_block.0].insns.drain(..).collect::<Vec<_>>();
+ let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns);
fun.blocks[block.0].insns.extend(insns);
fun.make_equal_to(send_insn_id, replacement);
fun.remove_block(tmp_block);
@@ -4957,7 +4957,7 @@ mod tests {
}
#[track_caller]
- fn assert_contains_opcode(method: &str, opcode: u32) {
+ pub fn assert_contains_opcode(method: &str, opcode: u32) {
let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("self", method));
unsafe { crate::cruby::rb_zjit_profile_disable(iseq) };
assert!(iseq_contains_opcode(iseq, opcode), "iseq {method} does not contain {}", insn_name(opcode as usize));
@@ -7999,6 +7999,7 @@ mod opt_tests {
use super::*;
use crate::{hir_strings, options::*};
use insta::assert_snapshot;
+ use super::tests::assert_contains_opcode;
#[track_caller]
fn hir_string(method: &str) -> String {
@@ -12817,4 +12818,145 @@ mod opt_tests {
Return v25
");
}
+
+ #[test]
+ fn test_optimize_array_aset() {
+ eval("
+ def test(arr)
+ arr[1] = 10
+ end
+ test([])
+ ");
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@:3:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal l0, SP@4
+ Jump bb2(v1, v2)
+ bb1(v5:BasicObject, v6:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v5, v6)
+ bb2(v8:BasicObject, v9:BasicObject):
+ v14:Fixnum[1] = Const Value(1)
+ v15:Fixnum[10] = Const Value(10)
+ PatchPoint MethodRedefined(Array@0x1000, []=@0x1008, cme:0x1010)
+ PatchPoint NoSingletonClass(Array@0x1000)
+ v28:ArrayExact = GuardType v9, ArrayExact
+ v29:BasicObject = CCallVariadic []=@0x1038, v28, v14, v15
+ CheckInterrupts
+ Return v15
+ ");
+ }
+
+ #[test]
+ fn test_optimize_array_ltlt() {
+ eval("
+ def test(arr)
+ arr << 1
+ end
+ test([])
+ ");
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@:3:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal l0, SP@4
+ Jump bb2(v1, v2)
+ bb1(v5:BasicObject, v6:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v5, v6)
+ bb2(v8:BasicObject, v9:BasicObject):
+ v13:Fixnum[1] = Const Value(1)
+ PatchPoint MethodRedefined(Array@0x1000, <<@0x1008, cme:0x1010)
+ PatchPoint NoSingletonClass(Array@0x1000)
+ v26:ArrayExact = GuardType v9, ArrayExact
+ v27:BasicObject = CCallWithFrame <<@0x1038, v26, v13
+ CheckInterrupts
+ Return v27
+ ");
+ }
+
+ #[test]
+ fn test_optimize_array_length() {
+ eval("
+ def test(arr) = arr.length
+ test([])
+ ");
+ assert_contains_opcode("test", YARVINSN_opt_length);
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@:2:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal l0, SP@4
+ Jump bb2(v1, v2)
+ bb1(v5:BasicObject, v6:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v5, v6)
+ bb2(v8:BasicObject, v9:BasicObject):
+ PatchPoint MethodRedefined(Array@0x1000, length@0x1008, cme:0x1010)
+ PatchPoint NoSingletonClass(Array@0x1000)
+ v25:ArrayExact = GuardType v9, ArrayExact
+ v26:Fixnum = CCall length@0x1038, v25
+ CheckInterrupts
+ Return v26
+ ");
+ }
+
+ #[test]
+ fn test_optimize_array_size() {
+ eval("
+ def test(arr) = arr.size
+ test([])
+ ");
+ assert_contains_opcode("test", YARVINSN_opt_size);
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@:2:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal l0, SP@4
+ Jump bb2(v1, v2)
+ bb1(v5:BasicObject, v6:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v5, v6)
+ bb2(v8:BasicObject, v9:BasicObject):
+ PatchPoint MethodRedefined(Array@0x1000, size@0x1008, cme:0x1010)
+ PatchPoint NoSingletonClass(Array@0x1000)
+ v25:ArrayExact = GuardType v9, ArrayExact
+ v26:Fixnum = CCall size@0x1038, v25
+ CheckInterrupts
+ Return v26
+ ");
+ }
+
+ #[test]
+ fn test_optimize_regexpmatch2() {
+ eval(r#"
+ def test(s) = s =~ /a/
+ test("foo")
+ "#);
+ assert_contains_opcode("test", YARVINSN_opt_regexpmatch2);
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@:2:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal l0, SP@4
+ Jump bb2(v1, v2)
+ bb1(v5:BasicObject, v6:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v5, v6)
+ bb2(v8:BasicObject, v9:BasicObject):
+ v13:RegexpExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
+ PatchPoint MethodRedefined(String@0x1008, =~@0x1010, cme:0x1018)
+ PatchPoint NoSingletonClass(String@0x1008)
+ v26:StringExact = GuardType v9, StringExact
+ v27:BasicObject = CCallWithFrame =~@0x1040, v26, v13
+ CheckInterrupts
+ Return v27
+ ");
+ }
}
diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs
index 67f2fdc7403822..e7db47142bcf65 100644
--- a/zjit/src/profile.rs
+++ b/zjit/src/profile.rs
@@ -74,9 +74,14 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) {
YARVINSN_opt_or => profile_operands(profiler, profile, 2),
YARVINSN_opt_empty_p => profile_operands(profiler, profile, 1),
YARVINSN_opt_aref => profile_operands(profiler, profile, 2),
+ YARVINSN_opt_ltlt => profile_operands(profiler, profile, 2),
+ YARVINSN_opt_aset => profile_operands(profiler, profile, 3),
YARVINSN_opt_not => profile_operands(profiler, profile, 1),
YARVINSN_getinstancevariable => profile_self(profiler, profile),
+ YARVINSN_opt_regexpmatch2 => profile_operands(profiler, profile, 2),
YARVINSN_objtostring => profile_operands(profiler, profile, 1),
+ YARVINSN_opt_length => profile_operands(profiler, profile, 1),
+ YARVINSN_opt_size => profile_operands(profiler, profile, 1),
YARVINSN_opt_send_without_block => {
let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr();
let argc = unsafe { vm_ci_argc((*cd).ci) };