diff --git a/test/.excludes-zjit/TestKeywordArguments.rb b/test/.excludes-zjit/TestKeywordArguments.rb deleted file mode 100644 index f52bdf6d30d6c6..00000000000000 --- a/test/.excludes-zjit/TestKeywordArguments.rb +++ /dev/null @@ -1 +0,0 @@ -exclude(/test_/, 'Multiple tests make ZJIT panic') diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 684a4bb2d40723..6db57e18ba729c 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1095,6 +1095,14 @@ def test_require_rubygems_with_auto_compact }, call_threshold: 2 end + def test_stats + assert_runs 'true', %q{ + def test = 1 + test + RubyVM::ZJIT.stats[:zjit_insns_count] > 0 + }, stats: true + end + def test_zjit_option_uses_array_each_in_ruby omit 'ZJIT wrongly compiles Array#each, so it is disabled for now' assert_runs '""', %q{ @@ -1414,12 +1422,13 @@ def assert_runs(expected, test_script, insns: [], assert_compiles: false, **opts end # Run a Ruby process with ZJIT options and a pipe for writing test results - def eval_with_jit(script, call_threshold: 1, num_profiles: 1, timeout: 1000, pipe_fd:, debug: true) + def eval_with_jit(script, call_threshold: 1, num_profiles: 1, stats: false, debug: true, timeout: 1000, pipe_fd:) args = [ "--disable-gems", "--zjit-call-threshold=#{call_threshold}", "--zjit-num-profiles=#{num_profiles}", ] + args << "--zjit-stats" if stats args << "--zjit-debug" if debug args << "-e" << script_shell_encode(script) pipe_r, pipe_w = IO.pipe diff --git a/vm_method.c b/vm_method.c index 874e25ed7666e6..76b1c97d046c2b 100644 --- a/vm_method.c +++ b/vm_method.c @@ -136,17 +136,19 @@ rb_vm_cc_table_create(size_t capa) static enum rb_id_table_iterator_result vm_cc_table_dup_i(ID key, VALUE old_ccs_ptr, void *data) { + VALUE new_table = (VALUE)data; struct rb_class_cc_entries *old_ccs = (struct rb_class_cc_entries *)old_ccs_ptr; size_t memsize = vm_ccs_alloc_size(old_ccs->capa); - struct rb_class_cc_entries *new_ccs = ruby_xmalloc(memsize); + struct rb_class_cc_entries *new_ccs = ruby_xcalloc(1, memsize); + rb_managed_id_table_insert(new_table, key, (VALUE)new_ccs); + memcpy(new_ccs, old_ccs, memsize); #if VM_CHECK_MODE > 0 new_ccs->debug_sig = ~(VALUE)new_ccs; #endif - VALUE new_table = (VALUE)data; - rb_managed_id_table_insert(new_table, key, (VALUE)new_ccs); + RB_OBJ_WRITTEN(new_table, Qundef, (VALUE)new_ccs->cme); for (int index = 0; index < new_ccs->len; index++) { RB_OBJ_WRITTEN(new_table, Qundef, new_ccs->entries[index].cc); } diff --git a/zjit/src/asm/arm64/inst/mov.rs b/zjit/src/asm/arm64/inst/mov.rs index eae4565c3ab0ce..58877ae94040c7 100644 --- a/zjit/src/asm/arm64/inst/mov.rs +++ b/zjit/src/asm/arm64/inst/mov.rs @@ -2,6 +2,9 @@ use super::super::arg::Sf; /// Which operation is being performed. enum Op { + /// A movn operation which inverts the immediate and zeroes out the other bits. + MOVN = 0b00, + /// A movz operation which zeroes out the other bits. 
MOVZ = 0b10, @@ -61,6 +64,12 @@ impl Mov { Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() } } + /// MOVN + /// + pub fn movn(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { + Self { rd, imm16, hw: hw.into(), op: Op::MOVN, sf: num_bits.into() } + } + /// MOVZ /// pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { @@ -104,6 +113,34 @@ mod tests { assert_eq!(0xf2800f60, result); } + #[test] + fn test_movn_unshifted() { + let inst = Mov::movn(0, 123, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0x92800f60, result); + } + + #[test] + fn test_movn_shifted_16() { + let inst = Mov::movn(0, 123, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0x92a00f60, result); + } + + #[test] + fn test_movn_shifted_32() { + let inst = Mov::movn(0, 123, 32, 64); + let result: u32 = inst.into(); + assert_eq!(0x92c00f60, result); + } + + #[test] + fn test_movn_shifted_48() { + let inst = Mov::movn(0, 123, 48, 64); + let result: u32 = inst.into(); + assert_eq!(0x92e00f60, result); + } + #[test] fn test_movk_shifted_16() { let inst = Mov::movk(0, 123, 16, 64); diff --git a/zjit/src/asm/arm64/mod.rs b/zjit/src/asm/arm64/mod.rs index ae14d561f65b16..0576b230907ff5 100644 --- a/zjit/src/asm/arm64/mod.rs +++ b/zjit/src/asm/arm64/mod.rs @@ -716,6 +716,21 @@ pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { cb.write_bytes(&bytes); } +/// MOVN - load a register with the complement of a shifted then zero extended 16-bit immediate +/// +pub fn movn(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + + Mov::movn(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movn instruction.") + }; + + cb.write_bytes(&bytes); +} + /// MOVZ - move a 16 bit immediate into a register, zero the other bits pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { let bytes: [u8; 4] = match (rd, imm16) { @@ -1543,6 +1558,11 @@ mod tests { check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); } + #[test] + fn test_movn() { + check_bytes("600fa092", |cb| movn(cb, X0, A64Opnd::new_uimm(123), 16)); + } + #[test] fn test_movz() { check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index a1243f45e17e58..148d01ea862e97 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -140,6 +140,10 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { // instruction, then we'll use that. movz(cb, rd, A64Opnd::new_uimm(current), 0); return 1; + } else if u16::try_from(!value).is_ok() { + // For small negative values, use a single movn + movn(cb, rd, A64Opnd::new_uimm(!value), 0); + return 1; } else if BitmaskImmediate::try_from(current).is_ok() { // Otherwise, if the immediate can be encoded // with the special bitmask immediate encoding, @@ -191,11 +195,15 @@ pub const ALLOC_REGS: &'static [Reg] = &[ impl Assembler { - // Special scratch registers for intermediate processing. - // This register is caller-saved (so we don't have to save it before using it) + /// Special scratch registers for intermediate processing. + /// This register is call-clobbered (so we don't have to save it before using it). 
+ /// Avoid using if you can since this is used to lower [Insn] internally and + /// so conflicts are possible. pub const SCRATCH_REG: Reg = X16_REG; - const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG); - const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG); + const SCRATCH0_REG: Reg = Self::SCRATCH_REG; + const SCRATCH1_REG: Reg = X17_REG; + const SCRATCH0: A64Opnd = A64Opnd::Reg(Self::SCRATCH0_REG); + const SCRATCH1: A64Opnd = A64Opnd::Reg(Self::SCRATCH1_REG); /// Get the list of registers from which we will allocate on this platform pub fn get_alloc_regs() -> Vec { @@ -648,31 +656,6 @@ impl Assembler *opnd = split_load_operand(asm, *opnd); asm.push_insn(insn); }, - Insn::Store { dest, src } => { - // The value being stored must be in a register, so if it's - // not already one we'll load it first. - let opnd1 = match src { - // If the first operand is zero, then we can just use - // the zero register. - Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), - // Otherwise we'll check if we need to load it first. - _ => split_load_operand(asm, *src) - }; - - match dest { - Opnd::Reg(_) => { - // Store does not support a register as a dest operand. - asm.mov(*dest, opnd1); - } - _ => { - // The displacement for the STUR instruction can't be more - // than 9 bits long. If it's longer, we need to load the - // memory address into a register first. - let opnd0 = split_memory_address(asm, *dest); - asm.store(opnd0, opnd1); - } - } - }, Insn::Mul { left, right, .. } => { *left = split_load_operand(asm, *left); *right = split_load_operand(asm, *right); @@ -839,6 +822,42 @@ impl Assembler } } + /// Do the address calculation of `out_reg = base_reg + disp` + fn load_effective_address(cb: &mut CodeBlock, out: A64Opnd, base_reg_no: u8, disp: i32) { + let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no }); + assert_ne!(31, out.unwrap_reg().reg_no, "Lea sp, [sp, #imm] not always encodable. Use add/sub instead."); + + if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() { + // Use ADD/SUB if the displacement fits + add(cb, out, base_reg, A64Opnd::new_imm(disp.into())); + } else { + // Use add_extended() to interpret reg_no=31 as sp + // since the base register is never the zero register. + // Careful! Only the first two operands can refer to sp. + emit_load_value(cb, out, disp as u64); + add_extended(cb, out, base_reg, out); + }; + } + + /// Load a VALUE to a register and remember it for GC marking and reference updating + fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec, dest: A64Opnd, value: VALUE) { + // We dont need to check if it's a special const + // here because we only allow these operands to hit + // this point if they're not a special const. + assert!(!value.special_const_p()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value + // being loaded is a heap object, we'll report that + // back out to the gc_offsets list. + ldr_literal(cb, dest, 2.into()); + b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); + cb.write_bytes(&value.as_u64().to_le_bytes()); + + let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); + gc_offsets.push(ptr_offset); + } + /// Emit a push instruction for the given operand by adding to the stack /// pointer and then storing the given value. 
fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { @@ -1009,12 +1028,84 @@ impl Assembler Insn::LShift { opnd, shift, out } => { lsl(cb, out.into(), opnd.into(), shift.into()); }, - Insn::Store { dest, src } => { + store_insn @ Insn::Store { dest, src } => { + // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are + // accepted. As a rule of thumb, avoid using Assembler::SCRATCH as a memory + // base register to gurantee things will work. + let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else { + panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}"); + }; + + // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, + // so we panic to get the author to change their code. + #[track_caller] + fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { + assert_ne!( + backend_use.reg_no, + user_use, + "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" + ); + } + + // Split src into SCRATCH0 if necessary + let src_reg: A64Reg = match src { + Opnd::Reg(reg) => *reg, + // Use zero register when possible + Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG, + // Immediates + &Opnd::Imm(imm) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + emit_load_value(cb, Self::SCRATCH0, imm as u64); + Self::SCRATCH0_REG + } + &Opnd::UImm(imm) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + emit_load_value(cb, Self::SCRATCH0, imm); + Self::SCRATCH0_REG + } + &Opnd::Value(value) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value); + Self::SCRATCH0_REG + } + src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => { + // For mem-to-mem store, load the source into SCRATCH0 + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + let src_mem = if mem_disp_fits_bits(src_disp) { + src_mem.into() + } else { + // Split the load address into SCRATCH0 first if necessary + assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG); + load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp); + A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0) + }; + match src_num_bits { + 64 | 32 => ldur(cb, Self::SCRATCH0, src_mem), + 16 => ldurh(cb, Self::SCRATCH0, src_mem), + 8 => ldurb(cb, Self::SCRATCH0, src_mem), + num_bits => panic!("unexpected num_bits: {num_bits}") + }; + Self::SCRATCH0_REG + } + src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}") + }; + let src = A64Opnd::Reg(src_reg); + + // Split dest into SCRATCH1 if necessary. + let dest = if mem_disp_fits_bits(disp) { + dest.into() + } else { + assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG); + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG); + load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp); + A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0) + }; + // This order may be surprising but it is correct. The way // the Arm64 assembler works, the register that is going to // be stored is first and the address is second. However in // our IR we have the address first and the register second. 
- match dest.rm_num_bits() { + match dest_num_bits { 64 | 32 => stur(cb, src.into(), dest.into()), 16 => sturh(cb, src.into(), dest.into()), num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest), @@ -1041,21 +1132,7 @@ impl Assembler }; }, Opnd::Value(value) => { - // We dont need to check if it's a special const - // here because we only allow these operands to hit - // this point if they're not a special const. - assert!(!value.special_const_p()); - - // This assumes only load instructions can contain - // references to GC'd Value operands. If the value - // being loaded is a heap object, we'll report that - // back out to the gc_offsets list. - ldr_literal(cb, out.into(), 2.into()); - b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); - cb.write_bytes(&value.as_u64().to_le_bytes()); - - let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); - gc_offsets.push(ptr_offset); + emit_load_gc_value(cb, &mut gc_offsets, out.into(), value); }, Opnd::None => { unreachable!("Attempted to load from None operand"); @@ -1093,20 +1170,7 @@ impl Assembler let &Opnd::Mem(Mem { num_bits: _, base: MemBase::Reg(base_reg_no), disp }) = opnd else { panic!("Unexpected Insn::Lea operand in arm64_emit: {opnd:?}"); }; - let out: A64Opnd = out.into(); - let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no }); - assert_ne!(31, out.unwrap_reg().reg_no, "Insn::Lea sp, [sp, #imm] not always encodable. Use add/sub instead."); - - if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() { - // Use ADD/SUB if the displacement fits - add(cb, out, base_reg, A64Opnd::new_imm(disp.into())); - } else { - // Use add_extended() to interpret reg_no=31 as sp - // since the base register is never the zero register. - // Careful! Only the first two operands can refer to sp. - emit_load_value(cb, out, disp as u64); - add_extended(cb, out, base_reg, out); - }; + load_effective_address(cb, out.into(), base_reg_no, disp); } Insn::LeaJumpTarget { out, target, .. } => { if let Target::Label(label_idx) = target { @@ -1241,8 +1305,7 @@ impl Assembler } last_patch_pos = Some(cb.get_write_pos()); }, - Insn::IncrCounter { mem: _, value: _ } => { - /* + Insn::IncrCounter { mem, value } => { let label = cb.new_label("incr_counter_loop".to_string()); cb.write_label(label); @@ -1258,8 +1321,6 @@ impl Assembler cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0)); emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label)); - */ - unimplemented!("labels are not supported yet"); }, Insn::Breakpoint => { brk(cb, A64Opnd::None); @@ -1592,15 +1653,16 @@ mod tests { // Test values that exercise various types of immediates. 
// - 9 bit displacement for Load/Store - // - 12 bit shifted immediate + // - 12 bit ADD/SUB shifted immediate + // - 16 bit MOV family shifted immediates // - bit mask immediates - for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x1008, i32::MIN] { + for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x10008, i32::MIN] { let mem = Opnd::mem(64, NATIVE_STACK_PTR, displacement); asm.lea_into(Opnd::Reg(X0_REG), mem); } asm.compile_with_num_regs(&mut cb, 0); - assert_disasm!(cb, "e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d100009dd2e0ffbff2e0ffdff2e0fffff2e063208b00ff9dd2e0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b", " + assert_disasm!(cb, "e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d1e0ff8292e063208b00ff9fd2c0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b", " 0x0: orr x0, xzr, #0x7fffffff 0x4: add x0, sp, x0 0x8: mov x0, #8 @@ -1610,21 +1672,76 @@ mod tests { 0x18: add x0, sp, x0 0x1c: add x0, sp, #0x208 0x20: sub x0, sp, #0x208 - 0x24: mov x0, #0xe800 - 0x28: movk x0, #0xffff, lsl #16 - 0x2c: movk x0, #0xffff, lsl #32 - 0x30: movk x0, #0xffff, lsl #48 - 0x34: add x0, sp, x0 - 0x38: mov x0, #0xeff8 - 0x3c: movk x0, #0xffff, lsl #16 - 0x40: movk x0, #0xffff, lsl #32 - 0x44: movk x0, #0xffff, lsl #48 - 0x48: add x0, sp, x0 - 0x4c: orr x0, xzr, #0xffffffff80000000 - 0x50: add x0, sp, x0 + 0x24: mov x0, #-0x1800 + 0x28: add x0, sp, x0 + 0x2c: mov x0, #0xfff8 + 0x30: movk x0, #0xfffe, lsl #16 + 0x34: movk x0, #0xffff, lsl #32 + 0x38: movk x0, #0xffff, lsl #48 + 0x3c: add x0, sp, x0 + 0x40: orr x0, xzr, #0xffffffff80000000 + 0x44: add x0, sp, x0 + "); + } + + #[test] + fn test_store() { + let (mut asm, mut cb) = setup_asm(); + + // Large memory offsets in combinations of destination and source + let large_mem = Opnd::mem(64, NATIVE_STACK_PTR, -0x305); + let small_mem = Opnd::mem(64, C_RET_OPND, 0); + asm.store(small_mem, large_mem); + asm.store(large_mem, small_mem); + asm.store(large_mem, large_mem); + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "f0170cd1100240f8100000f8100040f8f1170cd1300200f8f0170cd1100240f8f1170cd1300200f8", " + 0x0: sub x16, sp, #0x305 + 0x4: ldur x16, [x16] + 0x8: stur x16, [x0] + 0xc: ldur x16, [x0] + 0x10: sub x17, sp, #0x305 + 0x14: stur x16, [x17] + 0x18: sub x16, sp, #0x305 + 0x1c: ldur x16, [x16] + 0x20: sub x17, sp, #0x305 + 0x24: stur x16, [x17] + "); + } + + #[test] + fn test_store_value_without_split() { + let (mut asm, mut cb) = setup_asm(); + + let imitation_heap_value = VALUE(0x1000); + assert!(imitation_heap_value.heap_object_p()); + asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into()); + + // Side exit code are compiled without the split pass, so we directly call emit here to + // emulate that scenario. 
+ let gc_offsets = asm.arm64_emit(&mut cb).unwrap(); + assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset"); + + assert_disasm!(cb, "50000058030000140010000000000000b00200f8", " + 0x0: ldr x16, #8 + 0x4: b #0x10 + 0x8: .byte 0x00, 0x10, 0x00, 0x00 + 0xc: .byte 0x00, 0x00, 0x00, 0x00 + 0x10: stur x16, [x21] "); } + #[test] + #[should_panic] + fn test_store_unserviceable() { + let (mut asm, mut cb) = setup_asm(); + // This would put the source into SCRATCH_REG, messing up the destination + asm.store(Opnd::mem(64, Opnd::Reg(Assembler::SCRATCH_REG), 0), 0x83902.into()); + + asm.compile_with_num_regs(&mut cb, 0); + } + /* #[test] fn test_emit_lea_label() { diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 36e783bd4e658a..b910052dae1b04 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -1822,29 +1822,16 @@ impl Assembler }; self.write_label(side_exit_label.clone()); - // Load an operand that cannot be used as a source of Insn::Store - fn split_store_source(asm: &mut Assembler, opnd: Opnd) -> Opnd { - if matches!(opnd, Opnd::Mem(_) | Opnd::Value(_)) || - (cfg!(target_arch = "aarch64") && matches!(opnd, Opnd::UImm(_))) { - asm.load_into(Opnd::Reg(Assembler::SCRATCH_REG), opnd); - Opnd::Reg(Assembler::SCRATCH_REG) - } else { - opnd - } - } - // Restore the PC and the stack for regular side exits. We don't do this for // side exits right after JIT-to-JIT calls, which restore them before the call. if let Some(SideExitContext { pc, stack, locals }) = context { asm_comment!(self, "write stack slots: {stack:?}"); for (idx, &opnd) in stack.iter().enumerate() { - let opnd = split_store_source(self, opnd); self.store(Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32), opnd); } asm_comment!(self, "write locals: {locals:?}"); for (idx, &opnd) in locals.iter().enumerate() { - let opnd = split_store_source(self, opnd); self.store(Opnd::mem(64, SP, (-local_size_and_idx_to_ep_offset(locals.len(), idx) - 1) * SIZEOF_VALUE_I32), opnd); } diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 4543252573800c..d21c7ee09c82d2 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -96,8 +96,10 @@ pub const ALLOC_REGS: &'static [Reg] = &[ impl Assembler { - // A special scratch register for intermediate processing. - // This register is caller-saved (so we don't have to save it before using it) + /// Special scratch registers for intermediate processing. + /// This register is call-clobbered (so we don't have to save it before using it). + /// Avoid using if you can since this is used to lower [Insn] internally and + /// so conflicts are possible. pub const SCRATCH_REG: Reg = R11_REG; const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG); @@ -293,38 +295,11 @@ impl Assembler asm.push_insn(insn); }, - Insn::Mov { dest, src } | Insn::Store { dest, src } => { - match (&dest, &src) { - (Opnd::Mem(_), Opnd::Mem(_)) => { - // We load opnd1 because for mov, opnd0 is the output - let opnd1 = asm.load(*src); - asm.mov(*dest, opnd1); - }, - (Opnd::Mem(Mem { num_bits, .. }), Opnd::UImm(value)) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if *num_bits == 64 && imm_num_bits(*value as i64) > 32 { - let opnd1 = asm.load(*src); - asm.mov(*dest, opnd1); - } else { - asm.mov(*dest, *src); - } - }, - (Opnd::Mem(Mem { num_bits, .. 
}), Opnd::Imm(value)) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if *num_bits == 64 && imm_num_bits(*value) > 32 { - let opnd1 = asm.load(*src); - asm.mov(*dest, opnd1); - } else if uimm_num_bits(*value as u64) <= *num_bits { - // If the bit string is short enough for the destination, use the unsigned representation. - // Note that 64-bit and negative values are ruled out. - asm.mov(*dest, Opnd::UImm(*value as u64)); - } else { - asm.mov(*dest, *src); - } - }, - _ => { - asm.mov(*dest, *src); - } + Insn::Mov { dest, src } => { + if let Opnd::Mem(_) = dest { + asm.store(*dest, *src); + } else { + asm.mov(*dest, *src); } }, Insn::Not { opnd, .. } => { @@ -440,6 +415,14 @@ impl Assembler } } + fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec, dest_reg: X86Opnd, value: VALUE) { + // Using movabs because mov might write value in 32 bits + movabs(cb, dest_reg, value.0 as _); + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); + gc_offsets.push(ptr_offset); + } + // List of GC offsets let mut gc_offsets: Vec = Vec::new(); @@ -552,20 +535,71 @@ impl Assembler shr(cb, opnd.into(), shift.into()) }, - Insn::Store { dest, src } => { - mov(cb, dest.into(), src.into()); - }, + store_insn @ Insn::Store { dest, src } => { + let &Opnd::Mem(Mem { num_bits, base: MemBase::Reg(base_reg_no), disp: _ }) = dest else { + panic!("Unexpected Insn::Store destination in x64_emit: {dest:?}"); + }; + + // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, + // so we panic to get the author to change their code. + #[track_caller] + fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { + assert_ne!( + backend_use.reg_no, + user_use, + "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" + ); + } + + let scratch = X86Opnd::Reg(Self::SCRATCH_REG); + let src = match src { + Opnd::Reg(_) => src.into(), + &Opnd::Mem(_) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + mov(cb, scratch, src.into()); + scratch + } + &Opnd::Imm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm) > 32 { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + mov(cb, scratch, src.into()); + scratch + } else if uimm_num_bits(imm as u64) <= num_bits { + // If the bit string is short enough for the destination, use the unsigned representation. + // Note that 64-bit and negative values are ruled out. + uimm_opnd(imm as u64) + } else { + src.into() + } + } + &Opnd::UImm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm as i64) > 32 { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + mov(cb, scratch, src.into()); + scratch + } else { + src.into() + } + } + &Opnd::Value(value) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + emit_load_gc_value(cb, &mut gc_offsets, scratch, value); + scratch + } + src @ (Opnd::None | Opnd::VReg { .. 
}) => panic!("Unexpected source operand during x86_emit: {src:?}") + + }; + mov(cb, dest.into(), src); + } // This assumes only load instructions can contain references to GC'd Value operands Insn::Load { opnd, out } | Insn::LoadInto { dest: out, opnd } => { match opnd { Opnd::Value(val) if val.heap_object_p() => { - // Using movabs because mov might write value in 32 bits - movabs(cb, out.into(), val.0 as _); - // The pointer immediate is encoded as the last part of the mov written out - let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); - gc_offsets.push(ptr_offset); + emit_load_gc_value(cb, &mut gc_offsets, out.into(), *val); } _ => mov(cb, out.into(), opnd.into()) } @@ -1352,4 +1386,21 @@ mod tests { 0x29: pop rbp "}); } + + #[test] + fn test_store_value_without_split() { + let (mut asm, mut cb) = setup_asm(); + + let imitation_heap_value = VALUE(0x1000); + assert!(imitation_heap_value.heap_object_p()); + asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into()); + + let gc_offsets = asm.x86_emit(&mut cb).unwrap(); + assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset"); + + assert_disasm!(cb, "49bb00100000000000004c891b", " + 0x0: movabs r11, 0x1000 + 0xa: mov qword ptr [rbx], r11 + "); + } } diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index e7ebc1414af86e..433b22e15d2c79 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -340,7 +340,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::SendWithoutBlockDirect { cd, state, self_val, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state))?, - Insn::InvokeBuiltin { bf, args, state } => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?, + Insn::InvokeBuiltin { bf, args, state, .. 
} => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?, Insn::Return { val } => return Some(gen_return(asm, opnd!(val))?), Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?, Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?, diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 9c29ed5472d8f1..8d1548f92b1717 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -15,6 +15,7 @@ use crate::hir_type::{types, Type}; pub struct Annotations { cfuncs: HashMap<*mut c_void, FnProperties>, + builtin_funcs: HashMap<*mut c_void, FnProperties>, } /// Runtime behaviors of C functions that implement a Ruby method @@ -41,6 +42,12 @@ impl Annotations { }; self.cfuncs.get(&fn_ptr).copied() } + + /// Query about properties of a builtin function by its pointer + pub fn get_builtin_properties(&self, bf: *const rb_builtin_function) -> Option { + let func_ptr = unsafe { (*bf).func_ptr as *mut c_void }; + self.builtin_funcs.get(&func_ptr).copied() + } } fn annotate_c_method(props_map: &mut HashMap<*mut c_void, FnProperties>, class: VALUE, method_name: &'static str, props: FnProperties) { @@ -59,10 +66,78 @@ fn annotate_c_method(props_map: &mut HashMap<*mut c_void, FnProperties>, class: props_map.insert(fn_ptr, props); } +/// Look up a method and find its builtin function pointer by parsing its ISEQ +/// We currently only support methods with exactly one invokebuiltin instruction +fn annotate_builtin_method(props_map: &mut HashMap<*mut c_void, FnProperties>, class: VALUE, method_name: &'static str, props: FnProperties) { + unsafe { + let method_id = rb_intern2(method_name.as_ptr().cast(), method_name.len().try_into().unwrap()); + let method = rb_method_entry_at(class, method_id); + if method.is_null() { + panic!("Method {}#{} not found", std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), method_name); + } + + // Cast ME to CME - they have identical layout + let cme = method.cast::(); + let def_type = get_cme_def_type(cme); + + if def_type != VM_METHOD_TYPE_ISEQ { + panic!("Method {}#{} is not an ISEQ method (type: {})", + std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), + method_name, def_type); + } + + // Get the ISEQ from the method definition + let iseq = get_def_iseq_ptr((*cme).def); + if iseq.is_null() { + panic!("Failed to get ISEQ for {}#{}", + std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), + method_name); + } + + // Get the size of the ISEQ in instruction units + let encoded_size = rb_iseq_encoded_size(iseq); + + // Scan through the ISEQ to find invokebuiltin instructions + let mut insn_idx: u32 = 0; + let mut func_ptr = std::ptr::null_mut::(); + + while insn_idx < encoded_size { + // Get the PC for this instruction index + let pc = rb_iseq_pc_at_idx(iseq, insn_idx); + + // Get the opcode using the proper decoder + let opcode = rb_iseq_opcode_at_pc(iseq, pc); + + if opcode == YARVINSN_invokebuiltin as i32 || + opcode == YARVINSN_opt_invokebuiltin_delegate as i32 || + opcode == YARVINSN_opt_invokebuiltin_delegate_leave as i32 { + // The first operand is the builtin function pointer + let bf_value = *pc.add(1); + let bf_ptr = bf_value.as_ptr() as *const rb_builtin_function; + + if func_ptr.is_null() { + func_ptr = (*bf_ptr).func_ptr as *mut c_void; + } else { + panic!("Multiple invokebuiltin instructions found in ISEQ for {}#{}", + 
std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), + method_name); + } + } + + // Move to the next instruction using the proper length + insn_idx = insn_idx.saturating_add(rb_insn_len(VALUE(opcode as usize)).try_into().unwrap()); + } + + // Only insert the properties if its iseq has exactly one invokebuiltin instruction + props_map.insert(func_ptr, props); + } +} + /// Gather annotations. Run this right after boot since the annotations /// are about the stock versions of methods. pub fn init() -> Annotations { let cfuncs = &mut HashMap::new(); + let builtin_funcs = &mut HashMap::new(); macro_rules! annotate { ($module:ident, $method_name:literal, $return_type:expr, $($properties:ident),+) => { @@ -74,6 +149,22 @@ pub fn init() -> Annotations { } } + macro_rules! annotate_builtin { + ($module:ident, $method_name:literal, $return_type:expr) => { + annotate_builtin!($module, $method_name, $return_type, no_gc, leaf, elidable) + }; + ($module:ident, $method_name:literal, $return_type:expr, $($properties:ident),+) => { + let mut props = FnProperties { + no_gc: false, + leaf: false, + elidable: false, + return_type: $return_type + }; + $(props.$properties = true;)+ + annotate_builtin_method(builtin_funcs, unsafe { $module }, $method_name, props); + } + } + annotate!(rb_mKernel, "itself", types::BasicObject, no_gc, leaf, elidable); annotate!(rb_cString, "bytesize", types::Fixnum, no_gc, leaf); annotate!(rb_cModule, "name", types::StringExact.union(types::NilClass), no_gc, leaf, elidable); @@ -83,7 +174,12 @@ pub fn init() -> Annotations { annotate!(rb_cNilClass, "nil?", types::TrueClass, no_gc, leaf, elidable); annotate!(rb_mKernel, "nil?", types::FalseClass, no_gc, leaf, elidable); + annotate_builtin!(rb_mKernel, "Float", types::Flonum); + annotate_builtin!(rb_mKernel, "Integer", types::Integer); + annotate_builtin!(rb_mKernel, "class", types::Class, leaf); + Annotations { - cfuncs: std::mem::take(cfuncs) + cfuncs: std::mem::take(cfuncs), + builtin_funcs: std::mem::take(builtin_funcs), } } diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 61514c88769d87..976580c85b25e1 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -522,7 +522,12 @@ pub enum Insn { }, // Invoke a builtin function - InvokeBuiltin { bf: rb_builtin_function, args: Vec, state: InsnId }, + InvokeBuiltin { + bf: rb_builtin_function, + args: Vec, + state: InsnId, + return_type: Option, // None for unannotated builtins + }, /// Control flow instructions Return { val: InsnId }, @@ -1163,7 +1168,7 @@ impl Function { args: find_vec!(args), state, }, - &InvokeBuiltin { bf, ref args, state } => InvokeBuiltin { bf: bf, args: find_vec!(args), state }, + &InvokeBuiltin { bf, ref args, state, return_type } => InvokeBuiltin { bf, args: find_vec!(args), state, return_type }, &ArrayDup { val, state } => ArrayDup { val: find!(val), state }, &HashDup { val, state } => HashDup { val: find!(val), state }, &CCall { cfun, ref args, name, return_type, elidable } => CCall { cfun, args: find_vec!(args), name, return_type, elidable }, @@ -1260,7 +1265,7 @@ impl Function { Insn::SendWithoutBlock { .. } => types::BasicObject, Insn::SendWithoutBlockDirect { .. } => types::BasicObject, Insn::Send { .. } => types::BasicObject, - Insn::InvokeBuiltin { .. } => types::BasicObject, + Insn::InvokeBuiltin { return_type, .. } => return_type.unwrap_or(types::BasicObject), Insn::Defined { .. } => types::BasicObject, Insn::DefinedIvar { .. } => types::BasicObject, Insn::GetConstantPath { .. 
} => types::BasicObject, @@ -3119,7 +3124,18 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { args.reverse(); let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { bf, args, state: exit_id }); + + // Check if this builtin is annotated + let return_type = ZJITState::get_method_annotations() + .get_builtin_properties(&bf) + .map(|props| props.return_type); + + let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { + bf, + args, + state: exit_id, + return_type, + }); state.stack_push(insn_id); } YARVINSN_opt_invokebuiltin_delegate | @@ -3134,7 +3150,18 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { bf, args, state: exit_id }); + + // Check if this builtin is annotated + let return_type = ZJITState::get_method_annotations() + .get_builtin_properties(&bf) + .map(|props| props.return_type); + + let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { + bf, + args, + state: exit_id, + return_type, + }); state.stack_push(insn_id); } YARVINSN_objtostring => { @@ -4973,23 +5000,53 @@ mod tests { } #[test] - fn test_invokebuiltin_delegate_with_args() { + fn test_invokebuiltin_delegate_annotated() { assert_method_hir_with_opcode("Float", YARVINSN_opt_invokebuiltin_delegate_leave, expect![[r#" fn Float@:197: bb0(v0:BasicObject, v1:BasicObject, v2:BasicObject, v3:BasicObject): - v6:BasicObject = InvokeBuiltin rb_f_float, v0, v1, v2 + v6:Flonum = InvokeBuiltin rb_f_float, v0, v1, v2 Jump bb1(v0, v1, v2, v3, v6) - bb1(v8:BasicObject, v9:BasicObject, v10:BasicObject, v11:BasicObject, v12:BasicObject): + bb1(v8:BasicObject, v9:BasicObject, v10:BasicObject, v11:BasicObject, v12:Flonum): Return v12 "#]]); } #[test] - fn test_invokebuiltin_delegate_without_args() { + fn test_invokebuiltin_cexpr_annotated() { assert_method_hir_with_opcode("class", YARVINSN_opt_invokebuiltin_delegate_leave, expect![[r#" fn class@:20: bb0(v0:BasicObject): - v3:BasicObject = InvokeBuiltin _bi20, v0 + v3:Class = InvokeBuiltin _bi20, v0 + Jump bb1(v0, v3) + bb1(v5:BasicObject, v6:Class): + Return v6 + "#]]); + } + + #[test] + fn test_invokebuiltin_delegate_with_args() { + // Using an unannotated builtin to test InvokeBuiltin generation + let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("Dir", "open")); + assert!(iseq_contains_opcode(iseq, YARVINSN_opt_invokebuiltin_delegate), "iseq Dir.open does not contain invokebuiltin"); + let function = iseq_to_hir(iseq).unwrap(); + assert_function_hir(function, expect![[r#" + fn open@:184: + bb0(v0:BasicObject, v1:BasicObject, v2:BasicObject, v3:BasicObject, v4:BasicObject): + v5:NilClass = Const Value(nil) + v8:BasicObject = InvokeBuiltin dir_s_open, v0, v1, v2 + SideExit UnknownOpcode(getblockparamproxy) + "#]]); + } + + #[test] + fn test_invokebuiltin_delegate_without_args() { + let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("GC", "enable")); + assert!(iseq_contains_opcode(iseq, YARVINSN_opt_invokebuiltin_delegate_leave), "iseq GC.enable does not contain invokebuiltin"); + let function = iseq_to_hir(iseq).unwrap(); + assert_function_hir(function, expect![[r#" + fn enable@:55: + bb0(v0:BasicObject): + v3:BasicObject = InvokeBuiltin gc_enable, v0 Jump bb1(v0, v3) bb1(v5:BasicObject, v6:BasicObject): Return v6
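
Note on the MOVN addition in zjit/src/asm/arm64/inst/mov.rs: MOVN, MOVZ and MOVK all belong to the A64 "move wide immediate" class and differ only in the two opc bits (0b00 for MOVN, 0b10 for MOVZ, 0b11 for MOVK). A stand-alone sketch of the word assembly, with illustrative field names rather than the ones used by the Mov struct; the expected values come from the unit tests added and already present in this file:

    // Assemble an A64 "move wide immediate" word: sf | opc | 100101 | hw | imm16 | rd.
    fn move_wide(sf: u32, opc: u32, hw: u32, imm16: u16, rd: u32) -> u32 {
        assert!(hw < 4 && rd < 32);
        (sf << 31) | (opc << 29) | (0b100101 << 23) | (hw << 21) | ((imm16 as u32) << 5) | rd
    }

    fn main() {
        // MOVN X0, #123           -> 0x92800f60 (test_movn_unshifted)
        assert_eq!(0x92800f60, move_wide(1, 0b00, 0, 123, 0));
        // MOVN X0, #123, LSL #16  -> 0x92a00f60 (test_movn_shifted_16)
        assert_eq!(0x92a00f60, move_wide(1, 0b00, 1, 123, 0));
        // MOVZ X0, #123, LSL #16  -> 0xd2a00f60 (test_movz, bytes "600fa0d2")
        assert_eq!(0xd2a00f60, move_wide(1, 0b10, 1, 123, 0));
        // MOVK X0, #123, LSL #16  -> 0xf2a00f60 (test_movk_shifted_16)
        assert_eq!(0xf2a00f60, move_wide(1, 0b11, 1, 123, 0));
    }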
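
Note on the new branch in emit_load_value: a single MOVN is possible exactly when the bitwise complement of the value fits in 16 bits, i.e. for signed values in -0x10000..=-1, which is why the -0x1800 case in the updated lea test shrinks from one movz plus three movk to a single mov alias of movn. A rough sketch of just the two single-instruction arms (the bitmask-immediate and movz+movk fallbacks are left out):

    /// Can `value` be materialized with one MOVZ or one MOVN?
    fn single_instruction_mov(value: u64) -> Option<&'static str> {
        if u16::try_from(value).is_ok() {
            Some("movz")          // only the low 16 bits are set
        } else if u16::try_from(!value).is_ok() {
            Some("movn")          // low 16 bits of the complement, upper bits all ones
        } else {
            None
        }
    }

    fn main() {
        assert_eq!(Some("movz"), single_instruction_mov(8));                  // `mov x0, #8` in the lea test
        assert_eq!(Some("movn"), single_instruction_mov(-0x1800i64 as u64));  // now `mov x0, #-0x1800`
        assert_eq!(None, single_instruction_mov(-0x10008i64 as u64));         // still movz + three movk
        assert_eq!(None, single_instruction_mov(0x10008));                    // movz + movk, unchanged
    }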
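
Note on the arm64 Store lowering that moved from arm64_split into arm64_emit: the source is first normalized into a register (xzr for literal zero, SCRATCH0/x16 for nonzero immediates, GC VALUEs and memory sources), then the destination displacement is checked against what STUR can encode, with SCRATCH1/x17 holding the address otherwise; assert_no_clobber turns impossible combinations, such as a destination based on SCRATCH_REG itself (test_store_unserviceable), into a panic. A stand-alone sketch of the resulting scratch-register demand, assuming the signed 9-bit unscaled-offset range behind mem_disp_fits_bits:

    enum Src { Reg, Zero, Imm, GcValue, Mem }

    /// Which scratch registers does lowering `Insn::Store { dest, src }` need?
    /// Returns (x16 needed for the source, x17 needed for the destination address).
    fn store_scratch_demand(src: &Src, dest_disp: i32) -> (bool, bool) {
        // Assumed signed 9-bit range for STUR/LDUR-style unscaled offsets.
        let disp_fits = |d: i32| (-256..=255).contains(&d);
        let src_needs_x16 = match src {
            Src::Reg | Src::Zero => false,  // a register, or xzr for a literal zero
            Src::Imm => true,               // nonzero immediate, materialized via emit_load_value
            Src::GcValue => true,           // loaded from an inline literal pool
            Src::Mem => true,               // mem-to-mem goes through a ldur first
        };
        (src_needs_x16, !disp_fits(dest_disp))
    }

    fn main() {
        // test_store: store(small_mem, large_mem) loads the source through x16;
        // the destination displacement 0 fits, so no x17 is needed.
        assert_eq!((true, false), store_scratch_demand(&Src::Mem, 0));
        // store(large_mem, small_mem) and store(large_mem, large_mem): disp -0x305
        // is out of range, so x17 holds the destination address, matching the
        // sub x16 / sub x17 pairs in the expected disassembly.
        assert_eq!((true, true), store_scratch_demand(&Src::Mem, -0x305));
        // Other source kinds, for completeness:
        assert_eq!((false, false), store_scratch_demand(&Src::Reg, 8));
        assert_eq!((false, false), store_scratch_demand(&Src::Zero, 8));
        assert_eq!((true, false), store_scratch_demand(&Src::Imm, 8));
        assert_eq!((true, false), store_scratch_demand(&Src::GcValue, 8));
    }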
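
Note on emit_load_gc_value on arm64: it factors the literal-pool pattern out of Insn::Load so Store can reuse it, emitting an LDR (literal) that reads the VALUE embedded right after a branch that jumps over it, and recording the embedded bytes in gc_offsets. The offset arithmetic behind the disassembly expected by test_store_value_without_split, as a small check:

    fn main() {
        const SIZEOF_VALUE: i32 = 8;
        // ldr_literal(cb, dest, 2.into()): the literal sits 2 instructions (8 bytes)
        // past the LDR, which is where the VALUE bytes are written.
        let ldr_pc = 0x0;
        let value_bytes_at = ldr_pc + 2 * 4;
        // b(cb, InstructionOffset::from_bytes(4 + SIZEOF_VALUE)): emitted right after
        // the LDR (pc 0x4), it skips itself plus the 8 inline bytes.
        let branch_pc = 0x4;
        let branch_target = branch_pc + 4 + SIZEOF_VALUE;
        assert_eq!(0x8, value_bytes_at);   // ".byte 0x00, 0x10, ..." in the test disasm
        assert_eq!(0x10, branch_target);   // "b #0x10", landing on the stur
        // gc_offsets records get_write_ptr() - SIZEOF_VALUE, i.e. value_bytes_at,
        // so GC marking and compaction can rewrite the embedded pointer in place.
    }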
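
Note on the x86_64 store path: immediates stay inline whenever `mov r/m64, imm32` can encode them (the 32-bit immediate is sign-extended to 64 bits), anything wider detours through SCRATCH_REG/r11, and heap VALUEs always take the movabs route so the 64-bit pointer remains patchable and is reported in gc_offsets. A sketch of the width check, with imm_bits as a rough stand-in for the backend's imm_num_bits helper (not the real function):

    /// Width of the smallest sign-extended encoding for `v` (approximation).
    fn imm_bits(v: i64) -> u32 {
        64 - (if v < 0 { !v } else { v }).leading_zeros() + 1
    }

    /// Does storing this immediate to a 64-bit destination need the scratch register?
    fn needs_scratch_for_imm(dest_num_bits: u32, imm: i64) -> bool {
        dest_num_bits == 64 && imm_bits(imm) > 32
    }

    fn main() {
        assert!(!needs_scratch_for_imm(64, 0x7fff_ffff));    // encodable, stored directly
        assert!(!needs_scratch_for_imm(64, -1));             // sign-extends to all ones
        assert!(needs_scratch_for_imm(64, 0x1_0000_0000));   // too wide: goes via r11
        // Heap VALUEs always go through r11, as test_store_value_without_split checks:
        //   movabs r11, 0x1000 ; mov qword ptr [rbx], r11
    }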
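
Note on annotate_builtin_method in cruby_methods.rs: it resolves a Ruby method to its ISEQ and scans for an invokebuiltin, opt_invokebuiltin_delegate or opt_invokebuiltin_delegate_leave instruction, panicking at boot if it sees more than one so an annotation cannot silently bind to the wrong func_ptr. Reduced to a plain opcode slice (the real scan walks PCs with rb_iseq_pc_at_idx and rb_insn_len), the logic is roughly:

    #[derive(Clone, Copy)]
    enum Opcode { InvokeBuiltin, OptInvokeBuiltinDelegate, OptInvokeBuiltinDelegateLeave, Other }

    /// Index of the single invokebuiltin-family instruction in a method body;
    /// refuses more than one, mirroring the panic in annotate_builtin_method.
    fn find_invokebuiltin(insns: &[Opcode]) -> Option<usize> {
        let mut found = None;
        for (idx, op) in insns.iter().enumerate() {
            if matches!(*op, Opcode::InvokeBuiltin
                           | Opcode::OptInvokeBuiltinDelegate
                           | Opcode::OptInvokeBuiltinDelegateLeave) {
                assert!(found.is_none(), "Multiple invokebuiltin instructions found");
                found = Some(idx);
            }
        }
        found
    }

    fn main() {
        use Opcode::*;
        // Kernel#Float and friends: one opt_invokebuiltin_delegate_leave, accepted.
        assert_eq!(Some(1), find_invokebuiltin(&[Other, OptInvokeBuiltinDelegateLeave]));
        // A method with no builtin call simply does not get annotated.
        assert_eq!(None, find_invokebuiltin(&[Other, Other]));
        // Plain invokebuiltin and opt_invokebuiltin_delegate count the same way.
        assert_eq!(Some(0), find_invokebuiltin(&[InvokeBuiltin, Other]));
        assert_eq!(Some(2), find_invokebuiltin(&[Other, Other, OptInvokeBuiltinDelegate]));
    }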
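
Note on the HIR side: InvokeBuiltin now carries an optional annotated return type, looked up in the new builtin_funcs map keyed by the builtin's function pointer, and infer_type falls back to BasicObject when there is no annotation; this is what types Kernel#Float as Flonum in the updated tests while dir_s_open and gc_enable stay BasicObject. A simplified model of the lookup and fallback (the types and pointers here are stand-ins, not the real FnProperties or Type):

    use std::collections::HashMap;

    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Ty { BasicObject, Flonum }

    #[derive(Clone, Copy)]
    struct FnProps { return_type: Ty } // the real FnProperties also has no_gc/leaf/elidable

    struct Annotations { builtin_funcs: HashMap<usize, FnProps> } // keyed by the builtin's func_ptr

    impl Annotations {
        fn get_builtin_properties(&self, func_ptr: usize) -> Option<FnProps> {
            self.builtin_funcs.get(&func_ptr).copied()
        }
    }

    /// What infer_type does for InvokeBuiltin { return_type: Option<_>, .. }:
    /// use the annotation when present, otherwise stay at BasicObject.
    fn invokebuiltin_return_type(annotations: &Annotations, func_ptr: usize) -> Ty {
        annotations
            .get_builtin_properties(func_ptr)
            .map(|props| props.return_type)
            .unwrap_or(Ty::BasicObject)
    }

    fn main() {
        let rb_f_float = 0x1000_usize; // stand-in function pointers
        let dir_s_open = 0x2000_usize;

        let mut builtin_funcs = HashMap::new();
        builtin_funcs.insert(rb_f_float, FnProps { return_type: Ty::Flonum });
        let annotations = Annotations { builtin_funcs };

        // Annotated builtin: HIR types it, e.g. `v6:Flonum = InvokeBuiltin rb_f_float, ...`
        assert_eq!(Ty::Flonum, invokebuiltin_return_type(&annotations, rb_f_float));
        // Unannotated builtin such as dir_s_open keeps the BasicObject fallback.
        assert_eq!(Ty::BasicObject, invokebuiltin_return_type(&annotations, dir_s_open));
    }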