diff --git a/test/.excludes-zjit/TestKeywordArguments.rb b/test/.excludes-zjit/TestKeywordArguments.rb deleted file mode 100644 index f52bdf6d30d6c6..00000000000000 --- a/test/.excludes-zjit/TestKeywordArguments.rb +++ /dev/null @@ -1 +0,0 @@ -exclude(/test_/, 'Multiple tests make ZJIT panic') diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 684a4bb2d40723..6db57e18ba729c 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1095,6 +1095,14 @@ def test_require_rubygems_with_auto_compact }, call_threshold: 2 end + def test_stats + assert_runs 'true', %q{ + def test = 1 + test + RubyVM::ZJIT.stats[:zjit_insns_count] > 0 + }, stats: true + end + def test_zjit_option_uses_array_each_in_ruby omit 'ZJIT wrongly compiles Array#each, so it is disabled for now' assert_runs '""', %q{ @@ -1414,12 +1422,13 @@ def assert_runs(expected, test_script, insns: [], assert_compiles: false, **opts end # Run a Ruby process with ZJIT options and a pipe for writing test results - def eval_with_jit(script, call_threshold: 1, num_profiles: 1, timeout: 1000, pipe_fd:, debug: true) + def eval_with_jit(script, call_threshold: 1, num_profiles: 1, stats: false, debug: true, timeout: 1000, pipe_fd:) args = [ "--disable-gems", "--zjit-call-threshold=#{call_threshold}", "--zjit-num-profiles=#{num_profiles}", ] + args << "--zjit-stats" if stats args << "--zjit-debug" if debug args << "-e" << script_shell_encode(script) pipe_r, pipe_w = IO.pipe diff --git a/vm_method.c b/vm_method.c index 874e25ed7666e6..76b1c97d046c2b 100644 --- a/vm_method.c +++ b/vm_method.c @@ -136,17 +136,19 @@ rb_vm_cc_table_create(size_t capa) static enum rb_id_table_iterator_result vm_cc_table_dup_i(ID key, VALUE old_ccs_ptr, void *data) { + VALUE new_table = (VALUE)data; struct rb_class_cc_entries *old_ccs = (struct rb_class_cc_entries *)old_ccs_ptr; size_t memsize = vm_ccs_alloc_size(old_ccs->capa); - struct rb_class_cc_entries *new_ccs = ruby_xmalloc(memsize); + struct rb_class_cc_entries *new_ccs = ruby_xcalloc(1, memsize); + rb_managed_id_table_insert(new_table, key, (VALUE)new_ccs); + memcpy(new_ccs, old_ccs, memsize); #if VM_CHECK_MODE > 0 new_ccs->debug_sig = ~(VALUE)new_ccs; #endif - VALUE new_table = (VALUE)data; - rb_managed_id_table_insert(new_table, key, (VALUE)new_ccs); + RB_OBJ_WRITTEN(new_table, Qundef, (VALUE)new_ccs->cme); for (int index = 0; index < new_ccs->len; index++) { RB_OBJ_WRITTEN(new_table, Qundef, new_ccs->entries[index].cc); } diff --git a/zjit/src/asm/arm64/inst/mov.rs b/zjit/src/asm/arm64/inst/mov.rs index eae4565c3ab0ce..58877ae94040c7 100644 --- a/zjit/src/asm/arm64/inst/mov.rs +++ b/zjit/src/asm/arm64/inst/mov.rs @@ -2,6 +2,9 @@ use super::super::arg::Sf; /// Which operation is being performed. enum Op { + /// A movn operation which inverts the immediate and zeroes out the other bits. + MOVN = 0b00, + /// A movz operation which zeroes out the other bits. 
MOVZ = 0b10, @@ -61,6 +64,12 @@ impl Mov { Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() } } + /// MOVN + /// + pub fn movn(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { + Self { rd, imm16, hw: hw.into(), op: Op::MOVN, sf: num_bits.into() } + } + /// MOVZ /// pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { @@ -104,6 +113,34 @@ mod tests { assert_eq!(0xf2800f60, result); } + #[test] + fn test_movn_unshifted() { + let inst = Mov::movn(0, 123, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0x92800f60, result); + } + + #[test] + fn test_movn_shifted_16() { + let inst = Mov::movn(0, 123, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0x92a00f60, result); + } + + #[test] + fn test_movn_shifted_32() { + let inst = Mov::movn(0, 123, 32, 64); + let result: u32 = inst.into(); + assert_eq!(0x92c00f60, result); + } + + #[test] + fn test_movn_shifted_48() { + let inst = Mov::movn(0, 123, 48, 64); + let result: u32 = inst.into(); + assert_eq!(0x92e00f60, result); + } + #[test] fn test_movk_shifted_16() { let inst = Mov::movk(0, 123, 16, 64); diff --git a/zjit/src/asm/arm64/mod.rs b/zjit/src/asm/arm64/mod.rs index ae14d561f65b16..0576b230907ff5 100644 --- a/zjit/src/asm/arm64/mod.rs +++ b/zjit/src/asm/arm64/mod.rs @@ -716,6 +716,21 @@ pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { cb.write_bytes(&bytes); } +/// MOVN - load a register with the complement of a shifted then zero extended 16-bit immediate +/// +pub fn movn(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + + Mov::movn(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movn instruction.") + }; + + cb.write_bytes(&bytes); +} + /// MOVZ - move a 16 bit immediate into a register, zero the other bits pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { let bytes: [u8; 4] = match (rd, imm16) { @@ -1543,6 +1558,11 @@ mod tests { check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); } + #[test] + fn test_movn() { + check_bytes("600fa092", |cb| movn(cb, X0, A64Opnd::new_uimm(123), 16)); + } + #[test] fn test_movz() { check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index a1243f45e17e58..148d01ea862e97 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -140,6 +140,10 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { // instruction, then we'll use that. movz(cb, rd, A64Opnd::new_uimm(current), 0); return 1; + } else if u16::try_from(!value).is_ok() { + // For small negative values, use a single movn + movn(cb, rd, A64Opnd::new_uimm(!value), 0); + return 1; } else if BitmaskImmediate::try_from(current).is_ok() { // Otherwise, if the immediate can be encoded // with the special bitmask immediate encoding, @@ -191,11 +195,15 @@ pub const ALLOC_REGS: &'static [Reg] = &[ impl Assembler { - // Special scratch registers for intermediate processing. - // This register is caller-saved (so we don't have to save it before using it) + /// Special scratch registers for intermediate processing. + /// This register is call-clobbered (so we don't have to save it before using it). 
+ /// Avoid using if you can since this is used to lower [Insn] internally and + /// so conflicts are possible. pub const SCRATCH_REG: Reg = X16_REG; - const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG); - const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG); + const SCRATCH0_REG: Reg = Self::SCRATCH_REG; + const SCRATCH1_REG: Reg = X17_REG; + const SCRATCH0: A64Opnd = A64Opnd::Reg(Self::SCRATCH0_REG); + const SCRATCH1: A64Opnd = A64Opnd::Reg(Self::SCRATCH1_REG); /// Get the list of registers from which we will allocate on this platform pub fn get_alloc_regs() -> Vec { @@ -648,31 +656,6 @@ impl Assembler *opnd = split_load_operand(asm, *opnd); asm.push_insn(insn); }, - Insn::Store { dest, src } => { - // The value being stored must be in a register, so if it's - // not already one we'll load it first. - let opnd1 = match src { - // If the first operand is zero, then we can just use - // the zero register. - Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), - // Otherwise we'll check if we need to load it first. - _ => split_load_operand(asm, *src) - }; - - match dest { - Opnd::Reg(_) => { - // Store does not support a register as a dest operand. - asm.mov(*dest, opnd1); - } - _ => { - // The displacement for the STUR instruction can't be more - // than 9 bits long. If it's longer, we need to load the - // memory address into a register first. - let opnd0 = split_memory_address(asm, *dest); - asm.store(opnd0, opnd1); - } - } - }, Insn::Mul { left, right, .. } => { *left = split_load_operand(asm, *left); *right = split_load_operand(asm, *right); @@ -839,6 +822,42 @@ impl Assembler } } + /// Do the address calculation of `out_reg = base_reg + disp` + fn load_effective_address(cb: &mut CodeBlock, out: A64Opnd, base_reg_no: u8, disp: i32) { + let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no }); + assert_ne!(31, out.unwrap_reg().reg_no, "Lea sp, [sp, #imm] not always encodable. Use add/sub instead."); + + if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() { + // Use ADD/SUB if the displacement fits + add(cb, out, base_reg, A64Opnd::new_imm(disp.into())); + } else { + // Use add_extended() to interpret reg_no=31 as sp + // since the base register is never the zero register. + // Careful! Only the first two operands can refer to sp. + emit_load_value(cb, out, disp as u64); + add_extended(cb, out, base_reg, out); + }; + } + + /// Load a VALUE to a register and remember it for GC marking and reference updating + fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec, dest: A64Opnd, value: VALUE) { + // We dont need to check if it's a special const + // here because we only allow these operands to hit + // this point if they're not a special const. + assert!(!value.special_const_p()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value + // being loaded is a heap object, we'll report that + // back out to the gc_offsets list. + ldr_literal(cb, dest, 2.into()); + b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); + cb.write_bytes(&value.as_u64().to_le_bytes()); + + let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); + gc_offsets.push(ptr_offset); + } + /// Emit a push instruction for the given operand by adding to the stack /// pointer and then storing the given value. 
fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { @@ -1009,12 +1028,84 @@ impl Assembler Insn::LShift { opnd, shift, out } => { lsl(cb, out.into(), opnd.into(), shift.into()); }, - Insn::Store { dest, src } => { + store_insn @ Insn::Store { dest, src } => { + // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are + // accepted. As a rule of thumb, avoid using Assembler::SCRATCH as a memory + // base register to gurantee things will work. + let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else { + panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}"); + }; + + // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, + // so we panic to get the author to change their code. + #[track_caller] + fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { + assert_ne!( + backend_use.reg_no, + user_use, + "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" + ); + } + + // Split src into SCRATCH0 if necessary + let src_reg: A64Reg = match src { + Opnd::Reg(reg) => *reg, + // Use zero register when possible + Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG, + // Immediates + &Opnd::Imm(imm) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + emit_load_value(cb, Self::SCRATCH0, imm as u64); + Self::SCRATCH0_REG + } + &Opnd::UImm(imm) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + emit_load_value(cb, Self::SCRATCH0, imm); + Self::SCRATCH0_REG + } + &Opnd::Value(value) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value); + Self::SCRATCH0_REG + } + src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => { + // For mem-to-mem store, load the source into SCRATCH0 + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + let src_mem = if mem_disp_fits_bits(src_disp) { + src_mem.into() + } else { + // Split the load address into SCRATCH0 first if necessary + assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG); + load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp); + A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0) + }; + match src_num_bits { + 64 | 32 => ldur(cb, Self::SCRATCH0, src_mem), + 16 => ldurh(cb, Self::SCRATCH0, src_mem), + 8 => ldurb(cb, Self::SCRATCH0, src_mem), + num_bits => panic!("unexpected num_bits: {num_bits}") + }; + Self::SCRATCH0_REG + } + src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}") + }; + let src = A64Opnd::Reg(src_reg); + + // Split dest into SCRATCH1 if necessary. + let dest = if mem_disp_fits_bits(disp) { + dest.into() + } else { + assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG); + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG); + load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp); + A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0) + }; + // This order may be surprising but it is correct. The way // the Arm64 assembler works, the register that is going to // be stored is first and the address is second. However in // our IR we have the address first and the register second. 
- match dest.rm_num_bits() { + match dest_num_bits { 64 | 32 => stur(cb, src.into(), dest.into()), 16 => sturh(cb, src.into(), dest.into()), num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest), @@ -1041,21 +1132,7 @@ impl Assembler }; }, Opnd::Value(value) => { - // We dont need to check if it's a special const - // here because we only allow these operands to hit - // this point if they're not a special const. - assert!(!value.special_const_p()); - - // This assumes only load instructions can contain - // references to GC'd Value operands. If the value - // being loaded is a heap object, we'll report that - // back out to the gc_offsets list. - ldr_literal(cb, out.into(), 2.into()); - b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); - cb.write_bytes(&value.as_u64().to_le_bytes()); - - let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); - gc_offsets.push(ptr_offset); + emit_load_gc_value(cb, &mut gc_offsets, out.into(), value); }, Opnd::None => { unreachable!("Attempted to load from None operand"); @@ -1093,20 +1170,7 @@ impl Assembler let &Opnd::Mem(Mem { num_bits: _, base: MemBase::Reg(base_reg_no), disp }) = opnd else { panic!("Unexpected Insn::Lea operand in arm64_emit: {opnd:?}"); }; - let out: A64Opnd = out.into(); - let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no }); - assert_ne!(31, out.unwrap_reg().reg_no, "Insn::Lea sp, [sp, #imm] not always encodable. Use add/sub instead."); - - if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() { - // Use ADD/SUB if the displacement fits - add(cb, out, base_reg, A64Opnd::new_imm(disp.into())); - } else { - // Use add_extended() to interpret reg_no=31 as sp - // since the base register is never the zero register. - // Careful! Only the first two operands can refer to sp. - emit_load_value(cb, out, disp as u64); - add_extended(cb, out, base_reg, out); - }; + load_effective_address(cb, out.into(), base_reg_no, disp); } Insn::LeaJumpTarget { out, target, .. } => { if let Target::Label(label_idx) = target { @@ -1241,8 +1305,7 @@ impl Assembler } last_patch_pos = Some(cb.get_write_pos()); }, - Insn::IncrCounter { mem: _, value: _ } => { - /* + Insn::IncrCounter { mem, value } => { let label = cb.new_label("incr_counter_loop".to_string()); cb.write_label(label); @@ -1258,8 +1321,6 @@ impl Assembler cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0)); emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label)); - */ - unimplemented!("labels are not supported yet"); }, Insn::Breakpoint => { brk(cb, A64Opnd::None); @@ -1592,15 +1653,16 @@ mod tests { // Test values that exercise various types of immediates. 
// - 9 bit displacement for Load/Store - // - 12 bit shifted immediate + // - 12 bit ADD/SUB shifted immediate + // - 16 bit MOV family shifted immediates // - bit mask immediates - for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x1008, i32::MIN] { + for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x10008, i32::MIN] { let mem = Opnd::mem(64, NATIVE_STACK_PTR, displacement); asm.lea_into(Opnd::Reg(X0_REG), mem); } asm.compile_with_num_regs(&mut cb, 0); - assert_disasm!(cb, "e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d100009dd2e0ffbff2e0ffdff2e0fffff2e063208b00ff9dd2e0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b", " + assert_disasm!(cb, "e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d1e0ff8292e063208b00ff9fd2c0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b", " 0x0: orr x0, xzr, #0x7fffffff 0x4: add x0, sp, x0 0x8: mov x0, #8 @@ -1610,21 +1672,76 @@ mod tests { 0x18: add x0, sp, x0 0x1c: add x0, sp, #0x208 0x20: sub x0, sp, #0x208 - 0x24: mov x0, #0xe800 - 0x28: movk x0, #0xffff, lsl #16 - 0x2c: movk x0, #0xffff, lsl #32 - 0x30: movk x0, #0xffff, lsl #48 - 0x34: add x0, sp, x0 - 0x38: mov x0, #0xeff8 - 0x3c: movk x0, #0xffff, lsl #16 - 0x40: movk x0, #0xffff, lsl #32 - 0x44: movk x0, #0xffff, lsl #48 - 0x48: add x0, sp, x0 - 0x4c: orr x0, xzr, #0xffffffff80000000 - 0x50: add x0, sp, x0 + 0x24: mov x0, #-0x1800 + 0x28: add x0, sp, x0 + 0x2c: mov x0, #0xfff8 + 0x30: movk x0, #0xfffe, lsl #16 + 0x34: movk x0, #0xffff, lsl #32 + 0x38: movk x0, #0xffff, lsl #48 + 0x3c: add x0, sp, x0 + 0x40: orr x0, xzr, #0xffffffff80000000 + 0x44: add x0, sp, x0 + "); + } + + #[test] + fn test_store() { + let (mut asm, mut cb) = setup_asm(); + + // Large memory offsets in combinations of destination and source + let large_mem = Opnd::mem(64, NATIVE_STACK_PTR, -0x305); + let small_mem = Opnd::mem(64, C_RET_OPND, 0); + asm.store(small_mem, large_mem); + asm.store(large_mem, small_mem); + asm.store(large_mem, large_mem); + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "f0170cd1100240f8100000f8100040f8f1170cd1300200f8f0170cd1100240f8f1170cd1300200f8", " + 0x0: sub x16, sp, #0x305 + 0x4: ldur x16, [x16] + 0x8: stur x16, [x0] + 0xc: ldur x16, [x0] + 0x10: sub x17, sp, #0x305 + 0x14: stur x16, [x17] + 0x18: sub x16, sp, #0x305 + 0x1c: ldur x16, [x16] + 0x20: sub x17, sp, #0x305 + 0x24: stur x16, [x17] + "); + } + + #[test] + fn test_store_value_without_split() { + let (mut asm, mut cb) = setup_asm(); + + let imitation_heap_value = VALUE(0x1000); + assert!(imitation_heap_value.heap_object_p()); + asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into()); + + // Side exit code are compiled without the split pass, so we directly call emit here to + // emulate that scenario. 
+ let gc_offsets = asm.arm64_emit(&mut cb).unwrap(); + assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset"); + + assert_disasm!(cb, "50000058030000140010000000000000b00200f8", " + 0x0: ldr x16, #8 + 0x4: b #0x10 + 0x8: .byte 0x00, 0x10, 0x00, 0x00 + 0xc: .byte 0x00, 0x00, 0x00, 0x00 + 0x10: stur x16, [x21] "); } + #[test] + #[should_panic] + fn test_store_unserviceable() { + let (mut asm, mut cb) = setup_asm(); + // This would put the source into SCRATCH_REG, messing up the destination + asm.store(Opnd::mem(64, Opnd::Reg(Assembler::SCRATCH_REG), 0), 0x83902.into()); + + asm.compile_with_num_regs(&mut cb, 0); + } + /* #[test] fn test_emit_lea_label() { diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 36e783bd4e658a..b910052dae1b04 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -1822,29 +1822,16 @@ impl Assembler }; self.write_label(side_exit_label.clone()); - // Load an operand that cannot be used as a source of Insn::Store - fn split_store_source(asm: &mut Assembler, opnd: Opnd) -> Opnd { - if matches!(opnd, Opnd::Mem(_) | Opnd::Value(_)) || - (cfg!(target_arch = "aarch64") && matches!(opnd, Opnd::UImm(_))) { - asm.load_into(Opnd::Reg(Assembler::SCRATCH_REG), opnd); - Opnd::Reg(Assembler::SCRATCH_REG) - } else { - opnd - } - } - // Restore the PC and the stack for regular side exits. We don't do this for // side exits right after JIT-to-JIT calls, which restore them before the call. if let Some(SideExitContext { pc, stack, locals }) = context { asm_comment!(self, "write stack slots: {stack:?}"); for (idx, &opnd) in stack.iter().enumerate() { - let opnd = split_store_source(self, opnd); self.store(Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32), opnd); } asm_comment!(self, "write locals: {locals:?}"); for (idx, &opnd) in locals.iter().enumerate() { - let opnd = split_store_source(self, opnd); self.store(Opnd::mem(64, SP, (-local_size_and_idx_to_ep_offset(locals.len(), idx) - 1) * SIZEOF_VALUE_I32), opnd); } diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 4543252573800c..d21c7ee09c82d2 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -96,8 +96,10 @@ pub const ALLOC_REGS: &'static [Reg] = &[ impl Assembler { - // A special scratch register for intermediate processing. - // This register is caller-saved (so we don't have to save it before using it) + /// Special scratch registers for intermediate processing. + /// This register is call-clobbered (so we don't have to save it before using it). + /// Avoid using if you can since this is used to lower [Insn] internally and + /// so conflicts are possible. pub const SCRATCH_REG: Reg = R11_REG; const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG); @@ -293,38 +295,11 @@ impl Assembler asm.push_insn(insn); }, - Insn::Mov { dest, src } | Insn::Store { dest, src } => { - match (&dest, &src) { - (Opnd::Mem(_), Opnd::Mem(_)) => { - // We load opnd1 because for mov, opnd0 is the output - let opnd1 = asm.load(*src); - asm.mov(*dest, opnd1); - }, - (Opnd::Mem(Mem { num_bits, .. }), Opnd::UImm(value)) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if *num_bits == 64 && imm_num_bits(*value as i64) > 32 { - let opnd1 = asm.load(*src); - asm.mov(*dest, opnd1); - } else { - asm.mov(*dest, *src); - } - }, - (Opnd::Mem(Mem { num_bits, .. 
}), Opnd::Imm(value)) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if *num_bits == 64 && imm_num_bits(*value) > 32 { - let opnd1 = asm.load(*src); - asm.mov(*dest, opnd1); - } else if uimm_num_bits(*value as u64) <= *num_bits { - // If the bit string is short enough for the destination, use the unsigned representation. - // Note that 64-bit and negative values are ruled out. - asm.mov(*dest, Opnd::UImm(*value as u64)); - } else { - asm.mov(*dest, *src); - } - }, - _ => { - asm.mov(*dest, *src); - } + Insn::Mov { dest, src } => { + if let Opnd::Mem(_) = dest { + asm.store(*dest, *src); + } else { + asm.mov(*dest, *src); } }, Insn::Not { opnd, .. } => { @@ -440,6 +415,14 @@ impl Assembler } } + fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec, dest_reg: X86Opnd, value: VALUE) { + // Using movabs because mov might write value in 32 bits + movabs(cb, dest_reg, value.0 as _); + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); + gc_offsets.push(ptr_offset); + } + // List of GC offsets let mut gc_offsets: Vec = Vec::new(); @@ -552,20 +535,71 @@ impl Assembler shr(cb, opnd.into(), shift.into()) }, - Insn::Store { dest, src } => { - mov(cb, dest.into(), src.into()); - }, + store_insn @ Insn::Store { dest, src } => { + let &Opnd::Mem(Mem { num_bits, base: MemBase::Reg(base_reg_no), disp: _ }) = dest else { + panic!("Unexpected Insn::Store destination in x64_emit: {dest:?}"); + }; + + // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, + // so we panic to get the author to change their code. + #[track_caller] + fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { + assert_ne!( + backend_use.reg_no, + user_use, + "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" + ); + } + + let scratch = X86Opnd::Reg(Self::SCRATCH_REG); + let src = match src { + Opnd::Reg(_) => src.into(), + &Opnd::Mem(_) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + mov(cb, scratch, src.into()); + scratch + } + &Opnd::Imm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm) > 32 { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + mov(cb, scratch, src.into()); + scratch + } else if uimm_num_bits(imm as u64) <= num_bits { + // If the bit string is short enough for the destination, use the unsigned representation. + // Note that 64-bit and negative values are ruled out. + uimm_opnd(imm as u64) + } else { + src.into() + } + } + &Opnd::UImm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm as i64) > 32 { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + mov(cb, scratch, src.into()); + scratch + } else { + src.into() + } + } + &Opnd::Value(value) => { + assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); + emit_load_gc_value(cb, &mut gc_offsets, scratch, value); + scratch + } + src @ (Opnd::None | Opnd::VReg { .. 
}) => panic!("Unexpected source operand during x86_emit: {src:?}") + + }; + mov(cb, dest.into(), src); + } // This assumes only load instructions can contain references to GC'd Value operands Insn::Load { opnd, out } | Insn::LoadInto { dest: out, opnd } => { match opnd { Opnd::Value(val) if val.heap_object_p() => { - // Using movabs because mov might write value in 32 bits - movabs(cb, out.into(), val.0 as _); - // The pointer immediate is encoded as the last part of the mov written out - let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); - gc_offsets.push(ptr_offset); + emit_load_gc_value(cb, &mut gc_offsets, out.into(), *val); } _ => mov(cb, out.into(), opnd.into()) } @@ -1352,4 +1386,21 @@ mod tests { 0x29: pop rbp "}); } + + #[test] + fn test_store_value_without_split() { + let (mut asm, mut cb) = setup_asm(); + + let imitation_heap_value = VALUE(0x1000); + assert!(imitation_heap_value.heap_object_p()); + asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into()); + + let gc_offsets = asm.x86_emit(&mut cb).unwrap(); + assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset"); + + assert_disasm!(cb, "49bb00100000000000004c891b", " + 0x0: movabs r11, 0x1000 + 0xa: mov qword ptr [rbx], r11 + "); + } } diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index e7ebc1414af86e..433b22e15d2c79 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -340,7 +340,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::SendWithoutBlockDirect { cd, state, self_val, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state))?, - Insn::InvokeBuiltin { bf, args, state } => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?, + Insn::InvokeBuiltin { bf, args, state, .. 
} => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?, Insn::Return { val } => return Some(gen_return(asm, opnd!(val))?), Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?, Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?, diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 9c29ed5472d8f1..8d1548f92b1717 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -15,6 +15,7 @@ use crate::hir_type::{types, Type}; pub struct Annotations { cfuncs: HashMap<*mut c_void, FnProperties>, + builtin_funcs: HashMap<*mut c_void, FnProperties>, } /// Runtime behaviors of C functions that implement a Ruby method @@ -41,6 +42,12 @@ impl Annotations { }; self.cfuncs.get(&fn_ptr).copied() } + + /// Query about properties of a builtin function by its pointer + pub fn get_builtin_properties(&self, bf: *const rb_builtin_function) -> Option { + let func_ptr = unsafe { (*bf).func_ptr as *mut c_void }; + self.builtin_funcs.get(&func_ptr).copied() + } } fn annotate_c_method(props_map: &mut HashMap<*mut c_void, FnProperties>, class: VALUE, method_name: &'static str, props: FnProperties) { @@ -59,10 +66,78 @@ fn annotate_c_method(props_map: &mut HashMap<*mut c_void, FnProperties>, class: props_map.insert(fn_ptr, props); } +/// Look up a method and find its builtin function pointer by parsing its ISEQ +/// We currently only support methods with exactly one invokebuiltin instruction +fn annotate_builtin_method(props_map: &mut HashMap<*mut c_void, FnProperties>, class: VALUE, method_name: &'static str, props: FnProperties) { + unsafe { + let method_id = rb_intern2(method_name.as_ptr().cast(), method_name.len().try_into().unwrap()); + let method = rb_method_entry_at(class, method_id); + if method.is_null() { + panic!("Method {}#{} not found", std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), method_name); + } + + // Cast ME to CME - they have identical layout + let cme = method.cast::(); + let def_type = get_cme_def_type(cme); + + if def_type != VM_METHOD_TYPE_ISEQ { + panic!("Method {}#{} is not an ISEQ method (type: {})", + std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), + method_name, def_type); + } + + // Get the ISEQ from the method definition + let iseq = get_def_iseq_ptr((*cme).def); + if iseq.is_null() { + panic!("Failed to get ISEQ for {}#{}", + std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), + method_name); + } + + // Get the size of the ISEQ in instruction units + let encoded_size = rb_iseq_encoded_size(iseq); + + // Scan through the ISEQ to find invokebuiltin instructions + let mut insn_idx: u32 = 0; + let mut func_ptr = std::ptr::null_mut::(); + + while insn_idx < encoded_size { + // Get the PC for this instruction index + let pc = rb_iseq_pc_at_idx(iseq, insn_idx); + + // Get the opcode using the proper decoder + let opcode = rb_iseq_opcode_at_pc(iseq, pc); + + if opcode == YARVINSN_invokebuiltin as i32 || + opcode == YARVINSN_opt_invokebuiltin_delegate as i32 || + opcode == YARVINSN_opt_invokebuiltin_delegate_leave as i32 { + // The first operand is the builtin function pointer + let bf_value = *pc.add(1); + let bf_ptr = bf_value.as_ptr() as *const rb_builtin_function; + + if func_ptr.is_null() { + func_ptr = (*bf_ptr).func_ptr as *mut c_void; + } else { + panic!("Multiple invokebuiltin instructions found in ISEQ for {}#{}", + 
std::ffi::CStr::from_ptr(rb_class2name(class)).to_str().unwrap_or("?"), + method_name); + } + } + + // Move to the next instruction using the proper length + insn_idx = insn_idx.saturating_add(rb_insn_len(VALUE(opcode as usize)).try_into().unwrap()); + } + + // Only insert the properties if its iseq has exactly one invokebuiltin instruction + props_map.insert(func_ptr, props); + } +} + /// Gather annotations. Run this right after boot since the annotations /// are about the stock versions of methods. pub fn init() -> Annotations { let cfuncs = &mut HashMap::new(); + let builtin_funcs = &mut HashMap::new(); macro_rules! annotate { ($module:ident, $method_name:literal, $return_type:expr, $($properties:ident),+) => { @@ -74,6 +149,22 @@ pub fn init() -> Annotations { } } + macro_rules! annotate_builtin { + ($module:ident, $method_name:literal, $return_type:expr) => { + annotate_builtin!($module, $method_name, $return_type, no_gc, leaf, elidable) + }; + ($module:ident, $method_name:literal, $return_type:expr, $($properties:ident),+) => { + let mut props = FnProperties { + no_gc: false, + leaf: false, + elidable: false, + return_type: $return_type + }; + $(props.$properties = true;)+ + annotate_builtin_method(builtin_funcs, unsafe { $module }, $method_name, props); + } + } + annotate!(rb_mKernel, "itself", types::BasicObject, no_gc, leaf, elidable); annotate!(rb_cString, "bytesize", types::Fixnum, no_gc, leaf); annotate!(rb_cModule, "name", types::StringExact.union(types::NilClass), no_gc, leaf, elidable); @@ -83,7 +174,12 @@ pub fn init() -> Annotations { annotate!(rb_cNilClass, "nil?", types::TrueClass, no_gc, leaf, elidable); annotate!(rb_mKernel, "nil?", types::FalseClass, no_gc, leaf, elidable); + annotate_builtin!(rb_mKernel, "Float", types::Flonum); + annotate_builtin!(rb_mKernel, "Integer", types::Integer); + annotate_builtin!(rb_mKernel, "class", types::Class, leaf); + Annotations { - cfuncs: std::mem::take(cfuncs) + cfuncs: std::mem::take(cfuncs), + builtin_funcs: std::mem::take(builtin_funcs), } } diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 61514c88769d87..976580c85b25e1 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -522,7 +522,12 @@ pub enum Insn { }, // Invoke a builtin function - InvokeBuiltin { bf: rb_builtin_function, args: Vec, state: InsnId }, + InvokeBuiltin { + bf: rb_builtin_function, + args: Vec, + state: InsnId, + return_type: Option, // None for unannotated builtins + }, /// Control flow instructions Return { val: InsnId }, @@ -1163,7 +1168,7 @@ impl Function { args: find_vec!(args), state, }, - &InvokeBuiltin { bf, ref args, state } => InvokeBuiltin { bf: bf, args: find_vec!(args), state }, + &InvokeBuiltin { bf, ref args, state, return_type } => InvokeBuiltin { bf, args: find_vec!(args), state, return_type }, &ArrayDup { val, state } => ArrayDup { val: find!(val), state }, &HashDup { val, state } => HashDup { val: find!(val), state }, &CCall { cfun, ref args, name, return_type, elidable } => CCall { cfun, args: find_vec!(args), name, return_type, elidable }, @@ -1260,7 +1265,7 @@ impl Function { Insn::SendWithoutBlock { .. } => types::BasicObject, Insn::SendWithoutBlockDirect { .. } => types::BasicObject, Insn::Send { .. } => types::BasicObject, - Insn::InvokeBuiltin { .. } => types::BasicObject, + Insn::InvokeBuiltin { return_type, .. } => return_type.unwrap_or(types::BasicObject), Insn::Defined { .. } => types::BasicObject, Insn::DefinedIvar { .. } => types::BasicObject, Insn::GetConstantPath { .. 
} => types::BasicObject, @@ -3119,7 +3124,18 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { args.reverse(); let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { bf, args, state: exit_id }); + + // Check if this builtin is annotated + let return_type = ZJITState::get_method_annotations() + .get_builtin_properties(&bf) + .map(|props| props.return_type); + + let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { + bf, + args, + state: exit_id, + return_type, + }); state.stack_push(insn_id); } YARVINSN_opt_invokebuiltin_delegate | @@ -3134,7 +3150,18 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { bf, args, state: exit_id }); + + // Check if this builtin is annotated + let return_type = ZJITState::get_method_annotations() + .get_builtin_properties(&bf) + .map(|props| props.return_type); + + let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { + bf, + args, + state: exit_id, + return_type, + }); state.stack_push(insn_id); } YARVINSN_objtostring => { @@ -4973,23 +5000,53 @@ mod tests { } #[test] - fn test_invokebuiltin_delegate_with_args() { + fn test_invokebuiltin_delegate_annotated() { assert_method_hir_with_opcode("Float", YARVINSN_opt_invokebuiltin_delegate_leave, expect![[r#" fn Float@:197: bb0(v0:BasicObject, v1:BasicObject, v2:BasicObject, v3:BasicObject): - v6:BasicObject = InvokeBuiltin rb_f_float, v0, v1, v2 + v6:Flonum = InvokeBuiltin rb_f_float, v0, v1, v2 Jump bb1(v0, v1, v2, v3, v6) - bb1(v8:BasicObject, v9:BasicObject, v10:BasicObject, v11:BasicObject, v12:BasicObject): + bb1(v8:BasicObject, v9:BasicObject, v10:BasicObject, v11:BasicObject, v12:Flonum): Return v12 "#]]); } #[test] - fn test_invokebuiltin_delegate_without_args() { + fn test_invokebuiltin_cexpr_annotated() { assert_method_hir_with_opcode("class", YARVINSN_opt_invokebuiltin_delegate_leave, expect![[r#" fn class@:20: bb0(v0:BasicObject): - v3:BasicObject = InvokeBuiltin _bi20, v0 + v3:Class = InvokeBuiltin _bi20, v0 + Jump bb1(v0, v3) + bb1(v5:BasicObject, v6:Class): + Return v6 + "#]]); + } + + #[test] + fn test_invokebuiltin_delegate_with_args() { + // Using an unannotated builtin to test InvokeBuiltin generation + let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("Dir", "open")); + assert!(iseq_contains_opcode(iseq, YARVINSN_opt_invokebuiltin_delegate), "iseq Dir.open does not contain invokebuiltin"); + let function = iseq_to_hir(iseq).unwrap(); + assert_function_hir(function, expect![[r#" + fn open@:184: + bb0(v0:BasicObject, v1:BasicObject, v2:BasicObject, v3:BasicObject, v4:BasicObject): + v5:NilClass = Const Value(nil) + v8:BasicObject = InvokeBuiltin dir_s_open, v0, v1, v2 + SideExit UnknownOpcode(getblockparamproxy) + "#]]); + } + + #[test] + fn test_invokebuiltin_delegate_without_args() { + let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("GC", "enable")); + assert!(iseq_contains_opcode(iseq, YARVINSN_opt_invokebuiltin_delegate_leave), "iseq GC.enable does not contain invokebuiltin"); + let function = iseq_to_hir(iseq).unwrap(); + assert_function_hir(function, expect![[r#" + fn enable@:55: + bb0(v0:BasicObject): + v3:BasicObject = InvokeBuiltin gc_enable, v0 Jump bb1(v0, v3) bb1(v5:BasicObject, v6:BasicObject): Return v6
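
Note on the MOVN addition in zjit/src/asm/arm64/inst/mov.rs: MOVN, MOVZ and MOVK all belong to the A64 "move wide immediate" class and differ only in the two opc bits (0b00 for MOVN, 0b10 for MOVZ, 0b11 for MOVK). A stand-alone sketch of the word assembly, with illustrative field names rather than the ones used by the Mov struct; the expected values come from the unit tests added and already present in this file:

    // Assemble an A64 "move wide immediate" word: sf | opc | 100101 | hw | imm16 | rd.
    fn move_wide(sf: u32, opc: u32, hw: u32, imm16: u16, rd: u32) -> u32 {
        assert!(hw < 4 && rd < 32);
        (sf << 31) | (opc << 29) | (0b100101 << 23) | (hw << 21) | ((imm16 as u32) << 5) | rd
    }

    fn main() {
        // MOVN X0, #123           -> 0x92800f60 (test_movn_unshifted)
        assert_eq!(0x92800f60, move_wide(1, 0b00, 0, 123, 0));
        // MOVN X0, #123, LSL #16  -> 0x92a00f60 (test_movn_shifted_16)
        assert_eq!(0x92a00f60, move_wide(1, 0b00, 1, 123, 0));
        // MOVZ X0, #123, LSL #16  -> 0xd2a00f60 (test_movz, bytes "600fa0d2")
        assert_eq!(0xd2a00f60, move_wide(1, 0b10, 1, 123, 0));
        // MOVK X0, #123, LSL #16  -> 0xf2a00f60 (test_movk_shifted_16)
        assert_eq!(0xf2a00f60, move_wide(1, 0b11, 1, 123, 0));
    }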
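
Note on the new branch in emit_load_value: a single MOVN is possible exactly when the bitwise complement of the value fits in 16 bits, i.e. for signed values in -0x10000..=-1, which is why the -0x1800 case in the updated lea test shrinks from one movz plus three movk to a single mov alias of movn. A rough sketch of just the two single-instruction arms (the bitmask-immediate and movz+movk fallbacks are left out):

    /// Can `value` be materialized with one MOVZ or one MOVN?
    fn single_instruction_mov(value: u64) -> Option<&'static str> {
        if u16::try_from(value).is_ok() {
            Some("movz")          // only the low 16 bits are set
        } else if u16::try_from(!value).is_ok() {
            Some("movn")          // low 16 bits of the complement, upper bits all ones
        } else {
            None
        }
    }

    fn main() {
        assert_eq!(Some("movz"), single_instruction_mov(8));                  // `mov x0, #8` in the lea test
        assert_eq!(Some("movn"), single_instruction_mov(-0x1800i64 as u64));  // now `mov x0, #-0x1800`
        assert_eq!(None, single_instruction_mov(-0x10008i64 as u64));         // still movz + three movk
        assert_eq!(None, single_instruction_mov(0x10008));                    // movz + movk, unchanged
    }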
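
Note on the arm64 Store lowering that moved from arm64_split into arm64_emit: the source is first normalized into a register (xzr for literal zero, SCRATCH0/x16 for nonzero immediates, GC VALUEs and memory sources), then the destination displacement is checked against what STUR can encode, with SCRATCH1/x17 holding the address otherwise; assert_no_clobber turns impossible combinations, such as a destination based on SCRATCH_REG itself (test_store_unserviceable), into a panic. A stand-alone sketch of the resulting scratch-register demand, assuming the signed 9-bit unscaled-offset range behind mem_disp_fits_bits:

    enum Src { Reg, Zero, Imm, GcValue, Mem }

    /// Which scratch registers does lowering `Insn::Store { dest, src }` need?
    /// Returns (x16 needed for the source, x17 needed for the destination address).
    fn store_scratch_demand(src: &Src, dest_disp: i32) -> (bool, bool) {
        // Assumed signed 9-bit range for STUR/LDUR-style unscaled offsets.
        let disp_fits = |d: i32| (-256..=255).contains(&d);
        let src_needs_x16 = match src {
            Src::Reg | Src::Zero => false,  // a register, or xzr for a literal zero
            Src::Imm => true,               // nonzero immediate, materialized via emit_load_value
            Src::GcValue => true,           // loaded from an inline literal pool
            Src::Mem => true,               // mem-to-mem goes through a ldur first
        };
        (src_needs_x16, !disp_fits(dest_disp))
    }

    fn main() {
        // test_store: store(small_mem, large_mem) loads the source through x16;
        // the destination displacement 0 fits, so no x17 is needed.
        assert_eq!((true, false), store_scratch_demand(&Src::Mem, 0));
        // store(large_mem, small_mem) and store(large_mem, large_mem): disp -0x305
        // is out of range, so x17 holds the destination address, matching the
        // sub x16 / sub x17 pairs in the expected disassembly.
        assert_eq!((true, true), store_scratch_demand(&Src::Mem, -0x305));
        // Other source kinds, for completeness:
        assert_eq!((false, false), store_scratch_demand(&Src::Reg, 8));
        assert_eq!((false, false), store_scratch_demand(&Src::Zero, 8));
        assert_eq!((true, false), store_scratch_demand(&Src::Imm, 8));
        assert_eq!((true, false), store_scratch_demand(&Src::GcValue, 8));
    }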
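
Note on emit_load_gc_value on arm64: it factors the literal-pool pattern out of Insn::Load so Store can reuse it, emitting an LDR (literal) that reads the VALUE embedded right after a branch that jumps over it, and recording the embedded bytes in gc_offsets. The offset arithmetic behind the disassembly expected by test_store_value_without_split, as a small check:

    fn main() {
        const SIZEOF_VALUE: i32 = 8;
        // ldr_literal(cb, dest, 2.into()): the literal sits 2 instructions (8 bytes)
        // past the LDR, which is where the VALUE bytes are written.
        let ldr_pc = 0x0;
        let value_bytes_at = ldr_pc + 2 * 4;
        // b(cb, InstructionOffset::from_bytes(4 + SIZEOF_VALUE)): emitted right after
        // the LDR (pc 0x4), it skips itself plus the 8 inline bytes.
        let branch_pc = 0x4;
        let branch_target = branch_pc + 4 + SIZEOF_VALUE;
        assert_eq!(0x8, value_bytes_at);   // ".byte 0x00, 0x10, ..." in the test disasm
        assert_eq!(0x10, branch_target);   // "b #0x10", landing on the stur
        // gc_offsets records get_write_ptr() - SIZEOF_VALUE, i.e. value_bytes_at,
        // so GC marking and compaction can rewrite the embedded pointer in place.
    }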
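
Note on the x86_64 store path: immediates stay inline whenever `mov r/m64, imm32` can encode them (the 32-bit immediate is sign-extended to 64 bits), anything wider detours through SCRATCH_REG/r11, and heap VALUEs always take the movabs route so the 64-bit pointer remains patchable and is reported in gc_offsets. A sketch of the width check, with imm_bits as a rough stand-in for the backend's imm_num_bits helper (not the real function):

    /// Width of the smallest sign-extended encoding for `v` (approximation).
    fn imm_bits(v: i64) -> u32 {
        64 - (if v < 0 { !v } else { v }).leading_zeros() + 1
    }

    /// Does storing this immediate to a 64-bit destination need the scratch register?
    fn needs_scratch_for_imm(dest_num_bits: u32, imm: i64) -> bool {
        dest_num_bits == 64 && imm_bits(imm) > 32
    }

    fn main() {
        assert!(!needs_scratch_for_imm(64, 0x7fff_ffff));    // encodable, stored directly
        assert!(!needs_scratch_for_imm(64, -1));             // sign-extends to all ones
        assert!(needs_scratch_for_imm(64, 0x1_0000_0000));   // too wide: goes via r11
        // Heap VALUEs always go through r11, as test_store_value_without_split checks:
        //   movabs r11, 0x1000 ; mov qword ptr [rbx], r11
    }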
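
Note on annotate_builtin_method in cruby_methods.rs: it resolves a Ruby method to its ISEQ and scans for an invokebuiltin, opt_invokebuiltin_delegate or opt_invokebuiltin_delegate_leave instruction, panicking at boot if it sees more than one so an annotation cannot silently bind to the wrong func_ptr. Reduced to a plain opcode slice (the real scan walks PCs with rb_iseq_pc_at_idx and rb_insn_len), the logic is roughly:

    #[derive(Clone, Copy)]
    enum Opcode { InvokeBuiltin, OptInvokeBuiltinDelegate, OptInvokeBuiltinDelegateLeave, Other }

    /// Index of the single invokebuiltin-family instruction in a method body;
    /// refuses more than one, mirroring the panic in annotate_builtin_method.
    fn find_invokebuiltin(insns: &[Opcode]) -> Option<usize> {
        let mut found = None;
        for (idx, op) in insns.iter().enumerate() {
            if matches!(*op, Opcode::InvokeBuiltin
                           | Opcode::OptInvokeBuiltinDelegate
                           | Opcode::OptInvokeBuiltinDelegateLeave) {
                assert!(found.is_none(), "Multiple invokebuiltin instructions found");
                found = Some(idx);
            }
        }
        found
    }

    fn main() {
        use Opcode::*;
        // Kernel#Float and friends: one opt_invokebuiltin_delegate_leave, accepted.
        assert_eq!(Some(1), find_invokebuiltin(&[Other, OptInvokeBuiltinDelegateLeave]));
        // A method with no builtin call simply does not get annotated.
        assert_eq!(None, find_invokebuiltin(&[Other, Other]));
        // Plain invokebuiltin and opt_invokebuiltin_delegate count the same way.
        assert_eq!(Some(0), find_invokebuiltin(&[InvokeBuiltin, Other]));
        assert_eq!(Some(2), find_invokebuiltin(&[Other, Other, OptInvokeBuiltinDelegate]));
    }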
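
Note on the HIR side: InvokeBuiltin now carries an optional annotated return type, looked up in the new builtin_funcs map keyed by the builtin's function pointer, and infer_type falls back to BasicObject when there is no annotation; this is what types Kernel#Float as Flonum in the updated tests while dir_s_open and gc_enable stay BasicObject. A simplified model of the lookup and fallback (the types and pointers here are stand-ins, not the real FnProperties or Type):

    use std::collections::HashMap;

    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Ty { BasicObject, Flonum }

    #[derive(Clone, Copy)]
    struct FnProps { return_type: Ty } // the real FnProperties also has no_gc/leaf/elidable

    struct Annotations { builtin_funcs: HashMap<usize, FnProps> } // keyed by the builtin's func_ptr

    impl Annotations {
        fn get_builtin_properties(&self, func_ptr: usize) -> Option<FnProps> {
            self.builtin_funcs.get(&func_ptr).copied()
        }
    }

    /// What infer_type does for InvokeBuiltin { return_type: Option<_>, .. }:
    /// use the annotation when present, otherwise stay at BasicObject.
    fn invokebuiltin_return_type(annotations: &Annotations, func_ptr: usize) -> Ty {
        annotations
            .get_builtin_properties(func_ptr)
            .map(|props| props.return_type)
            .unwrap_or(Ty::BasicObject)
    }

    fn main() {
        let rb_f_float = 0x1000_usize; // stand-in function pointers
        let dir_s_open = 0x2000_usize;

        let mut builtin_funcs = HashMap::new();
        builtin_funcs.insert(rb_f_float, FnProps { return_type: Ty::Flonum });
        let annotations = Annotations { builtin_funcs };

        // Annotated builtin: HIR types it, e.g. `v6:Flonum = InvokeBuiltin rb_f_float, ...`
        assert_eq!(Ty::Flonum, invokebuiltin_return_type(&annotations, rb_f_float));
        // Unannotated builtin such as dir_s_open keeps the BasicObject fallback.
        assert_eq!(Ty::BasicObject, invokebuiltin_return_type(&annotations, dir_s_open));
    }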