diff --git a/doc/string/partition.rdoc b/doc/string/partition.rdoc index ece034ee66225e..ee445bd21f32ba 100644 --- a/doc/string/partition.rdoc +++ b/doc/string/partition.rdoc @@ -29,7 +29,7 @@ If +pattern+ is a Regexp, performs the equivalent of self.match(pattern) ["hello", "", ""] If +pattern+ is not a Regexp, converts it to a string (if it is not already one), -then performs the equivalet of self.index(pattern) +then performs the equivalent of self.index(pattern) (and does _not_ set {pattern-matching global variables}[rdoc-ref:globals.md@Pattern+Matching]): 'hello'.partition('h') # => ["", "h", "ello"] diff --git a/insns.def b/insns.def index eef0d3f5dc1124..69a8210d7d6f99 100644 --- a/insns.def +++ b/insns.def @@ -1470,6 +1470,7 @@ opt_ltlt * string. Then what happens if that codepoint does not exist in the * string's encoding? Of course an exception. That's not a leaf. */ // attr bool leaf = false; /* has "invalid codepoint" exception */ +// attr bool zjit_profile = true; { val = vm_opt_ltlt(recv, obj); @@ -1537,6 +1538,7 @@ opt_aset /* This is another story than opt_aref. When vm_opt_aset() resorts * to rb_hash_aset(), which should call #hash for `obj`. */ // attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */ +// attr bool zjit_profile = true; { val = vm_opt_aset(recv, obj, set); @@ -1551,6 +1553,7 @@ opt_length (CALL_DATA cd) (VALUE recv) (VALUE val) +// attr bool zjit_profile = true; { val = vm_opt_length(recv, BOP_LENGTH); @@ -1565,6 +1568,7 @@ opt_size (CALL_DATA cd) (VALUE recv) (VALUE val) +// attr bool zjit_profile = true; { val = vm_opt_length(recv, BOP_SIZE); @@ -1624,6 +1628,7 @@ opt_regexpmatch2 (VALUE obj2, VALUE obj1) (VALUE val) // attr bool leaf = false; /* match_at() has rb_thread_check_ints() */ +// attr bool zjit_profile = true; { val = vm_opt_regexpmatch2(obj2, obj1); diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 40df3ae4d5830b..8205d6de76bd2f 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -1602,7 +1602,7 @@ impl Assembler if c_args.len() > 0 { // Resolve C argument dependencies let c_args_len = c_args.len() as isize; - let moves = Self::reorder_reg_moves(&c_args.drain(..).collect()); + let moves = Self::reorder_reg_moves(&std::mem::take(&mut c_args)); shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len); // Push batched C arguments diff --git a/zjit/src/asm/mod.rs b/zjit/src/asm/mod.rs index aeb429382d88c0..32dc633a2941ce 100644 --- a/zjit/src/asm/mod.rs +++ b/zjit/src/asm/mod.rs @@ -20,7 +20,6 @@ pub mod arm64; pub struct Label(pub usize); /// Reference to an ASM label -#[derive(Clone)] pub struct LabelRef { // Position in the code block where the label reference exists pos: usize, @@ -34,7 +33,7 @@ pub struct LabelRef { num_bytes: usize, /// The object that knows how to encode the branch instruction. 
- encode: fn(&mut CodeBlock, i64, i64) + encode: Box, } /// Block of memory into which instructions can be assembled @@ -223,11 +222,11 @@ impl CodeBlock { } // Add a label reference at the current write position - pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: fn(&mut CodeBlock, i64, i64)) { + pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: impl Fn(&mut CodeBlock, i64, i64) + 'static) { assert!(label.0 < self.label_addrs.len()); // Keep track of the reference - self.label_refs.push(LabelRef { pos: self.write_pos, label, num_bytes, encode }); + self.label_refs.push(LabelRef { pos: self.write_pos, label, num_bytes, encode: Box::new(encode) }); // Move past however many bytes the instruction takes up if self.write_pos + num_bytes < self.mem_size { @@ -251,7 +250,7 @@ impl CodeBlock { assert!(label_addr < self.mem_size); self.write_pos = ref_pos; - (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64); + (label_ref.encode.as_ref())(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64); // Assert that we've written the same number of bytes that we // expected to have written. diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 5ac62c059986d5..6750926b35daa1 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -113,8 +113,8 @@ fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) { b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); 1 } else { - let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64); - br(cb, Assembler::SCRATCH0); + let num_insns = emit_load_value(cb, Assembler::EMIT0_OPND, dst_addr as u64); + br(cb, Assembler::EMIT0_OPND); num_insns + 1 }; @@ -181,7 +181,7 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { /// List of registers that can be used for register allocation. /// This has the same number of registers for x86_64 and arm64. -/// SCRATCH0 and SCRATCH1 are excluded. +/// SCRATCH_OPND, EMIT0_OPND, and EMIT1_OPND are excluded. pub const ALLOC_REGS: &[Reg] = &[ X0_REG, X1_REG, @@ -193,17 +193,33 @@ pub const ALLOC_REGS: &[Reg] = &[ X12_REG, ]; -impl Assembler -{ - /// Special scratch registers for intermediate processing. - /// This register is call-clobbered (so we don't have to save it before using it). - /// Avoid using if you can since this is used to lower [Insn] internally and - /// so conflicts are possible. - pub const SCRATCH_REG: Reg = X16_REG; - const SCRATCH0_REG: Reg = Self::SCRATCH_REG; - const SCRATCH1_REG: Reg = X17_REG; - const SCRATCH0: A64Opnd = A64Opnd::Reg(Self::SCRATCH0_REG); - const SCRATCH1: A64Opnd = A64Opnd::Reg(Self::SCRATCH1_REG); +/// Special scratch register for intermediate processing. It should be used only by +/// [`Assembler::arm64_split_with_scratch_reg`] or [`Assembler::new_with_scratch_reg`]. +const SCRATCH_OPND: Opnd = Opnd::Reg(X15_REG); + +impl Assembler { + /// Special registers for intermediate processing in arm64_emit. It should be used only by arm64_emit. + /// TODO: Remove the use of these registers by splitting instructions in arm64_split_with_scratch_reg. + const EMIT0_REG: Reg = X16_REG; + const EMIT1_REG: Reg = X17_REG; + const EMIT0_OPND: A64Opnd = A64Opnd::Reg(Self::EMIT0_REG); + const EMIT1_OPND: A64Opnd = A64Opnd::Reg(Self::EMIT1_REG); + + /// Return an Assembler with scratch registers disabled in the backend, and a scratch register. 
+ pub fn new_with_scratch_reg() -> (Self, Opnd) { + (Self::new_with_label_names(Vec::default(), 0, true), SCRATCH_OPND) + } + + /// Return true if opnd contains a scratch reg + pub fn has_scratch_reg(opnd: Opnd) -> bool { + match opnd { + Opnd::Reg(_) => opnd == SCRATCH_OPND, + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), .. }) => { + reg_no == SCRATCH_OPND.unwrap_reg().reg_no + } + _ => false, + } + } /// Get the list of registers from which we will allocate on this platform pub fn get_alloc_regs() -> Vec { @@ -372,7 +388,7 @@ impl Assembler let live_ranges: Vec = take(&mut self.live_ranges); let mut iterator = self.insns.into_iter().enumerate().peekable(); - let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len()); + let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len(), self.accept_scratch_reg); let asm = &mut asm_local; while let Some((index, mut insn)) = iterator.next() { @@ -555,14 +571,6 @@ impl Assembler } } }, - Insn::IncrCounter { mem, value } => { - let counter_addr = match mem { - Opnd::Mem(_) => asm.lea(*mem), - _ => *mem - }; - - asm.incr_counter(counter_addr, *value); - }, Insn::JmpOpnd(opnd) => { if let Opnd::Mem(_) = opnd { let opnd0 = split_load_operand(asm, *opnd); @@ -679,6 +687,58 @@ impl Assembler asm_local } + /// Split instructions using scratch registers. To maximize the use of the register pool for + /// VRegs, most splits should happen in [`Self::arm64_split`]. However, some instructions + /// need to be split with registers after `alloc_regs`, e.g. for `compile_side_exits`, so this + /// splits them and uses scratch registers for it. + fn arm64_split_with_scratch_reg(mut self) -> Assembler { + let mut iterator = self.insns.into_iter().enumerate().peekable(); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), self.live_ranges.len(), true); + + while let Some((_, mut insn)) = iterator.next() { + match &mut insn { + // For compile_side_exits, support splitting simple C arguments here + Insn::CCall { opnds, .. } if !opnds.is_empty() => { + for (i, opnd) in opnds.iter().enumerate() { + asm.load_into(C_ARG_OPNDS[i], *opnd); + } + *opnds = vec![]; + asm.push_insn(insn); + } + &mut Insn::Lea { opnd, out } => { + match (opnd, out) { + // Split here for compile_side_exits + (Opnd::Mem(_), Opnd::Mem(_)) => { + asm.lea_into(SCRATCH_OPND, opnd); + asm.store(out, SCRATCH_OPND); + } + _ => { + asm.push_insn(insn); + } + } + } + // Convert Opnd::const_ptr into Opnd::Mem. It's split here compile_side_exits. + &mut Insn::IncrCounter { mem, value } => { + assert!(matches!(mem, Opnd::UImm(_))); + asm.load_into(SCRATCH_OPND, mem); + asm.lea_into(SCRATCH_OPND, Opnd::mem(64, SCRATCH_OPND, 0)); + asm.incr_counter(SCRATCH_OPND, value); + } + // Resolve ParallelMov that couldn't be handled without a scratch register. + Insn::ParallelMov { moves } => { + for (reg, opnd) in Self::resolve_parallel_moves(moves, Some(SCRATCH_OPND)).unwrap() { + asm.load_into(Opnd::Reg(reg), opnd); + } + } + _ => { + asm.push_insn(insn); + } + } + } + + asm + } + /// Emit platform-specific machine code /// Returns a list of GC offsets. Can return failure to signal caller to retry. fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Option> { @@ -739,8 +799,8 @@ impl Assembler // that if it doesn't match it will skip over the // instructions used for branching. 
bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into()); - emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64); - br(cb, Assembler::SCRATCH0); + emit_load_value(cb, Assembler::EMIT0_OPND, dst_addr as u64); + br(cb, Assembler::EMIT0_OPND); // Here we'll return the number of instructions that it // took to write out the destination address + 1 for the @@ -806,8 +866,8 @@ impl Assembler } else { cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); } - emit_load_value(cb, Assembler::SCRATCH0, dst_addr); - br(cb, Assembler::SCRATCH0); + emit_load_value(cb, Assembler::EMIT0_OPND, dst_addr); + br(cb, Assembler::EMIT0_OPND); } */ } else { @@ -979,7 +1039,7 @@ impl Assembler (Some(Insn::JoMul(_)), _) | (Some(Insn::PosMarker(_)), Some(Insn::JoMul(_))) => { // Compute the high 64 bits - smulh(cb, Self::SCRATCH0, left.into(), right.into()); + smulh(cb, Self::EMIT0_OPND, left.into(), right.into()); // Compute the low 64 bits // This may clobber one of the input registers, @@ -988,11 +1048,11 @@ impl Assembler // Produce a register that is all zeros or all ones // Based on the sign bit of the 64-bit mul result - asr(cb, Self::SCRATCH1, out.into(), A64Opnd::UImm(63)); + asr(cb, Self::EMIT1_OPND, out.into(), A64Opnd::UImm(63)); // If the high 64-bits are not all zeros or all ones, // matching the sign bit, then we have an overflow - cmp(cb, Self::SCRATCH0, Self::SCRATCH1); + cmp(cb, Self::EMIT0_OPND, Self::EMIT1_OPND); // Insn::JoMul will emit_conditional_jump::<{Condition::NE}> } _ => { @@ -1021,77 +1081,57 @@ impl Assembler Insn::LShift { opnd, shift, out } => { lsl(cb, out.into(), opnd.into(), shift.into()); }, - store_insn @ Insn::Store { dest, src } => { - // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are - // accepted. As a rule of thumb, avoid using Assembler::SCRATCH as a memory - // base register to gurantee things will work. + Insn::Store { dest, src } => { + // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are accepted. let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else { panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}"); }; - // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, - // so we panic to get the author to change their code. 
- #[track_caller] - fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { - assert_ne!( - backend_use.reg_no, - user_use, - "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" - ); - } - - // Split src into SCRATCH0 if necessary + // Split src into EMIT0_OPND if necessary let src_reg: A64Reg = match src { Opnd::Reg(reg) => *reg, // Use zero register when possible Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG, // Immediates &Opnd::Imm(imm) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); - emit_load_value(cb, Self::SCRATCH0, imm as u64); - Self::SCRATCH0_REG + emit_load_value(cb, Self::EMIT0_OPND, imm as u64); + Self::EMIT0_REG } &Opnd::UImm(imm) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); - emit_load_value(cb, Self::SCRATCH0, imm); - Self::SCRATCH0_REG + emit_load_value(cb, Self::EMIT0_OPND, imm); + Self::EMIT0_REG } &Opnd::Value(value) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); - emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value); - Self::SCRATCH0_REG + emit_load_gc_value(cb, &mut gc_offsets, Self::EMIT0_OPND, value); + Self::EMIT0_REG } src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => { - // For mem-to-mem store, load the source into SCRATCH0 - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + // For mem-to-mem store, load the source into EMIT0_OPND let src_mem = if mem_disp_fits_bits(src_disp) { src_mem.into() } else { - // Split the load address into SCRATCH0 first if necessary - assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG); - load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp); - A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0) + // Split the load address into EMIT0_OPND first if necessary + load_effective_address(cb, Self::EMIT0_OPND, src_base_reg_no, src_disp); + A64Opnd::new_mem(dest_num_bits, Self::EMIT0_OPND, 0) }; match src_num_bits { - 64 | 32 => ldur(cb, Self::SCRATCH0, src_mem), - 16 => ldurh(cb, Self::SCRATCH0, src_mem), - 8 => ldurb(cb, Self::SCRATCH0, src_mem), + 64 | 32 => ldur(cb, Self::EMIT0_OPND, src_mem), + 16 => ldurh(cb, Self::EMIT0_OPND, src_mem), + 8 => ldurb(cb, Self::EMIT0_OPND, src_mem), num_bits => panic!("unexpected num_bits: {num_bits}") }; - Self::SCRATCH0_REG + Self::EMIT0_REG } src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}") }; let src = A64Opnd::Reg(src_reg); - // Split dest into SCRATCH1 if necessary. + // Split dest into EMIT1_OPND if necessary. let dest = if mem_disp_fits_bits(disp) { dest.into() } else { - assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG); - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG); - load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp); - A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0) + load_effective_address(cb, Self::EMIT1_OPND, base_reg_no, disp); + A64Opnd::new_mem(dest_num_bits, Self::EMIT1_OPND, 0) }; // This order may be surprising but it is correct. 
The way @@ -1169,10 +1209,10 @@ impl Assembler if let Target::Label(label_idx) = target { // Set output to the raw address of the label cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| { - adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); + adr(cb, Self::EMIT0_OPND, A64Opnd::new_imm(dst_addr - (end_addr - 4))); }); - mov(cb, out.into(), Self::SCRATCH0); + mov(cb, out.into(), Self::EMIT0_OPND); } else { // Set output to the jump target's raw address let target_code = target.unwrap_code_ptr(); @@ -1197,15 +1237,15 @@ impl Assembler } // Push the flags/state register - mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); - emit_push(cb, Self::SCRATCH0); + mrs(cb, Self::EMIT0_OPND, SystemRegister::NZCV); + emit_push(cb, Self::EMIT0_OPND); }, Insn::CPopAll => { let regs = Assembler::get_caller_save_regs(); // Pop the state/flags register - msr(cb, SystemRegister::NZCV, Self::SCRATCH0); - emit_pop(cb, Self::SCRATCH0); + msr(cb, SystemRegister::NZCV, Self::EMIT0_OPND); + emit_pop(cb, Self::EMIT0_OPND); for reg in regs.into_iter().rev() { emit_pop(cb, A64Opnd::Reg(reg)); @@ -1221,8 +1261,8 @@ impl Assembler if b_offset_fits_bits((dst_addr - src_addr) / 4) { bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); } else { - emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); - blr(cb, Self::SCRATCH0); + emit_load_value(cb, Self::EMIT0_OPND, dst_addr as u64); + blr(cb, Self::EMIT0_OPND); } }, Insn::CRet { .. } => { @@ -1302,17 +1342,17 @@ impl Assembler let label = cb.new_label("incr_counter_loop".to_string()); cb.write_label(label); - ldaxr(cb, Self::SCRATCH0, mem.into()); - add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into()); + ldaxr(cb, Self::EMIT0_OPND, mem.into()); + add(cb, Self::EMIT0_OPND, Self::EMIT0_OPND, value.into()); // The status register that gets used to track whether or // not the store was successful must be 32 bytes. Since we - // store the SCRATCH registers as their 64-bit versions, we + // store the EMIT registers as their 64-bit versions, we // need to rewrap it here. - let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32)); - stlxr(cb, status, Self::SCRATCH0, mem.into()); + let status = A64Opnd::Reg(Self::EMIT1_REG.with_num_bits(32)); + stlxr(cb, status, Self::EMIT0_OPND, mem.into()); - cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0)); + cmp(cb, Self::EMIT1_OPND, A64Opnd::new_uimm(0)); emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label)); }, Insn::Breakpoint => { @@ -1363,9 +1403,16 @@ impl Assembler /// Optimize and compile the stored instructions pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Result<(CodePtr, Vec), CompileError> { + // The backend is allowed to use scratch registers only if it has not accepted them so far. + let use_scratch_reg = !self.accept_scratch_reg; + let asm = self.arm64_split(); let mut asm = asm.alloc_regs(regs)?; + // We put compile_side_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits. 
asm.compile_side_exits(); + if use_scratch_reg { + asm = asm.arm64_split_with_scratch_reg(); + } // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { @@ -1809,12 +1856,39 @@ mod tests { assert_snapshot!(cb.hexdump(), @"50000058030000140010000000000000b00200f8"); } + #[test] + fn test_store_with_valid_scratch_reg() { + let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); + let mut cb = CodeBlock::new_dummy(); + asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into()); + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm_snapshot!(cb.disasm(), @r" + 0x0: mov x16, #0x3902 + 0x4: movk x16, #8, lsl #16 + 0x8: stur x16, [x15] + "); + assert_snapshot!(cb.hexdump(), @"502087d21001a0f2f00100f8"); + } + + #[test] + #[should_panic] + fn test_store_with_invalid_scratch_reg() { + let (_, scratch_reg) = Assembler::new_with_scratch_reg(); + let (mut asm, mut cb) = setup_asm(); + // This would put the source into scratch_reg, messing up the destination + asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into()); + + asm.compile_with_num_regs(&mut cb, 0); + } + #[test] #[should_panic] - fn test_store_unserviceable() { + fn test_load_into_with_invalid_scratch_reg() { + let (_, scratch_reg) = Assembler::new_with_scratch_reg(); let (mut asm, mut cb) = setup_asm(); - // This would put the source into SCRATCH_REG, messing up the destination - asm.store(Opnd::mem(64, SCRATCH_OPND, 0), 0x83902.into()); + // This would put the source into scratch_reg, messing up the destination + asm.load_into(scratch_reg, 0x83902.into()); asm.compile_with_num_regs(&mut cb, 0); } @@ -2300,13 +2374,13 @@ mod tests { asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" - 0x0: mov x16, x0 + 0x0: mov x15, x0 0x4: mov x0, x1 - 0x8: mov x1, x16 + 0x8: mov x1, x15 0xc: mov x16, #0 0x10: blr x16 "); - assert_snapshot!(cb.hexdump(), @"f00300aae00301aae10310aa100080d200023fd6"); + assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae1030faa100080d200023fd6"); } #[test] @@ -2324,16 +2398,16 @@ mod tests { asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" - 0x0: mov x16, x2 + 0x0: mov x15, x2 0x4: mov x2, x3 - 0x8: mov x3, x16 - 0xc: mov x16, x0 + 0x8: mov x3, x15 + 0xc: mov x15, x0 0x10: mov x0, x1 - 0x14: mov x1, x16 + 0x14: mov x1, x15 0x18: mov x16, #0 0x1c: blr x16 "); - assert_snapshot!(cb.hexdump(), @"f00302aae20303aae30310aaf00300aae00301aae10310aa100080d200023fd6"); + assert_snapshot!(cb.hexdump(), @"ef0302aae20303aae3030faaef0300aae00301aae1030faa100080d200023fd6"); } #[test] @@ -2350,13 +2424,13 @@ mod tests { asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" - 0x0: mov x16, x0 + 0x0: mov x15, x0 0x4: mov x0, x1 0x8: mov x1, x2 - 0xc: mov x2, x16 + 0xc: mov x2, x15 0x10: mov x16, #0 0x14: blr x16 "); - assert_snapshot!(cb.hexdump(), @"f00300aae00301aae10302aae20310aa100080d200023fd6"); + assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae10302aae2030faa100080d200023fd6"); } } diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 76a53c66d6b652..aad5600f569767 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -17,7 +17,6 @@ pub use crate::backend::current::{ NATIVE_STACK_PTR, NATIVE_BASE_PTR, C_ARG_OPNDS, C_RET_REG, C_RET_OPND, }; -pub const SCRATCH_OPND: Opnd = Opnd::Reg(Assembler::SCRATCH_REG); pub static JIT_PRESERVED_REGS: &[Opnd] = &[CFP, SP, EC]; @@ -1173,6 +1172,10 @@ pub struct Assembler { /// Names of 
labels pub(super) label_names: Vec, + /// If true, `push_insn` is allowed to use scratch registers. + /// On `compile`, it also disables the backend's use of them. + pub(super) accept_scratch_reg: bool, + /// If Some, the next ccall should verify its leafness leaf_ccall_stack_size: Option } @@ -1181,12 +1184,12 @@ impl Assembler { /// Create an Assembler pub fn new() -> Self { - Self::new_with_label_names(Vec::default(), 0) + Self::new_with_label_names(Vec::default(), 0, false) } /// Create an Assembler with parameters that are populated by another Assembler instance. /// This API is used for copying an Assembler for the next compiler pass. - pub fn new_with_label_names(label_names: Vec, num_vregs: usize) -> Self { + pub fn new_with_label_names(label_names: Vec, num_vregs: usize, accept_scratch_reg: bool) -> Self { let mut live_ranges = Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY); live_ranges.resize(num_vregs, LiveRange { start: None, end: None }); @@ -1194,6 +1197,7 @@ impl Assembler insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), live_ranges, label_names, + accept_scratch_reg, leaf_ccall_stack_size: None, } } @@ -1255,6 +1259,14 @@ impl Assembler } } + // If this Assembler should not accept scratch registers, assert no use of them. + if !self.accept_scratch_reg { + let opnd_iter = insn.opnd_iter(); + for opnd in opnd_iter { + assert!(!Self::has_scratch_reg(*opnd), "should not use scratch register: {opnd:?}"); + } + } + self.insns.push(insn); } @@ -1268,9 +1280,9 @@ impl Assembler Target::Label(label) } - // Shuffle register moves, sometimes adding extra moves using SCRATCH_REG, + // Shuffle register moves, sometimes adding extra moves using scratch_reg, // so that they will not rewrite each other before they are used. - pub fn resolve_parallel_moves(old_moves: &[(Reg, Opnd)]) -> Vec<(Reg, Opnd)> { + pub fn resolve_parallel_moves(old_moves: &[(Reg, Opnd)], scratch_reg: Option) -> Option> { // Return the index of a move whose destination is not used as a source if any. fn find_safe_move(moves: &[(Reg, Opnd)]) -> Option { moves.iter().enumerate().find(|&(_, &(dest_reg, _))| { @@ -1289,19 +1301,21 @@ impl Assembler new_moves.push(old_moves.remove(index)); } - // No safe move. Load the source of one move into SCRATCH_REG, and - // then load SCRATCH_REG into the destination when it's safe. + // No safe move. Load the source of one move into scratch_reg, and + // then load scratch_reg into the destination when it's safe. if !old_moves.is_empty() { - // Make sure it's safe to use SCRATCH_REG - assert!(old_moves.iter().all(|&(_, opnd)| opnd != SCRATCH_OPND)); + // If scratch_reg is None, return None and leave it to *_split_with_scratch_regs to resolve it. + let scratch_reg = scratch_reg?.unwrap_reg(); + // Make sure it's safe to use scratch_reg + assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(scratch_reg))); - // Move SCRATCH <- opnd, and delay reg <- SCRATCH + // Move scratch_reg <- opnd, and delay reg <- scratch_reg let (reg, opnd) = old_moves.remove(0); - new_moves.push((Assembler::SCRATCH_REG, opnd)); - old_moves.push((reg, SCRATCH_OPND)); + new_moves.push((scratch_reg, opnd)); + old_moves.push((reg, Opnd::Reg(scratch_reg))); } } - new_moves + Some(new_moves) } /// Sets the out field on the various instructions that require allocated @@ -1345,7 +1359,7 @@ impl Assembler // live_ranges is indexed by original `index` given by the iterator. 
let live_ranges: Vec = take(&mut self.live_ranges); let mut iterator = self.insns.into_iter().enumerate().peekable(); - let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len()); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len(), self.accept_scratch_reg); while let Some((index, mut insn)) = iterator.next() { let before_ccall = match (&insn, iterator.peek().map(|(_, insn)| insn)) { @@ -1510,9 +1524,14 @@ impl Assembler let is_ccall = matches!(insn, Insn::CCall { .. }); match insn { Insn::ParallelMov { moves } => { - // Now that register allocation is done, it's ready to resolve parallel moves. - for (reg, opnd) in Self::resolve_parallel_moves(&moves) { - asm.load_into(Opnd::Reg(reg), opnd); + // For trampolines that use scratch registers, attempt to lower ParallelMov without scratch_reg. + if let Some(moves) = Self::resolve_parallel_moves(&moves, None) { + for (reg, opnd) in moves { + asm.load_into(Opnd::Reg(reg), opnd); + } + } else { + // If it needs a scratch_reg, leave it to *_split_with_scratch_regs to handle it. + asm.push_insn(Insn::ParallelMov { moves }); } } Insn::CCall { opnds, fptr, start_marker, end_marker, out } => { @@ -1586,7 +1605,7 @@ impl Assembler for (idx, target) in targets { // Compile a side exit. Note that this is past the split pass and alloc_regs(), - // so you can't use a VReg or an instruction that needs to be split. + // so you can't use an instruction that returns a VReg. if let Target::SideExit { pc, stack, locals, reason, label } = target { asm_comment!(self, "Exit: {reason}"); let side_exit_label = if let Some(label) = label { @@ -1609,35 +1628,24 @@ impl Assembler } asm_comment!(self, "save cfp->pc"); - self.load_into(SCRATCH_OPND, Opnd::const_ptr(pc)); - self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), SCRATCH_OPND); + self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(pc)); asm_comment!(self, "save cfp->sp"); - self.lea_into(SCRATCH_OPND, Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32)); - let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); - self.store(cfp_sp, SCRATCH_OPND); + self.lea_into(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32)); // Using C_RET_OPND as an additional scratch register, which is no longer used if get_option!(stats) { asm_comment!(self, "increment a side exit counter"); - self.load_into(SCRATCH_OPND, Opnd::const_ptr(exit_counter_ptr(reason))); - self.incr_counter_with_reg(Opnd::mem(64, SCRATCH_OPND, 0), 1.into(), C_RET_OPND); + self.incr_counter(Opnd::const_ptr(exit_counter_ptr(reason)), 1.into()); if let SideExitReason::UnhandledYARVInsn(opcode) = reason { asm_comment!(self, "increment an unhandled YARV insn counter"); - self.load_into(SCRATCH_OPND, Opnd::const_ptr(exit_counter_ptr_for_opcode(opcode))); - self.incr_counter_with_reg(Opnd::mem(64, SCRATCH_OPND, 0), 1.into(), C_RET_OPND); + self.incr_counter(Opnd::const_ptr(exit_counter_ptr_for_opcode(opcode)), 1.into()); } } if get_option!(trace_side_exits) { - // Use `load_into` with `C_ARG_OPNDS` instead of `opnds` argument for ccall, since `compile_side_exits` - // is after the split pass, which would allow use of `opnds`. 
- self.load_into(C_ARG_OPNDS[0], Opnd::const_ptr(pc as *const u8)); - self.ccall( - rb_zjit_record_exit_stack as *const u8, - vec![] - ); + asm_ccall!(self, rb_zjit_record_exit_stack, Opnd::const_ptr(pc as *const u8)); } asm_comment!(self, "exit to the interpreter"); @@ -1823,19 +1831,6 @@ impl Assembler { self.push_insn(Insn::IncrCounter { mem, value }); } - /// incr_counter() but uses a specific register to split Insn::Lea - pub fn incr_counter_with_reg(&mut self, mem: Opnd, value: Opnd, reg: Opnd) { - assert!(matches!(reg, Opnd::Reg(_)), "incr_counter_with_reg should take a register, got: {reg:?}"); - let counter_opnd = if cfg!(target_arch = "aarch64") { // See arm64_split() - assert_ne!(reg, SCRATCH_OPND, "SCRATCH_REG should be reserved for IncrCounter"); - self.lea_into(reg, mem); - reg - } else { // x86_emit() expects Opnd::Mem - mem - }; - self.incr_counter(counter_opnd, value); - } - pub fn jbe(&mut self, target: Target) { self.push_insn(Insn::Jbe(target)); } @@ -1898,7 +1893,7 @@ impl Assembler { } pub fn lea_into(&mut self, out: Opnd, opnd: Opnd) { - assert!(matches!(out, Opnd::Reg(_)), "Destination of lea_into must be a register, got: {out:?}"); + assert!(matches!(out, Opnd::Reg(_) | Opnd::Mem(_)), "Destination of lea_into must be a register or memory, got: {out:?}"); self.push_insn(Insn::Lea { opnd, out }); } diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 2edd15380871e1..b6c4658463048a 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -83,7 +83,7 @@ impl From<&Opnd> for X86Opnd { /// List of registers that can be used for register allocation. /// This has the same number of registers for x86_64 and arm64. -/// SCRATCH_REG is excluded. +/// SCRATCH_OPND is excluded. pub const ALLOC_REGS: &[Reg] = &[ RDI_REG, RSI_REG, @@ -95,14 +95,26 @@ pub const ALLOC_REGS: &[Reg] = &[ RAX_REG, ]; -impl Assembler -{ - /// Special scratch registers for intermediate processing. - /// This register is call-clobbered (so we don't have to save it before using it). - /// Avoid using if you can since this is used to lower [Insn] internally and - /// so conflicts are possible. - pub const SCRATCH_REG: Reg = R11_REG; - const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG); +/// Special scratch register for intermediate processing. It should be used only by +/// [`Assembler::x86_split_with_scratch_reg`] or [`Assembler::new_with_scratch_reg`]. +const SCRATCH_OPND: Opnd = Opnd::Reg(R11_REG); + +impl Assembler { + /// Return an Assembler with scratch registers disabled in the backend, and a scratch register. + pub fn new_with_scratch_reg() -> (Self, Opnd) { + (Self::new_with_label_names(Vec::default(), 0, true), SCRATCH_OPND) + } + + /// Return true if opnd contains a scratch reg + pub fn has_scratch_reg(opnd: Opnd) -> bool { + match opnd { + Opnd::Reg(_) => opnd == SCRATCH_OPND, + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), .. 
}) => { + reg_no == SCRATCH_OPND.unwrap_reg().reg_no + } + _ => false, + } + } /// Get the list of registers from which we can allocate on this platform pub fn get_alloc_regs() -> Vec { @@ -127,7 +139,7 @@ impl Assembler { let live_ranges: Vec = take(&mut self.live_ranges); let mut iterator = self.insns.into_iter().enumerate().peekable(); - let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len()); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len(), self.accept_scratch_reg); while let Some((index, mut insn)) = iterator.next() { let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. }); @@ -374,36 +386,165 @@ impl Assembler asm } - /// Emit platform-specific machine code - pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Option> { + /// Split instructions using scratch registers. To maximize the use of the register pool + /// for VRegs, most splits should happen in [`Self::x86_split`]. However, some instructions + /// need to be split with registers after `alloc_regs`, e.g. for `compile_side_exits`, so + /// this splits them and uses scratch registers for it. + pub fn x86_split_with_scratch_reg(mut self) -> Assembler { /// For some instructions, we want to be able to lower a 64-bit operand /// without requiring more registers to be available in the register - /// allocator. So we just use the SCRATCH0 register temporarily to hold + /// allocator. So we just use the SCRATCH_OPND register temporarily to hold /// the value before we immediately use it. - fn emit_64bit_immediate(cb: &mut CodeBlock, opnd: &Opnd) -> X86Opnd { + fn split_64bit_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Imm(value) => { // 32-bit values will be sign-extended - if imm_num_bits(*value) > 32 { - mov(cb, Assembler::SCRATCH0, opnd.into()); - Assembler::SCRATCH0 + if imm_num_bits(value) > 32 { + asm.mov(SCRATCH_OPND, opnd); + SCRATCH_OPND } else { - opnd.into() + opnd } }, Opnd::UImm(value) => { // 32-bit values will be sign-extended - if imm_num_bits(*value as i64) > 32 { - mov(cb, Assembler::SCRATCH0, opnd.into()); - Assembler::SCRATCH0 + if imm_num_bits(value as i64) > 32 { + asm.mov(SCRATCH_OPND, opnd); + SCRATCH_OPND } else { - imm_opnd(*value as i64) + Opnd::Imm(value as i64) } }, - _ => opnd.into() + _ => opnd } } + let mut iterator = self.insns.into_iter().enumerate().peekable(); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), self.live_ranges.len(), true); + + while let Some((_, mut insn)) = iterator.next() { + match &mut insn { + Insn::Add { right, .. } | + Insn::Sub { right, .. } | + Insn::Mul { right, .. } | + Insn::And { right, .. } | + Insn::Or { right, .. } | + Insn::Xor { right, .. } | + Insn::Test { right, .. } => { + *right = split_64bit_immediate(&mut asm, *right); + asm.push_insn(insn); + } + Insn::Cmp { left, right } => { + let num_bits = match right { + Opnd::Imm(value) => Some(imm_num_bits(*value)), + Opnd::UImm(value) => Some(uimm_num_bits(*value)), + _ => None + }; + + // If the immediate is less than 64 bits (like 32, 16, 8), and the operand + // sizes match, then we can represent it as an immediate in the instruction + // without moving it to a register first. + // IOW, 64 bit immediates must always be moved to a register + // before comparisons, where other sizes may be encoded + // directly in the instruction. 
+ let use_imm = num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64; + if !use_imm { + *right = split_64bit_immediate(&mut asm, *right); + } + asm.push_insn(insn); + } + // For compile_side_exits, support splitting simple C arguments here + Insn::CCall { opnds, .. } if !opnds.is_empty() => { + for (i, opnd) in opnds.iter().enumerate() { + asm.load_into(C_ARG_OPNDS[i], *opnd); + } + *opnds = vec![]; + asm.push_insn(insn); + } + &mut Insn::Lea { opnd, out } => { + match (opnd, out) { + // Split here for compile_side_exits + (Opnd::Mem(_), Opnd::Mem(_)) => { + asm.lea_into(SCRATCH_OPND, opnd); + asm.store(out, SCRATCH_OPND); + } + _ => { + asm.push_insn(insn); + } + } + } + Insn::LeaJumpTarget { target, out } => { + if let Target::Label(_) = target { + asm.push_insn(Insn::LeaJumpTarget { out: SCRATCH_OPND, target: target.clone() }); + asm.mov(*out, SCRATCH_OPND); + } + } + // Convert Opnd::const_ptr into Opnd::Mem. This split is done here to give + // a register for compile_side_exits. + &mut Insn::IncrCounter { mem, value } => { + assert!(matches!(mem, Opnd::UImm(_))); + asm.load_into(SCRATCH_OPND, mem); + asm.incr_counter(Opnd::mem(64, SCRATCH_OPND, 0), value); + } + // Resolve ParallelMov that couldn't be handled without a scratch register. + Insn::ParallelMov { moves } => { + for (reg, opnd) in Self::resolve_parallel_moves(&moves, Some(SCRATCH_OPND)).unwrap() { + asm.load_into(Opnd::Reg(reg), opnd); + } + } + // Handle various operand combinations for spills on compile_side_exits. + &mut Insn::Store { dest, src } => { + let Opnd::Mem(Mem { num_bits, .. }) = dest else { + panic!("Unexpected Insn::Store destination in x86_split_with_scratch_reg: {dest:?}"); + }; + + let src = match src { + Opnd::Reg(_) => src, + Opnd::Mem(_) => { + asm.mov(SCRATCH_OPND, src); + SCRATCH_OPND + } + Opnd::Imm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm) > 32 { + asm.mov(SCRATCH_OPND, src); + SCRATCH_OPND + } else if uimm_num_bits(imm as u64) <= num_bits { + // If the bit string is short enough for the destination, use the unsigned representation. + // Note that 64-bit and negative values are ruled out. + Opnd::UImm(imm as u64) + } else { + src + } + } + Opnd::UImm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm as i64) > 32 { + asm.mov(SCRATCH_OPND, src); + SCRATCH_OPND + } else { + src.into() + } + } + Opnd::Value(_) => { + asm.load_into(SCRATCH_OPND, src); + SCRATCH_OPND + } + src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_split_with_scratch_reg: {src:?}"), + }; + asm.store(dest, src); + } + _ => { + asm.push_insn(insn); + } + } + } + + asm + } + + /// Emit platform-specific machine code + pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Option> { fn emit_csel( cb: &mut CodeBlock, truthy: Opnd, @@ -506,33 +647,27 @@ impl Assembler } Insn::Add { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - add(cb, left.into(), opnd1); + add(cb, left.into(), right.into()); }, Insn::Sub { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - sub(cb, left.into(), opnd1); + sub(cb, left.into(), right.into()); }, Insn::Mul { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - imul(cb, left.into(), opnd1); + imul(cb, left.into(), right.into()); }, Insn::And { left, right, .. 
} => { - let opnd1 = emit_64bit_immediate(cb, right); - and(cb, left.into(), opnd1); + and(cb, left.into(), right.into()); }, Insn::Or { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - or(cb, left.into(), opnd1); + or(cb, left.into(), right.into()); }, Insn::Xor { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - xor(cb, left.into(), opnd1); + xor(cb, left.into(), right.into()); }, Insn::Not { opnd, .. } => { @@ -551,64 +686,6 @@ impl Assembler shr(cb, opnd.into(), shift.into()) }, - store_insn @ Insn::Store { dest, src } => { - let &Opnd::Mem(Mem { num_bits, base: MemBase::Reg(base_reg_no), disp: _ }) = dest else { - panic!("Unexpected Insn::Store destination in x64_emit: {dest:?}"); - }; - - // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, - // so we panic to get the author to change their code. - #[track_caller] - fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { - assert_ne!( - backend_use.reg_no, - user_use, - "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" - ); - } - - let scratch = X86Opnd::Reg(Self::SCRATCH_REG); - let src = match src { - Opnd::Reg(_) => src.into(), - &Opnd::Mem(_) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); - mov(cb, scratch, src.into()); - scratch - } - &Opnd::Imm(imm) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if num_bits == 64 && imm_num_bits(imm) > 32 { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); - mov(cb, scratch, src.into()); - scratch - } else if uimm_num_bits(imm as u64) <= num_bits { - // If the bit string is short enough for the destination, use the unsigned representation. - // Note that 64-bit and negative values are ruled out. - uimm_opnd(imm as u64) - } else { - src.into() - } - } - &Opnd::UImm(imm) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if num_bits == 64 && imm_num_bits(imm as i64) > 32 { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); - mov(cb, scratch, src.into()); - scratch - } else { - src.into() - } - } - &Opnd::Value(value) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); - emit_load_gc_value(cb, &mut gc_offsets, scratch, value); - scratch - } - src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_emit: {src:?}") - }; - mov(cb, dest.into(), src); - } - // This assumes only load instructions can contain references to GC'd Value operands Insn::Load { opnd, out } | Insn::LoadInto { dest: out, opnd } => { @@ -626,6 +703,7 @@ impl Assembler Insn::ParallelMov { .. 
} => unreachable!("{insn:?} should have been lowered at alloc_regs()"), + Insn::Store { dest, src } | Insn::Mov { dest, src } => { mov(cb, dest.into(), src.into()); }, @@ -638,13 +716,12 @@ impl Assembler // Load address of jump target Insn::LeaJumpTarget { target, out } => { if let Target::Label(label) = target { + let out = *out; // Set output to the raw address of the label - cb.label_ref(*label, 7, |cb, src_addr, dst_addr| { + cb.label_ref(*label, 7, move |cb, src_addr, dst_addr| { let disp = dst_addr - src_addr; - lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); + lea(cb, out.into(), mem_opnd(8, RIP, disp.try_into().unwrap())); }); - - mov(cb, out.into(), Self::SCRATCH0); } else { // Set output to the jump target's raw address let target_code = target.unwrap_code_ptr(); @@ -700,30 +777,12 @@ impl Assembler // Compare Insn::Cmp { left, right } => { - let num_bits = match right { - Opnd::Imm(value) => Some(imm_num_bits(*value)), - Opnd::UImm(value) => Some(uimm_num_bits(*value)), - _ => None - }; - - // If the immediate is less than 64 bits (like 32, 16, 8), and the operand - // sizes match, then we can represent it as an immediate in the instruction - // without moving it to a register first. - // IOW, 64 bit immediates must always be moved to a register - // before comparisons, where other sizes may be encoded - // directly in the instruction. - if num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64 { - cmp(cb, left.into(), right.into()); - } else { - let emitted = emit_64bit_immediate(cb, right); - cmp(cb, left.into(), emitted); - } + cmp(cb, left.into(), right.into()); } // Test and set flags Insn::Test { left, right } => { - let emitted = emit_64bit_immediate(cb, right); - test(cb, left.into(), emitted); + test(cb, left.into(), right.into()); } Insn::JmpOpnd(opnd) => { @@ -893,9 +952,16 @@ impl Assembler /// Optimize and compile the stored instructions pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Result<(CodePtr, Vec), CompileError> { + // The backend is allowed to use scratch registers only if it has not accepted them so far. + let use_scratch_regs = !self.accept_scratch_reg; + let asm = self.x86_split(); let mut asm = asm.alloc_regs(regs)?; + // We put compile_side_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits. 
asm.compile_side_exits(); + if use_scratch_regs { + asm = asm.x86_split_with_scratch_reg(); + } // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { @@ -1527,6 +1593,7 @@ mod tests { assert!(imitation_heap_value.heap_object_p()); asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into()); + asm = asm.x86_split_with_scratch_reg(); let gc_offsets = asm.x86_emit(&mut cb).unwrap(); assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset"); diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 262a0361b4a74e..c5bdbcfe0a5a83 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -18,7 +18,7 @@ use crate::state::ZJITState; use crate::stats::{send_fallback_counter, exit_counter_for_compile_error, incr_counter, incr_counter_by, send_fallback_counter_for_method_type, send_without_block_fallback_counter_for_method_type, send_fallback_counter_ptr_for_opcode, CompileError}; use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; -use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SCRATCH_OPND, SP}; +use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP}; use crate::hir::{iseq_to_hir, BlockId, BranchEdge, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType}; use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason}; use crate::hir_type::{types, Type}; @@ -1592,9 +1592,7 @@ fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, /// Generate code that records unoptimized C functions if --zjit-stats is enabled fn gen_incr_counter_ptr(asm: &mut Assembler, counter_ptr: *mut u64) { if get_option!(stats) { - let ptr_reg = asm.load(Opnd::const_ptr(counter_ptr as *const u8)); - let counter_opnd = Opnd::mem(64, ptr_reg, 0); - asm.incr_counter(counter_opnd, Opnd::UImm(1)); + asm.incr_counter(Opnd::const_ptr(counter_ptr as *const u8), Opnd::UImm(1)); } } @@ -1963,12 +1961,12 @@ fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result /// Compile a stub for an ISEQ called by SendWithoutBlockDirect fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result { - let mut asm = Assembler::new(); + let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); asm_comment!(asm, "Stub: {}", iseq_get_location(iseq_call.iseq.get(), 0)); // Call function_stub_hit using the shared trampoline. See `gen_function_stub_hit_trampoline`. // Use load_into instead of mov, which is split on arm64, to avoid clobbering ALLOC_REGS. 
- asm.load_into(SCRATCH_OPND, Opnd::const_ptr(Rc::into_raw(iseq_call))); + asm.load_into(scratch_reg, Opnd::const_ptr(Rc::into_raw(iseq_call))); asm.jmp(ZJITState::get_function_stub_hit_trampoline().into()); asm.compile(cb).map(|(code_ptr, gc_offsets)| { @@ -1979,7 +1977,7 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result Result { - let mut asm = Assembler::new(); + let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); asm_comment!(asm, "function_stub_hit trampoline"); // Maintain alignment for x86_64, and set up a frame for arm64 properly @@ -1992,8 +1990,8 @@ pub fn gen_function_stub_hit_trampoline(cb: &mut CodeBlock) -> Result Result>(); + let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns); fun.blocks[block.0].insns.extend(insns); fun.make_equal_to(send_insn_id, replacement); fun.remove_block(tmp_block); @@ -2453,7 +2453,7 @@ impl Function { if let Some(replacement) = (props.inline)(fun, tmp_block, recv, &args, state) { // Copy contents of tmp_block to block assert_ne!(block, tmp_block); - let insns = fun.blocks[tmp_block.0].insns.drain(..).collect::>(); + let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns); fun.blocks[block.0].insns.extend(insns); fun.make_equal_to(send_insn_id, replacement); fun.remove_block(tmp_block); @@ -4957,7 +4957,7 @@ mod tests { } #[track_caller] - fn assert_contains_opcode(method: &str, opcode: u32) { + pub fn assert_contains_opcode(method: &str, opcode: u32) { let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("self", method)); unsafe { crate::cruby::rb_zjit_profile_disable(iseq) }; assert!(iseq_contains_opcode(iseq, opcode), "iseq {method} does not contain {}", insn_name(opcode as usize)); @@ -7999,6 +7999,7 @@ mod opt_tests { use super::*; use crate::{hir_strings, options::*}; use insta::assert_snapshot; + use super::tests::assert_contains_opcode; #[track_caller] fn hir_string(method: &str) -> String { @@ -12817,4 +12818,145 @@ mod opt_tests { Return v25 "); } + + #[test] + fn test_optimize_array_aset() { + eval(" + def test(arr) + arr[1] = 10 + end + test([]) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[1] = Const Value(1) + v15:Fixnum[10] = Const Value(10) + PatchPoint MethodRedefined(Array@0x1000, []=@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Array@0x1000) + v28:ArrayExact = GuardType v9, ArrayExact + v29:BasicObject = CCallVariadic []=@0x1038, v28, v14, v15 + CheckInterrupts + Return v15 + "); + } + + #[test] + fn test_optimize_array_ltlt() { + eval(" + def test(arr) + arr << 1 + end + test([]) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:Fixnum[1] = Const Value(1) + PatchPoint MethodRedefined(Array@0x1000, <<@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Array@0x1000) + v26:ArrayExact = GuardType v9, ArrayExact + v27:BasicObject = CCallWithFrame <<@0x1038, v26, v13 + CheckInterrupts + Return v27 + "); + } + + #[test] + fn test_optimize_array_length() { + eval(" + def test(arr) = arr.length + test([]) + "); + assert_contains_opcode("test", YARVINSN_opt_length); 
+ assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(Array@0x1000, length@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Array@0x1000) + v25:ArrayExact = GuardType v9, ArrayExact + v26:Fixnum = CCall length@0x1038, v25 + CheckInterrupts + Return v26 + "); + } + + #[test] + fn test_optimize_array_size() { + eval(" + def test(arr) = arr.size + test([]) + "); + assert_contains_opcode("test", YARVINSN_opt_size); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(Array@0x1000, size@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Array@0x1000) + v25:ArrayExact = GuardType v9, ArrayExact + v26:Fixnum = CCall size@0x1038, v25 + CheckInterrupts + Return v26 + "); + } + + #[test] + fn test_optimize_regexpmatch2() { + eval(r#" + def test(s) = s =~ /a/ + test("foo") + "#); + assert_contains_opcode("test", YARVINSN_opt_regexpmatch2); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:RegexpExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + PatchPoint MethodRedefined(String@0x1008, =~@0x1010, cme:0x1018) + PatchPoint NoSingletonClass(String@0x1008) + v26:StringExact = GuardType v9, StringExact + v27:BasicObject = CCallWithFrame =~@0x1040, v26, v13 + CheckInterrupts + Return v27 + "); + } } diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index 67f2fdc7403822..e7db47142bcf65 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -74,9 +74,14 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_or => profile_operands(profiler, profile, 2), YARVINSN_opt_empty_p => profile_operands(profiler, profile, 1), YARVINSN_opt_aref => profile_operands(profiler, profile, 2), + YARVINSN_opt_ltlt => profile_operands(profiler, profile, 2), + YARVINSN_opt_aset => profile_operands(profiler, profile, 3), YARVINSN_opt_not => profile_operands(profiler, profile, 1), YARVINSN_getinstancevariable => profile_self(profiler, profile), + YARVINSN_opt_regexpmatch2 => profile_operands(profiler, profile, 2), YARVINSN_objtostring => profile_operands(profiler, profile, 1), + YARVINSN_opt_length => profile_operands(profiler, profile, 1), + YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_send_without_block => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) };
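
For readers following the resolve_parallel_moves(old_moves, scratch_reg) change in zjit/src/backend/lir.rs: the function now returns None when the remaining moves form a cycle and no scratch register was supplied, deferring cycle-breaking to the new *_split_with_scratch_reg passes. Below is a minimal, self-contained sketch of that strategy, using plain u8 register ids in place of LIR Reg/Opnd; the resolve function here is illustrative only, not the ZJIT API.

// Sketch of parallel-move resolution with an optional scratch register.
// A move (dst, src) is safe to emit once no pending move still reads dst.
fn resolve(mut moves: Vec<(u8, u8)>, scratch: Option<u8>) -> Option<Vec<(u8, u8)>> {
    let mut ordered = Vec::new();
    while !moves.is_empty() {
        if let Some(i) = (0..moves.len())
            .find(|&i| moves.iter().all(|&(_, src)| src != moves[i].0))
        {
            // Safe move: its destination is no longer needed as a source.
            ordered.push(moves.remove(i));
        } else {
            // Every remaining destination is still a pending source: a cycle.
            // Break it by parking one source in the scratch register.
            let scratch = scratch?; // no scratch register available: give up
            debug_assert!(moves.iter().all(|&(_, src)| src != scratch));
            let (dst, src) = moves.remove(0);
            ordered.push((scratch, src)); // scratch <- src now
            moves.push((dst, scratch));   // dst <- scratch once dst is free
        }
    }
    Some(ordered)
}

fn main() {
    // Swapping r0 and r1 is a cycle: it fails without a scratch register and
    // resolves to three moves with one (r9 plays the scratch role here).
    assert_eq!(resolve(vec![(0, 1), (1, 0)], None), None);
    assert_eq!(resolve(vec![(0, 1), (1, 0)], Some(9)),
               Some(vec![(9, 1), (1, 0), (0, 9)]));
}

This mirrors how alloc_regs() first tries resolve_parallel_moves(&moves, None) and, when that comes back None, leaves the ParallelMov in place for the scratch-register split pass to lower.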
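
Similarly, switching LabelRef::encode from a plain fn pointer to Box<dyn Fn(&mut CodeBlock, i64, i64)> is what lets the x86_64 LeaJumpTarget lowering capture out and lea straight into the destination once the label address is known. A reduced sketch of that deferred-fixup pattern follows, with a toy CodeBlock whose fields and methods are stand-ins rather than the real zjit/src/asm API.

// A fn pointer cannot capture `out`, but a boxed closure can, so the label
// fixup can write directly into whatever destination was chosen at emit time.
struct CodeBlock {
    bytes: Vec<u8>,
    fixups: Vec<(usize, Box<dyn Fn(&mut CodeBlock, i64, i64)>)>,
}

impl CodeBlock {
    fn label_ref(&mut self, pos: usize, encode: impl Fn(&mut CodeBlock, i64, i64) + 'static) {
        let encode: Box<dyn Fn(&mut CodeBlock, i64, i64)> = Box::new(encode);
        self.fixups.push((pos, encode));
    }

    fn link_labels(&mut self, label_addr: i64) {
        // Take the fixups out so each closure can be called with `&mut self`.
        for (pos, encode) in std::mem::take(&mut self.fixups) {
            encode(self, pos as i64, label_addr);
        }
    }
}

fn main() {
    let mut cb = CodeBlock { bytes: vec![0; 16], fixups: Vec::new() };
    let out = 3usize; // captured state, e.g. the chosen output slot
    cb.label_ref(8, move |cb, end_addr, dst_addr| {
        cb.bytes[out] = (dst_addr - end_addr) as u8; // encode a displacement
    });
    cb.link_labels(12);
    assert_eq!(cb.bytes[3], 4);
}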