From ff428b4dd0c5f0a07abbd8f8520d8d1e4bff8d66 Mon Sep 17 00:00:00 2001
From: Alan Wu
Date: Fri, 25 Jul 2025 22:09:51 -0400
Subject: [PATCH 1/9] ZJIT: Keep a frame pointer and use it for memory params

Previously, ZJIT miscompiled the following because of native SP
interference:

    def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8]
    a(0,0,0,0,0,0,0, :ok)

Commented problematic disassembly:

    ; call rb_ary_new_capa
    mov x0, #1
    mov x16, #0x1278
    movk x16, #0x4bc, lsl #16
    movk x16, #1, lsl #32
    blr x16
    ; call rb_ary_push
    mov x1, x0
    str x1, [sp, #-0x10]! ; c_push() from alloc_regs()
    mov x0, x1            ; arg0, the array
    ldur x1, [sp]         ; meant to be arg1=n8, but sp just moved!
    mov x16, #0x3968
    movk x16, #0x4bc, lsl #16
    movk x16, #1, lsl #32
    blr x16

Since the frame pointer stays constant in the body of the function,
static offsets based on it don't run the risk of being invalidated by
SP movements.

Pass the registers to preserve through Insn::FrameSetup. This allows
ARM to use STP and leave no gaps between EC, SP, and CFP. x86 now
preserves and restores RBP, since we use it as the frame pointer.

Since all arches now have a frame pointer, remove offset-based SP
movement in the epilogue and restore registers using the frame pointer.
---
 test/ruby/test_zjit.rb         |   7 ++
 zjit/src/asm/arm64/opnd.rs     |   3 +
 zjit/src/backend/arm64/mod.rs  | 117 ++++++++++++++++++++++++++++++---
 zjit/src/backend/lir.rs        |  41 ++++++------
 zjit/src/backend/x86_64/mod.rs |  76 ++++++++++++++++-----
 zjit/src/codegen.rs            |  87 +++++++-----------------
 6 files changed, 220 insertions(+), 111 deletions(-)

diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
index 0dcdb8e4cb8275..fc085d2e93c9a4 100644
--- a/test/ruby/test_zjit.rb
+++ b/test/ruby/test_zjit.rb
@@ -819,6 +819,13 @@ def a(n1,n2,n3,n4,n5,n6,n7,n8) = self
     }
   end
 
+  def test_spilled_param_new_array
+    assert_compiles '[:ok]', %q{
+      def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8]
+      a(0,0,0,0,0,0,0, :ok)
+    }
+  end
+
   def test_opt_aref_with
     assert_compiles ':ok', %q{
       def aref_with(hash) = hash["key"]
diff --git a/zjit/src/asm/arm64/opnd.rs b/zjit/src/asm/arm64/opnd.rs
index 28422b747652d8..a77958f7e6eeec 100644
--- a/zjit/src/asm/arm64/opnd.rs
+++ b/zjit/src/asm/arm64/opnd.rs
@@ -119,6 +119,9 @@
 pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 };
 pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 };
 pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 };
 
+// frame pointer (base pointer)
+pub const X29_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 29 };
+
 // link register
 pub const X30_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 30 };
 
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index 88ccad8e091ed1..42dc31c90fd5cc 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -29,6 +29,7 @@ pub const C_ARG_OPNDS: [Opnd; 6] = [
 pub const C_RET_REG: Reg = X0_REG;
 pub const C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
 pub const NATIVE_STACK_PTR: Opnd = Opnd::Reg(XZR_REG);
+pub const NATIVE_BASE_PTR: Opnd = Opnd::Reg(X29_REG);
 
 // These constants define the way we work with Arm64's stack pointer. The stack
 // pointer always needs to be aligned to a 16-byte boundary.
@@ -911,18 +912,54 @@ impl Assembler
                         cb.write_byte(0);
                     }
                 },
-                Insn::FrameSetup => {
+                &Insn::FrameSetup { preserved, mut slot_count } => {
+                    const { assert!(SIZEOF_VALUE == 8, "alignment logic relies on SIZEOF_VALUE == 8"); }
+                    // Preserve X29 and set up frame record
                     stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16));
-
-                    // X29 (frame_pointer) = SP
                     mov(cb, X29, C_SP_REG);
-                },
-                Insn::FrameTeardown => {
+
+                    for regs in preserved.chunks(2) {
+                        // For the body, store pairs and move SP
+                        if let [reg0, reg1] = regs {
+                            stp_pre(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, C_SP_REG, -16));
+                        } else if let [reg] = regs {
+                            // For overhang, store but don't move SP; fold its SP
+                            // movement into the movement for the slots below.
+                            stur(cb, reg.into(), A64Opnd::new_mem(64, C_SP_REG, -8));
+                            slot_count += 1;
+                        } else {
+                            unreachable!("chunks(2)");
+                        }
+                    }
+                    // Align slot_count to keep SP 16-byte aligned
+                    if slot_count % 2 == 1 {
+                        slot_count += 1;
+                    }
+                    if slot_count > 0 {
+                        let slot_offset = (slot_count * SIZEOF_VALUE) as u64;
+                        // Bail when asked to reserve too many slots in one instruction.
+                        ShiftedImmediate::try_from(slot_offset).ok()?;
+                        sub(cb, C_SP_REG, C_SP_REG, A64Opnd::new_uimm(slot_offset));
+                    }
+                }
+                Insn::FrameTeardown { preserved } => {
+                    // Restore preserved registers from below the frame pointer.
+                    let mut base_offset = 0;
+                    for regs in preserved.chunks(2) {
+                        if let [reg0, reg1] = regs {
+                            base_offset -= 16;
+                            ldp(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, X29, base_offset));
+                        } else if let [reg] = regs {
+                            ldur(cb, reg.into(), A64Opnd::new_mem(64, X29, base_offset - 8));
+                        } else {
+                            unreachable!("chunks(2)");
+                        }
+                    }
+                    // SP = X29 (frame pointer)
                     mov(cb, C_SP_REG, X29);
-
                     ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16));
-                },
+                }
                 Insn::Add { left, right, out } => {
                     // Usually, we issue ADDS, so you could branch on overflow, but ADDS with
                     // out=31 refers to out=XZR, which discards the sum. So, instead of ADDS
@@ -1482,11 +1519,73 @@ mod tests {
     fn test_emit_frame() {
         let (mut asm, mut cb) = setup_asm();
 
-        asm.frame_setup();
-        asm.frame_teardown();
+        asm.frame_setup(&[], 0);
+        asm.frame_teardown(&[]);
         asm.compile_with_num_regs(&mut cb, 0);
     }
 
+    #[test]
+    fn frame_setup_and_teardown() {
+        const THREE_REGS: &'static [Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG)];
+        // Test 3 preserved regs (odd), odd slot_count
+        {
+            let (mut asm, mut cb) = setup_asm();
+            asm.frame_setup(THREE_REGS, 3);
+            asm.frame_teardown(THREE_REGS);
+            asm.compile_with_num_regs(&mut cb, 0);
+            assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f5831ff8ff8300d1b44f7fa9b5835ef8bf030091fd7bc1a8", "
+                0x0: stp x29, x30, [sp, #-0x10]!
+                0x4: mov x29, sp
+                0x8: stp x20, x19, [sp, #-0x10]!
+                0xc: stur x21, [sp, #-8]
+                0x10: sub sp, sp, #0x20
+                0x14: ldp x20, x19, [x29, #-0x10]
+                0x18: ldur x21, [x29, #-0x18]
+                0x1c: mov sp, x29
+                0x20: ldp x29, x30, [sp], #0x10
+            ");
+        }
+
+        // Test 3 preserved regs (odd), even slot_count
+        {
+            let (mut asm, mut cb) = setup_asm();
+            asm.frame_setup(THREE_REGS, 4);
+            asm.frame_teardown(THREE_REGS);
+            asm.compile_with_num_regs(&mut cb, 0);
+            assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f5831ff8ffc300d1b44f7fa9b5835ef8bf030091fd7bc1a8", "
+                0x0: stp x29, x30, [sp, #-0x10]!
+                0x4: mov x29, sp
+                0x8: stp x20, x19, [sp, #-0x10]!
+                0xc: stur x21, [sp, #-8]
+                0x10: sub sp, sp, #0x30
+                0x14: ldp x20, x19, [x29, #-0x10]
+                0x18: ldur x21, [x29, #-0x18]
+                0x1c: mov sp, x29
+                0x20: ldp x29, x30, [sp], #0x10
+            ");
+        }
+
+        // Test 4 preserved regs (even), odd slot_count
+        {
+            static FOUR_REGS: &'static [Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG), Opnd::Reg(X22_REG)];
+            let (mut asm, mut cb) = setup_asm();
+            asm.frame_setup(FOUR_REGS, 3);
+            asm.frame_teardown(FOUR_REGS);
+            asm.compile_with_num_regs(&mut cb, 0);
+            assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f657bfa9ff8300d1b44f7fa9b6577ea9bf030091fd7bc1a8", "
+                0x0: stp x29, x30, [sp, #-0x10]!
+                0x4: mov x29, sp
+                0x8: stp x20, x19, [sp, #-0x10]!
+                0xc: stp x22, x21, [sp, #-0x10]!
+                0x10: sub sp, sp, #0x20
+                0x14: ldp x20, x19, [x29, #-0x10]
+                0x18: ldp x22, x21, [x29, #-0x20]
+                0x1c: mov sp, x29
+                0x20: ldp x29, x30, [sp], #0x10
+            ");
+        }
+    }
+
     #[test]
     fn test_emit_je_fits_into_bcond() {
         let (mut asm, mut cb) = setup_asm();
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index 7bac210bee6689..36e783bd4e658a 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -12,10 +12,12 @@ use crate::asm::{CodeBlock, Label};
 pub use crate::backend::current::{
     Reg,
     EC, CFP, SP,
-    NATIVE_STACK_PTR,
+    NATIVE_STACK_PTR, NATIVE_BASE_PTR,
     C_ARG_OPNDS, C_RET_REG, C_RET_OPND,
 };
 
+pub static JIT_PRESERVED_REGS: &'static [Opnd] = &[CFP, SP, EC];
+
 // Memory operand base
 #[derive(Clone, Copy, PartialEq, Eq, Debug)]
 pub enum MemBase
@@ -291,8 +293,6 @@ pub enum Target
         context: Option,
         /// We use this to enrich asm comments.
         reason: SideExitReason,
-        /// The number of bytes we need to adjust the C stack pointer by.
-        c_stack_bytes: usize,
         /// Some if the side exit should write this label. We use it for patch points.
         label: Option<Label>
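
The miscompile above comes down to one invariant: a spill slot named by a
static SP-relative offset changes meaning whenever SP moves, while an
FP-relative offset keeps naming the same memory for the whole function body.
Below is a minimal standalone Rust sketch of that failure mode, separate from
the patch itself; Frame, its fields, and the constants are hypothetical names
for illustration only.

    // Toy model of a downward-growing native stack: mem[0] is the lowest
    // address, sp falls as values are pushed, fp is fixed after FrameSetup.
    struct Frame {
        mem: [u64; 16],
        sp: usize, // moves on every c_push()
        fp: usize, // constant for the whole function body
    }

    fn main() {
        // After FrameSetup: fp == sp, and the spilled argument n8 is on top.
        let mut f = Frame { mem: [0; 16], sp: 8, fp: 8 };
        let n8 = 0xA; // stand-in for the :ok symbol
        f.mem[f.sp] = n8;

        // The backend records n8's operand as "[sp + 0]" or "[fp - 0]".
        let sp_off = 0;
        let fp_off = f.fp - f.sp;

        // c_push() from alloc_regs(): str x1, [sp, #-0x10]!
        f.sp -= 2; // pre-decrement by 16 bytes (two 8-byte slots)
        f.mem[f.sp] = 0xB; // the freshly created array

        // SP moved, so the static SP-relative offset now reads the array...
        assert_eq!(f.mem[f.sp + sp_off], 0xB);
        // ...while the FP-relative offset still reads n8, as intended.
        assert_eq!(f.mem[f.fp - fp_off], n8);
        println!("sp-relative: {:#x}, fp-relative: {:#x}",
                 f.mem[f.sp + sp_off], f.mem[f.fp - fp_off]);
    }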
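
The FrameSetup hunk follows three rules: save preserved registers two at a
time with pre-indexed STP, store an odd leftover register with STUR and fold
its SP movement into the local-slot reservation, and round the slot count up
to an even number so SP stays 16-byte aligned. The standalone Rust sketch
below distills those rules; Step and plan_frame_setup are hypothetical names,
not code from the patch.

    const SIZEOF_VALUE: u64 = 8;

    /// One emitted instruction in the prologue plan.
    #[derive(Debug)]
    enum Step {
        StpPre(&'static str, &'static str), // stp reg1, reg0, [sp, #-0x10]!
        Stur(&'static str),                 // stur reg, [sp, #-8] (SP unchanged)
        SubSp(u64),                         // sub sp, sp, #imm
    }

    fn plan_frame_setup(preserved: &[&'static str], mut slot_count: u64) -> Vec<Step> {
        let mut steps = Vec::new();
        for regs in preserved.chunks(2) {
            match regs {
                // Full pair: store both and move SP in one instruction.
                &[reg0, reg1] => steps.push(Step::StpPre(reg1, reg0)),
                // Overhang: store below SP without moving it, and account
                // for the extra slot in the reservation below.
                &[reg] => {
                    steps.push(Step::Stur(reg));
                    slot_count += 1;
                }
                _ => unreachable!("chunks(2)"),
            }
        }
        // Keep SP 16-byte aligned: round 8-byte slots up to an even count.
        if slot_count % 2 == 1 {
            slot_count += 1;
        }
        if slot_count > 0 {
            steps.push(Step::SubSp(slot_count * SIZEOF_VALUE));
        }
        steps
    }

    fn main() {
        // Mirrors the "3 preserved regs (odd), odd slot_count" test above.
        for step in plan_frame_setup(&["x19", "x20", "x21"], 3) {
            println!("{:?}", step);
        }
    }

Running the sketch prints the same plan the first assert_disasm! test checks:
one pair store, one overhang store, then sub sp, sp, #0x20.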