From 1b95afdee3260cfb49df1b20b4dc04c70c698251 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 8 Sep 2025 11:52:55 -0700 Subject: [PATCH 1/5] YJIT: Add more information to an assert message in jit_guard_known_class (#14480) * YJIT: Add more information to an assert message in jit_guard_known_class * Say "should be a heap object" instead Co-authored-by: Alan Wu --------- Co-authored-by: Alan Wu --- yjit/src/codegen.rs | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 8b472efdfdaa60..e243270af635ca 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2281,7 +2281,6 @@ fn gen_expandarray( jit_guard_known_klass( jit, asm, - comptime_recv.class_of(), array_opnd, array_opnd.into(), comptime_recv, @@ -3672,7 +3671,6 @@ fn gen_equality_specialized( jit_guard_known_klass( jit, asm, - unsafe { rb_cString }, a_opnd, a_opnd.into(), comptime_a, @@ -3698,7 +3696,6 @@ fn gen_equality_specialized( jit_guard_known_klass( jit, asm, - unsafe { rb_cString }, b_opnd, b_opnd.into(), comptime_b, @@ -3795,7 +3792,6 @@ fn gen_opt_aref( jit_guard_known_klass( jit, asm, - unsafe { rb_cArray }, recv_opnd, recv_opnd.into(), comptime_recv, @@ -3835,7 +3831,6 @@ fn gen_opt_aref( jit_guard_known_klass( jit, asm, - unsafe { rb_cHash }, recv_opnd, recv_opnd.into(), comptime_recv, @@ -3888,7 +3883,6 @@ fn gen_opt_aset( jit_guard_known_klass( jit, asm, - unsafe { rb_cArray }, recv, recv.into(), comptime_recv, @@ -3900,7 +3894,6 @@ fn gen_opt_aset( jit_guard_known_klass( jit, asm, - unsafe { rb_cInteger }, key, key.into(), comptime_key, @@ -3933,7 +3926,6 @@ fn gen_opt_aset( jit_guard_known_klass( jit, asm, - unsafe { rb_cHash }, recv, recv.into(), comptime_recv, @@ -4853,7 +4845,6 @@ fn gen_opt_new( perf_call!("opt_new: ", jit_guard_known_klass( jit, asm, - comptime_recv_klass, recv, recv.into(), comptime_recv, @@ -4924,13 +4915,13 @@ fn gen_jump( fn jit_guard_known_klass( jit: &mut JITState, asm: &mut Assembler, - known_klass: VALUE, obj_opnd: Opnd, insn_opnd: YARVOpnd, sample_instance: VALUE, max_chain_depth: u8, counter: Counter, ) { + let known_klass = sample_instance.class_of(); let val_type = asm.ctx.get_opnd_type(insn_opnd); if val_type.known_class() == Some(known_klass) { @@ -5036,7 +5027,7 @@ fn jit_guard_known_klass( assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String") }; } else { - assert!(!val_type.is_imm()); + assert!(!val_type.is_imm(), "{insn_opnd:?} should be a heap object, but was {val_type:?} for {sample_instance:?}"); // Check that the receiver is a heap object // Note: if we get here, the class doesn't have immediate instances. @@ -5680,7 +5671,6 @@ fn jit_rb_float_plus( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5722,7 +5712,6 @@ fn jit_rb_float_minus( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5764,7 +5753,6 @@ fn jit_rb_float_mul( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -5806,7 +5794,6 @@ fn jit_rb_float_div( jit_guard_known_klass( jit, asm, - comptime_obj.class_of(), obj, obj.into(), comptime_obj, @@ -6070,7 +6057,6 @@ fn jit_rb_str_getbyte( jit_guard_known_klass( jit, asm, - comptime_idx.class_of(), idx, idx.into(), comptime_idx, @@ -9085,7 +9071,6 @@ fn gen_send_general( perf_call!("gen_send_general: ", jit_guard_known_klass( jit, asm, - comptime_recv_klass, recv, recv_opnd, comptime_recv, @@ -10016,7 +10001,6 @@ fn gen_objtostring( jit_guard_known_klass( jit, asm, - comptime_recv.class_of(), recv, recv.into(), comptime_recv, @@ -10030,7 +10014,6 @@ fn gen_objtostring( jit_guard_known_klass( jit, asm, - comptime_recv.class_of(), recv, recv.into(), comptime_recv, From 80079ba42505d63cafac674bd23c0317e6a5cdd6 Mon Sep 17 00:00:00 2001 From: Aiden Fox Ivey Date: Mon, 8 Sep 2025 14:56:14 -0400 Subject: [PATCH 2/5] ZJIT: Fix 30k if stmt test (#14446) * ZJIT: Allow label generation above 19 bits * Refactor emit_conditional_jump to use generate_branch * Make branching functionality generic across Label and CodePtr * ZJIT: Add > 19 bit jump test and helper function * Remove an empty line --------- Co-authored-by: Takashi Kokubun --- zjit/src/asm/mod.rs | 7 +- zjit/src/backend/arm64/mod.rs | 139 ++++++++++++++++++++-------------- 2 files changed, 86 insertions(+), 60 deletions(-) diff --git a/zjit/src/asm/mod.rs b/zjit/src/asm/mod.rs index 8efa5df2701247..e07ac0a48c3d89 100644 --- a/zjit/src/asm/mod.rs +++ b/zjit/src/asm/mod.rs @@ -299,11 +299,15 @@ impl fmt::LowerHex for CodeBlock { impl CodeBlock { /// Stubbed CodeBlock for testing. Can't execute generated code. pub fn new_dummy() -> Self { + const DEFAULT_MEM_SIZE: usize = 1024; + CodeBlock::new_dummy_sized(DEFAULT_MEM_SIZE) + } + + pub fn new_dummy_sized(mem_size: usize) -> Self { use std::ptr::NonNull; use crate::virtualmem::*; use crate::virtualmem::tests::TestingAllocator; - let mem_size = 1024; let alloc = TestingAllocator::new(mem_size); let mem_start: *const u8 = alloc.mem_start(); let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024); @@ -388,4 +392,3 @@ mod tests assert_eq!(uimm_num_bits(u64::MAX), 64); } } - diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 9a23c2bd132dbf..6e040e5214f0c8 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -706,70 +706,67 @@ impl Assembler /// Emit a conditional jump instruction to a specific target. This is /// called when lowering any of the conditional jump instructions. fn emit_conditional_jump(cb: &mut CodeBlock, target: Target) { + fn generate_branch(cb: &mut CodeBlock, src_addr: i64, dst_addr: i64) { + let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { + // If the jump offset fits into the conditional jump as + // an immediate value and it's properly aligned, then we + // can use the b.cond instruction directly. We're safe + // to use as i32 here since we already checked that it + // fits. + let bytes = (dst_addr - src_addr) as i32; + bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); + + // Here we're going to return 1 because we've only + // written out 1 instruction. + 1 + } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond + // If the jump offset fits into the unconditional jump as + // an immediate value, we can use inverse b.cond + b. + // + // We're going to write out the inverse condition so + // that if it doesn't match it will skip over the + // instruction used for branching. + bcond(cb, Condition::inverse(CONDITION), 2.into()); + b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond + + // We've only written out 2 instructions. + 2 + } else { + // Otherwise, we need to load the address into a + // register and use the branch register instruction. + let load_insns: i32 = emit_load_size(dst_addr as u64).into(); + + // We're going to write out the inverse condition so + // that if it doesn't match it will skip over the + // instructions used for branching. + bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into()); + emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64); + br(cb, Assembler::SCRATCH0); + + // Here we'll return the number of instructions that it + // took to write out the destination address + 1 for the + // b.cond and 1 for the br. + load_insns + 2 + }; + + // We need to make sure we have at least 6 instructions for + // every kind of jump for invalidation purposes, so we're + // going to write out padding nop instructions here. + assert!(num_insns <= cb.conditional_jump_insns()); + (num_insns..cb.conditional_jump_insns()).for_each(|_| nop(cb)); + } + match target { Target::CodePtr(dst_ptr) => { let dst_addr = dst_ptr.as_offset(); let src_addr = cb.get_write_ptr().as_offset(); - - let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { - // If the jump offset fits into the conditional jump as - // an immediate value and it's properly aligned, then we - // can use the b.cond instruction directly. We're safe - // to use as i32 here since we already checked that it - // fits. - let bytes = (dst_addr - src_addr) as i32; - bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); - - // Here we're going to return 1 because we've only - // written out 1 instruction. - 1 - } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond - // If the jump offset fits into the unconditional jump as - // an immediate value, we can use inverse b.cond + b. - // - // We're going to write out the inverse condition so - // that if it doesn't match it will skip over the - // instruction used for branching. - bcond(cb, Condition::inverse(CONDITION), 2.into()); - b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond - - // We've only written out 2 instructions. - 2 - } else { - // Otherwise, we need to load the address into a - // register and use the branch register instruction. - let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64(); - let load_insns: i32 = emit_load_size(dst_addr).into(); - - // We're going to write out the inverse condition so - // that if it doesn't match it will skip over the - // instructions used for branching. - bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into()); - emit_load_value(cb, Assembler::SCRATCH0, dst_addr); - br(cb, Assembler::SCRATCH0); - - // Here we'll return the number of instructions that it - // took to write out the destination address + 1 for the - // b.cond and 1 for the br. - load_insns + 2 - }; - - if let Target::CodePtr(_) = target { - // We need to make sure we have at least 6 instructions for - // every kind of jump for invalidation purposes, so we're - // going to write out padding nop instructions here. - assert!(num_insns <= cb.conditional_jump_insns()); - for _ in num_insns..cb.conditional_jump_insns() { nop(cb); } - } + generate_branch::(cb, src_addr, dst_addr); }, Target::Label(label_idx) => { - // Here we're going to save enough space for ourselves and - // then come back and write the instruction once we know the - // offset. We're going to assume we can fit into a single - // b.cond instruction. It will panic otherwise. - cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { - let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); - bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); + // We save `cb.conditional_jump_insns` number of bytes since we may use up to that amount + // `generate_branch` will pad the emitted branch instructions with `nop`s for each unused byte. + cb.label_ref(label_idx, (cb.conditional_jump_insns() * 4) as usize, |cb, src_addr, dst_addr| { + generate_branch::(cb, src_addr - (cb.conditional_jump_insns() * 4) as i64, dst_addr); }); }, Target::SideExit { .. } => { @@ -2042,6 +2039,32 @@ mod tests { "}); } + #[test] + fn test_label_branch_generate_bounds() { + // The immediate in a conditional branch is a 19 bit unsigned integer + // which has a max value of 2^18 - 1. + const IMMEDIATE_MAX_VALUE: usize = 2usize.pow(18) - 1; + + // `IMMEDIATE_MAX_VALUE` number of dummy instructions will be generated + // plus a compare, a jump instruction, and a label. + const MEMORY_REQUIRED: usize = (IMMEDIATE_MAX_VALUE + 8)*4; + + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy_sized(MEMORY_REQUIRED); + + let far_label = asm.new_label("far"); + + asm.cmp(Opnd::Reg(X0_REG), Opnd::UImm(1)); + asm.je(far_label.clone()); + + (0..IMMEDIATE_MAX_VALUE).for_each(|_| { + asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::Reg(TEMP_REGS[2])); + }); + + asm.write_label(far_label.clone()); + asm.compile_with_num_regs(&mut cb, 1); + } + #[test] fn test_add_with_immediate() { let (mut asm, mut cb) = setup_asm(); From 4263b7eb45f8eb67e3e46af64856736c84f5b73e Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Mon, 8 Sep 2025 12:11:56 -0700 Subject: [PATCH 3/5] ZJIT: Add RubyVM::ZJIT.reset_stats! method (GH-14479) This allows for more precise tracking of stats programmatically which is particularly useful for our nightly benchmarking suite. - Define rust function - Expose to C - Wrap with Ruby API - Add a test --- test/ruby/test_zjit.rb | 22 ++++++++++++++++++++++ zjit.c | 1 + zjit.rb | 5 +++++ zjit/src/stats.rs | 15 +++++++++++++++ 4 files changed, 43 insertions(+) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 280a7503d41150..4d8eb30c804cd5 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1878,6 +1878,28 @@ def test = 1 }, stats: true end + def test_reset_stats + assert_runs 'true', %q{ + def test = 1 + 100.times { test } + + # Get initial stats and verify they're non-zero + initial_stats = RubyVM::ZJIT.stats + + # Reset the stats + RubyVM::ZJIT.reset_stats! + + # Get stats after reset + reset_stats = RubyVM::ZJIT.stats + + [ + # After reset, counters should be zero or at least much smaller + # (some instructions might execute between reset and reading stats) + :zjit_insn_count.then { |s| initial_stats[s] > 0 && reset_stats[s] < initial_stats[s] }, + ].all? + }, stats: true + end + def test_zjit_option_uses_array_each_in_ruby omit 'ZJIT wrongly compiles Array#each, so it is disabled for now' assert_runs '""', %q{ diff --git a/zjit.c b/zjit.c index e1ea6d7e098060..5d50775fc72c05 100644 --- a/zjit.c +++ b/zjit.c @@ -342,6 +342,7 @@ rb_zjit_insn_leaf(int insn, const VALUE *opes) // Primitives used by zjit.rb. Don't put other functions below, which wouldn't use them. VALUE rb_zjit_assert_compiles(rb_execution_context_t *ec, VALUE self); VALUE rb_zjit_stats(rb_execution_context_t *ec, VALUE self, VALUE target_key); +VALUE rb_zjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self); VALUE rb_zjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self); VALUE rb_zjit_print_stats_p(rb_execution_context_t *ec, VALUE self); diff --git a/zjit.rb b/zjit.rb index d70ff1dd47622d..39b0e5eb221af6 100644 --- a/zjit.rb +++ b/zjit.rb @@ -29,6 +29,11 @@ def stats(target_key = nil) Primitive.rb_zjit_stats(target_key) end + # Discard statistics collected for `--zjit-stats`. + def reset_stats! + Primitive.rb_zjit_reset_stats_bang + end + # Get the summary of ZJIT statistics as a String def stats_string buf = +"***ZJIT: Printing ZJIT statistics on exit***\n" diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 2e7d743f7be42a..adfc28b4a8519f 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -237,6 +237,21 @@ pub fn exit_counter_ptr(reason: crate::hir::SideExitReason) -> *mut u64 { counter_ptr(counter) } +/// Primitive called in zjit.rb. Zero out all the counters. +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_reset_stats_bang(_ec: EcPtr, _self: VALUE) -> VALUE { + let counters = ZJITState::get_counters(); + let exit_counters = ZJITState::get_exit_counters(); + + // Reset all counters to zero + *counters = Counters::default(); + + // Reset exit counters for YARV instructions + exit_counters.as_mut_slice().fill(0); + + Qnil +} + /// Return a Hash object that contains ZJIT statistics #[unsafe(no_mangle)] pub extern "C" fn rb_zjit_stats(_ec: EcPtr, _self: VALUE, target_key: VALUE) -> VALUE { From a3eb4601e86ccfca798995860a5bcfb38dd13789 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 8 Sep 2025 13:11:34 -0700 Subject: [PATCH 4/5] ZJIT, YJIT: Make the workflow names consistent with file names (#14462) ZJIT, YJIT: Make the workflow names consistent with file names --- .github/workflows/yjit-macos.yml | 2 +- .github/workflows/zjit-macos.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/yjit-macos.yml b/.github/workflows/yjit-macos.yml index 72ef599a39010d..2769ae7cda37f0 100644 --- a/.github/workflows/yjit-macos.yml +++ b/.github/workflows/yjit-macos.yml @@ -1,4 +1,4 @@ -name: YJIT macOS Arm64 +name: YJIT macOS on: push: branches: diff --git a/.github/workflows/zjit-macos.yml b/.github/workflows/zjit-macos.yml index 892b605ce8114f..c2ec43cd9111ac 100644 --- a/.github/workflows/zjit-macos.yml +++ b/.github/workflows/zjit-macos.yml @@ -1,4 +1,4 @@ -name: ZJIT macOS Arm64 +name: ZJIT macOS on: push: branches: From 8d79187523cdc0b5b961f4e856df8824c241f16e Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 2 Sep 2025 14:30:23 +0200 Subject: [PATCH 5/5] Fix undefined behaviour in rb_alias_variable entry1 is allocated using xmalloc (through ALLOC), which returns undefined memory. We never set the ractor_local field, so the value is undefined. --- variable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variable.c b/variable.c index cccb09e329dbd4..ef35cf39903dc6 100644 --- a/variable.c +++ b/variable.c @@ -1150,7 +1150,7 @@ rb_alias_variable(ID name1, ID name2) RB_VM_LOCKING() { entry2 = rb_global_entry(name2); if (!rb_id_table_lookup(gtbl, name1, &data1)) { - entry1 = ALLOC(struct rb_global_entry); + entry1 = ZALLOC(struct rb_global_entry); entry1->id = name1; rb_id_table_insert(gtbl, name1, (VALUE)entry1); }