From 16af72790837ffb10c87ec23f99a6c519abc21e3 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 3 Nov 2025 14:30:59 -0500 Subject: [PATCH 1/7] Avoid taking vm barrier in heap_prepare() (#14425) We can avoid taking this barrier if we're not incremental marking or lazy sweeping. I found this was taking a significant amount of samples when profiling `Psych.load` in multiple ractors due to the vm barrier. With this change, we get significant improvements in ractor benchmarks that allocate lots of objects. -- Psych.load benchmark -- ``` Before: After: r: itr: time r: itr: time 0 #1: 960ms 0 #1: 943ms 0 #2: 979ms 0 #2: 939ms 0 #3: 968ms 0 #3: 948ms 0 #4: 963ms 0 #4: 946ms 0 #5: 964ms 0 #5: 944ms 1 #1: 947ms 1 #1: 940ms 1 #2: 950ms 1 #2: 947ms 1 #3: 962ms 1 #3: 950ms 1 #4: 947ms 1 #4: 945ms 1 #5: 947ms 1 #5: 943ms 2 #1: 1131ms 2 #1: 1005ms 2 #2: 1153ms 2 #2: 996ms 2 #3: 1155ms 2 #3: 1003ms 2 #4: 1205ms 2 #4: 1012ms 2 #5: 1179ms 2 #5: 1012ms 4 #1: 1555ms 4 #1: 1209ms 4 #2: 1509ms 4 #2: 1244ms 4 #3: 1529ms 4 #3: 1254ms 4 #4: 1512ms 4 #4: 1267ms 4 #5: 1513ms 4 #5: 1245ms 6 #1: 2122ms 6 #1: 1584ms 6 #2: 2080ms 6 #2: 1532ms 6 #3: 2079ms 6 #3: 1476ms 6 #4: 2021ms 6 #4: 1463ms 6 #5: 1999ms 6 #5: 1461ms 8 #1: 2741ms 8 #1: 1630ms 8 #2: 2711ms 8 #2: 1632ms 8 #3: 2688ms 8 #3: 1654ms 8 #4: 2641ms 8 #4: 1684ms 8 #5: 2656ms 8 #5: 1752ms ``` --- gc/default/default.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 0a9945cdac98b2..e0a5aade85f223 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -985,6 +985,9 @@ total_final_slots_count(rb_objspace_t *objspace) #define GC_INCREMENTAL_SWEEP_SLOT_COUNT 2048 #define GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT 1024 #define is_lazy_sweeping(objspace) (GC_ENABLE_LAZY_SWEEP && has_sweeping_pages(objspace)) +/* In lazy sweeping or the previous incremental marking finished and did not yield a free page. 
*/ +#define needs_continue_sweeping(objspace, heap) \ + ((heap)->free_pages == NULL && is_lazy_sweeping(objspace)) #if SIZEOF_LONG == SIZEOF_VOIDP # define obj_id_to_ref(objid) ((objid) ^ FIXNUM_FLAG) /* unset FIXNUM_FLAG */ @@ -2022,7 +2025,10 @@ static void gc_continue(rb_objspace_t *objspace, rb_heap_t *heap) { unsigned int lock_lev; - gc_enter(objspace, gc_enter_event_continue, &lock_lev); + bool needs_gc = is_incremental_marking(objspace) || needs_continue_sweeping(objspace, heap); + if (!needs_gc) return; + + gc_enter(objspace, gc_enter_event_continue, &lock_lev); // takes vm barrier, try to avoid /* Continue marking if in incremental marking. */ if (is_incremental_marking(objspace)) { @@ -2031,9 +2037,7 @@ gc_continue(rb_objspace_t *objspace, rb_heap_t *heap) } } - /* Continue sweeping if in lazy sweeping or the previous incremental - * marking finished and did not yield a free page. */ - if (heap->free_pages == NULL && is_lazy_sweeping(objspace)) { + if (needs_continue_sweeping(objspace, heap)) { gc_sweep_continue(objspace, heap); } From 8117600232161bdea403481ad2b9b66d36856c1a Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Mon, 3 Nov 2025 13:19:09 -0700 Subject: [PATCH 2/7] ZJIT: Implement include_p for opt_(new|dup)array_send YARV insns (#14885) These just call to the C functions that do the optimized test but this avoids the side exit. See https://github.com/ruby/ruby/pull/12123 for the original CRuby/YJIT implementation. 
--- test/ruby/test_zjit.rb | 18 ++++++++++++ zjit/src/codegen.rs | 46 ++++++++++++++++++++++++++++++ zjit/src/hir.rs | 58 +++++++++++++++++++++++++++++++++++++ zjit/src/hir/tests.rs | 65 +++++++++++++++++++++++++++++++++++++++++- zjit/src/stats.rs | 2 ++ 5 files changed, 188 insertions(+), 1 deletion(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 6805d91406be5c..f12ac19af59537 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -957,6 +957,24 @@ def test }, insns: [:opt_new], call_threshold: 2 end + def test_opt_newarray_send_include_p + assert_compiles '[true, false]', %q{ + def test(x) + [:y, 1, Object.new].include?(x) + end + [test(1), test("n")] + }, insns: [:opt_newarray_send], call_threshold: 1 + end + + def test_opt_duparray_send_include_p + assert_compiles '[true, false]', %q{ + def test(x) + [:y, 1].include?(x) + end + [test(1), test("n")] + }, insns: [:opt_duparray_send], call_threshold: 1 + end + def test_new_hash_empty assert_compiles '{}', %q{ def test = {} diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index d4ed6304cb5577..72d111ac8a97f7 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -450,6 +450,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::LoadSelf => gen_load_self(), &Insn::LoadField { recv, id, offset, return_type: _ } => gen_load_field(asm, opnd!(recv), id, offset), &Insn::IsBlockGiven => gen_is_block_given(jit, asm), + Insn::ArrayInclude { elements, target, state } => gen_array_include(jit, asm, opnds!(elements), opnd!(target), &function.frame_state(*state)), + &Insn::DupArrayInclude { ary, target, state } => gen_dup_array_include(jit, asm, ary, opnd!(target), &function.frame_state(state)), &Insn::ArrayMax { state, .. } | &Insn::FixnumDiv { state, .. } | &Insn::Throw { state, .. 
} @@ -1328,6 +1330,50 @@ fn gen_array_length(asm: &mut Assembler, array: Opnd) -> lir::Opnd { asm_ccall!(asm, rb_jit_array_len, array) } +fn gen_array_include( + jit: &JITState, + asm: &mut Assembler, + elements: Vec, + target: Opnd, + state: &FrameState, +) -> lir::Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + + let num: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long"); + + // After gen_prepare_non_leaf_call, the elements are spilled to the Ruby stack. + // The elements are at the bottom of the virtual stack, followed by the target. + // Get a pointer to the first element on the Ruby stack. + let stack_bottom = state.stack().len() - elements.len() - 1; + let elements_ptr = asm.lea(Opnd::mem(64, SP, stack_bottom as i32 * SIZEOF_VALUE_I32)); + + unsafe extern "C" { + fn rb_vm_opt_newarray_include_p(ec: EcPtr, num: c_long, elts: *const VALUE, target: VALUE) -> VALUE; + } + asm.ccall( + rb_vm_opt_newarray_include_p as *const u8, + vec![EC, num.into(), elements_ptr, target], + ) +} + +fn gen_dup_array_include( + jit: &JITState, + asm: &mut Assembler, + ary: VALUE, + target: Opnd, + state: &FrameState, +) -> lir::Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + + unsafe extern "C" { + fn rb_vm_opt_duparray_include_p(ec: EcPtr, ary: VALUE, target: VALUE) -> VALUE; + } + asm.ccall( + rb_vm_opt_duparray_include_p as *const u8, + vec![EC, ary.into(), target], + ) +} + /// Compile a new hash instruction fn gen_new_hash( jit: &mut JITState, diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 55bec186512799..ce164f37d618a7 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -457,6 +457,7 @@ impl PtrPrintMap { #[derive(Debug, Clone, Copy)] pub enum SideExitReason { UnknownNewarraySend(vm_opt_newarray_send_type), + UnknownDuparraySend(u64), UnknownSpecialVariable(u64), UnhandledHIRInsn(InsnId), UnhandledYARVInsn(u32), @@ -548,6 +549,7 @@ impl std::fmt::Display for SideExitReason { 
SideExitReason::UnknownNewarraySend(VM_OPT_NEWARRAY_SEND_PACK) => write!(f, "UnknownNewarraySend(PACK)"), SideExitReason::UnknownNewarraySend(VM_OPT_NEWARRAY_SEND_PACK_BUFFER) => write!(f, "UnknownNewarraySend(PACK_BUFFER)"), SideExitReason::UnknownNewarraySend(VM_OPT_NEWARRAY_SEND_INCLUDE_P) => write!(f, "UnknownNewarraySend(INCLUDE_P)"), + SideExitReason::UnknownDuparraySend(method_id) => write!(f, "UnknownDuparraySend({})", method_id), SideExitReason::GuardType(guard_type) => write!(f, "GuardType({guard_type})"), SideExitReason::GuardTypeNot(guard_type) => write!(f, "GuardTypeNot({guard_type})"), SideExitReason::GuardBitEquals(value) => write!(f, "GuardBitEquals({})", value.print(&PtrPrintMap::identity())), @@ -616,6 +618,8 @@ pub enum Insn { NewRangeFixnum { low: InsnId, high: InsnId, flag: RangeType, state: InsnId }, ArrayDup { val: InsnId, state: InsnId }, ArrayMax { elements: Vec, state: InsnId }, + ArrayInclude { elements: Vec, target: InsnId, state: InsnId }, + DupArrayInclude { ary: VALUE, target: InsnId, state: InsnId }, /// Extend `left` with the elements from `right`. `left` and `right` must both be `Array`. ArrayExtend { left: InsnId, right: InsnId, state: InsnId }, /// Push `val` onto `array`, where `array` is already `Array`. @@ -988,6 +992,18 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { } Ok(()) } + Insn::ArrayInclude { elements, target, .. } => { + write!(f, "ArrayInclude")?; + let mut prefix = " "; + for element in elements { + write!(f, "{prefix}{element}")?; + prefix = ", "; + } + write!(f, " | {target}") + } + Insn::DupArrayInclude { ary, target, .. } => { + write!(f, "DupArrayInclude {} | {}", ary.print(self.ptr_map), target) + } Insn::ArrayDup { val, .. } => { write!(f, "ArrayDup {val}") } Insn::HashDup { val, .. } => { write!(f, "HashDup {val}") } Insn::HashAref { hash, key, .. 
} => { write!(f, "HashAref {hash}, {key}")} @@ -1789,6 +1805,8 @@ impl Function { &ArrayPop { array, state } => ArrayPop { array: find!(array), state: find!(state) }, &ArrayLength { array } => ArrayLength { array: find!(array) }, &ArrayMax { ref elements, state } => ArrayMax { elements: find_vec!(elements), state: find!(state) }, + &ArrayInclude { ref elements, target, state } => ArrayInclude { elements: find_vec!(elements), target: find!(target), state: find!(state) }, + &DupArrayInclude { ary, target, state } => DupArrayInclude { ary, target: find!(target), state: find!(state) }, &SetGlobal { id, val, state } => SetGlobal { id, val: find!(val), state }, &GetIvar { self_val, id, state } => GetIvar { self_val: find!(self_val), id, state }, &LoadField { recv, id, offset, return_type } => LoadField { recv: find!(recv), id, offset, return_type }, @@ -1923,6 +1941,8 @@ impl Function { Insn::GetConstantPath { .. } => types::BasicObject, Insn::IsBlockGiven => types::BoolExact, Insn::ArrayMax { .. } => types::BasicObject, + Insn::ArrayInclude { .. } => types::BoolExact, + Insn::DupArrayInclude { .. } => types::BoolExact, Insn::GetGlobal { .. } => types::BasicObject, Insn::GetIvar { .. } => types::BasicObject, Insn::LoadPC => types::CPtr, @@ -3211,6 +3231,15 @@ impl Function { worklist.extend(elements); worklist.push_back(state); } + &Insn::ArrayInclude { ref elements, target, state } => { + worklist.extend(elements); + worklist.push_back(target); + worklist.push_back(state); + } + &Insn::DupArrayInclude { target, state, .. } => { + worklist.push_back(target); + worklist.push_back(state); + } &Insn::NewRange { low, high, state, .. } | &Insn::NewRangeFixnum { low, high, state, .. 
} => { worklist.push_back(low); @@ -4448,6 +4477,11 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); let (bop, insn) = match method { VM_OPT_NEWARRAY_SEND_MAX => (BOP_MAX, Insn::ArrayMax { elements, state: exit_id }), + VM_OPT_NEWARRAY_SEND_INCLUDE_P => { + let target = elements[elements.len() - 1]; + let array_elements = elements[..elements.len() - 1].to_vec(); + (BOP_INCLUDE_P, Insn::ArrayInclude { elements: array_elements, target, state: exit_id }) + }, _ => { // Unknown opcode; side-exit into the interpreter fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnknownNewarraySend(method) }); @@ -4468,6 +4502,30 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let insn_id = fun.push_insn(block, Insn::ArrayDup { val, state: exit_id }); state.stack_push(insn_id); } + YARVINSN_opt_duparray_send => { + let ary = get_arg(pc, 0); + let method_id = get_arg(pc, 1).as_u64(); + let argc = get_arg(pc, 2).as_usize(); + if argc != 1 { + break; + } + let target = state.stack_pop()?; + let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); + let bop = match method_id { + x if x == ID!(include_p).0 => BOP_INCLUDE_P, + _ => { + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnknownDuparraySend(method_id) }); + break; + }, + }; + if !unsafe { rb_BASIC_OP_UNREDEFINED_P(bop, ARRAY_REDEFINED_OP_FLAG) } { + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::PatchPoint(Invariant::BOPRedefined { klass: ARRAY_REDEFINED_OP_FLAG, bop }) }); + break; + } + fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::BOPRedefined { klass: ARRAY_REDEFINED_OP_FLAG, bop }, state: exit_id }); + let insn_id = fun.push_insn(block, Insn::DupArrayInclude { ary, target, state: exit_id }); + state.stack_push(insn_id); + } YARVINSN_newhash => { let count = get_arg(pc, 0).as_usize(); assert!(count % 2 == 0, 
"newhash count should be even"); diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index c58e14ad4e0735..fe67779d85a808 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -2023,7 +2023,70 @@ pub mod hir_build_tests { Jump bb2(v8, v9, v10, v11, v12) bb2(v14:BasicObject, v15:BasicObject, v16:BasicObject, v17:NilClass, v18:NilClass): v25:BasicObject = SendWithoutBlock v15, :+, v16 - SideExit UnknownNewarraySend(INCLUDE_P) + PatchPoint BOPRedefined(ARRAY_REDEFINED_OP_FLAG, 33) + v30:BoolExact = ArrayInclude v15, v16 | v16 + PatchPoint NoEPEscape(test) + v35:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v37:ArrayExact = ArrayDup v35 + v39:BasicObject = SendWithoutBlock v14, :puts, v37 + PatchPoint NoEPEscape(test) + CheckInterrupts + Return v30 + "); + } + + #[test] + fn test_opt_duparray_send_include_p() { + eval(" + def test(x) + [:a, :b].include?(x) + end + "); + assert_contains_opcode("test", YARVINSN_opt_duparray_send); + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint BOPRedefined(ARRAY_REDEFINED_OP_FLAG, 33) + v15:BoolExact = DupArrayInclude VALUE(0x1000) | v9 + CheckInterrupts + Return v15 + "); + } + + #[test] + fn test_opt_duparray_send_include_p_redefined() { + eval(" + class Array + alias_method :old_include?, :include? 
+ def include?(x) + old_include?(x) + end + end + def test(x) + [:a, :b].include?(x) + end + "); + assert_contains_opcode("test", YARVINSN_opt_duparray_send); + assert_snapshot!(hir_string("test"), @r" + fn test@:9: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + SideExit PatchPoint(BOPRedefined(ARRAY_REDEFINED_OP_FLAG, 33)) "); } diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index e69f1d95884564..2d7139eec6c053 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -128,6 +128,7 @@ make_counters! { // exit_: Side exits reasons exit_compile_error, exit_unknown_newarray_send, + exit_unknown_duparray_send, exit_unhandled_tailcall, exit_unhandled_splat, exit_unhandled_kwarg, @@ -366,6 +367,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { use crate::stats::Counter::*; match reason { UnknownNewarraySend(_) => exit_unknown_newarray_send, + UnknownDuparraySend(_) => exit_unknown_duparray_send, UnhandledCallType(Tailcall) => exit_unhandled_tailcall, UnhandledCallType(Splat) => exit_unhandled_splat, UnhandledCallType(Kwarg) => exit_unhandled_kwarg, From 4001e81a8eb04ac1b7653b05762bcdcb364760e1 Mon Sep 17 00:00:00 2001 From: Max Leopold Date: Mon, 3 Nov 2025 21:46:26 +0100 Subject: [PATCH 3/7] ZJIT: Inline String#bytesize (#15033) Inline the `String#bytesize` function and remove the C call. 
--- test/ruby/test_zjit.rb | 10 ++++++++++ zjit/src/codegen.rs | 9 +++++++++ zjit/src/cruby.rs | 1 + zjit/src/cruby_methods.rs | 23 ++++++++++++++++++++++- zjit/src/hir.rs | 11 ++++++++++- zjit/src/hir/opt_tests.rs | 33 +++------------------------------ zjit/src/stats.rs | 2 ++ 7 files changed, 57 insertions(+), 32 deletions(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index f12ac19af59537..de2d1e61528e86 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -2568,6 +2568,16 @@ def test(str) }, call_threshold: 2 end + def test_string_bytesize_multibyte + assert_compiles '4', %q{ + def test(s) + s.bytesize + end + + test("💎") + }, call_threshold: 2 + end + def test_nil_value_nil_opt_with_guard assert_compiles 'true', %q{ def test(val) = val.nil? diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 72d111ac8a97f7..7cd677bde3d5b3 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -403,6 +403,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::IsBitEqual { left, right } => gen_is_bit_equal(asm, opnd!(left), opnd!(right)), &Insn::IsBitNotEqual { left, right } => gen_is_bit_not_equal(asm, opnd!(left), opnd!(right)), &Insn::BoxBool { val } => gen_box_bool(asm, opnd!(val)), + &Insn::BoxFixnum { val, state } => gen_box_fixnum(jit, asm, opnd!(val), &function.frame_state(state)), Insn::Test { val } => gen_test(asm, opnd!(val)), Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), @@ -1595,6 +1596,14 @@ fn gen_box_bool(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { asm.csel_nz(Opnd::Value(Qtrue), Opnd::Value(Qfalse)) } +fn gen_box_fixnum(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, state: &FrameState) -> lir::Opnd { + // Load the value, then test for overflow and 
tag it + let val = asm.load(val); + let shifted = asm.lshift(val, Opnd::UImm(1)); + asm.jo(side_exit(jit, state, BoxFixnumOverflow)); + asm.or(shifted, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)) +} + fn gen_anytostring(asm: &mut Assembler, val: lir::Opnd, str: lir::Opnd, state: &FrameState) -> lir::Opnd { gen_prepare_leaf_call_with_gc(asm, state); diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 89488fd2559e1c..631acbd8635686 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -1376,6 +1376,7 @@ pub(crate) mod ids { name: freeze name: minusat content: b"-@" name: aref content: b"[]" + name: len name: _as_heap } diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 0af3be1819dac7..37d75f45974e2a 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -195,7 +195,7 @@ pub fn init() -> Annotations { annotate!(rb_mKernel, "itself", inline_kernel_itself); annotate!(rb_mKernel, "block_given?", inline_kernel_block_given_p); annotate!(rb_mKernel, "===", inline_eqq); - annotate!(rb_cString, "bytesize", types::Fixnum, no_gc, leaf, elidable); + annotate!(rb_cString, "bytesize", inline_string_bytesize); annotate!(rb_cString, "size", types::Fixnum, no_gc, leaf, elidable); annotate!(rb_cString, "length", types::Fixnum, no_gc, leaf, elidable); annotate!(rb_cString, "getbyte", inline_string_getbyte); @@ -305,6 +305,27 @@ fn inline_hash_aref(fun: &mut hir::Function, block: hir::BlockId, recv: hir::Ins None } + +fn inline_string_bytesize(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { + if args.is_empty() && fun.likely_a(recv, types::String, state) { + let recv = fun.coerce_to(block, recv, types::String, state); + let len = fun.push_insn(block, hir::Insn::LoadField { + recv, + id: ID!(len), + offset: RUBY_OFFSET_RSTRING_LEN as i32, + return_type: types::CInt64, + }); + + let result = fun.push_insn(block, hir::Insn::BoxFixnum { + val: len, + state, + }); + + return 
Some(result); + } + None +} + fn inline_string_getbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { let &[index] = args else { return None; }; if fun.likely_a(index, types::Fixnum, state) { diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index ce164f37d618a7..136f7b452fcd38 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -478,6 +478,7 @@ pub enum SideExitReason { BlockParamProxyNotIseqOrIfunc, StackOverflow, FixnumModByZero, + BoxFixnumOverflow, } #[derive(Debug, Clone, Copy)] @@ -655,6 +656,8 @@ pub enum Insn { IsBitNotEqual { left: InsnId, right: InsnId }, /// Convert a C `bool` to a Ruby `Qtrue`/`Qfalse`. Same as `RBOOL` macro. BoxBool { val: InsnId }, + /// Convert a C `long` to a Ruby `Fixnum`. Side exit on overflow. + BoxFixnum { val: InsnId, state: InsnId }, // TODO(max): In iseq body types that are not ISEQ_TYPE_METHOD, rewrite to Constant false. Defined { op_type: usize, obj: VALUE, pushval: VALUE, v: InsnId, state: InsnId }, GetConstantPath { ic: *const iseq_inline_constant_cache, state: InsnId }, @@ -924,6 +927,7 @@ impl Insn { Insn::NewRangeFixnum { .. } => false, Insn::StringGetbyteFixnum { .. } => false, Insn::IsBlockGiven => false, + Insn::BoxFixnum { .. } => false, _ => true, } } @@ -1057,6 +1061,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::IsBitEqual { left, right } => write!(f, "IsBitEqual {left}, {right}"), Insn::IsBitNotEqual { left, right } => write!(f, "IsBitNotEqual {left}, {right}"), Insn::BoxBool { val } => write!(f, "BoxBool {val}"), + Insn::BoxFixnum { val, .. 
} => write!(f, "BoxFixnum {val}"), Insn::Jump(target) => { write!(f, "Jump {target}") } Insn::IfTrue { val, target } => { write!(f, "IfTrue {val}, {target}") } Insn::IfFalse { val, target } => { write!(f, "IfFalse {val}, {target}") } @@ -1696,6 +1701,7 @@ impl Function { &IsBitEqual { left, right } => IsBitEqual { left: find!(left), right: find!(right) }, &IsBitNotEqual { left, right } => IsBitNotEqual { left: find!(left), right: find!(right) }, &BoxBool { val } => BoxBool { val: find!(val) }, + &BoxFixnum { val, state } => BoxFixnum { val: find!(val), state: find!(state) }, Jump(target) => Jump(find_branch_edge!(target)), &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) }, &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, @@ -1888,6 +1894,7 @@ impl Function { Insn::IsBitEqual { .. } => types::CBool, Insn::IsBitNotEqual { .. } => types::CBool, Insn::BoxBool { .. } => types::BoolExact, + Insn::BoxFixnum { .. } => types::Fixnum, Insn::StringCopy { .. } => types::StringExact, Insn::StringIntern { .. } => types::Symbol, Insn::StringConcat { .. } => types::StringExact, @@ -3281,7 +3288,8 @@ impl Function { | &Insn::GuardNotFrozen { val, state } | &Insn::ToArray { val, state } | &Insn::IsMethodCfunc { val, state, .. } - | &Insn::ToNewArray { val, state } => { + | &Insn::ToNewArray { val, state } + | &Insn::BoxFixnum { val, state } => { worklist.push_back(val); worklist.push_back(state); } @@ -3759,6 +3767,7 @@ impl Function { } } Insn::BoxBool { val } => self.assert_subtype(insn_id, val, types::CBool), + Insn::BoxFixnum { val, .. } => self.assert_subtype(insn_id, val, types::CInt64), Insn::SetGlobal { val, .. } => self.assert_subtype(insn_id, val, types::BasicObject), Insn::GetIvar { self_val, .. } => self.assert_subtype(insn_id, self_val, types::BasicObject), Insn::SetIvar { self_val, val, .. 
} => { diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index d0b3203ac186e6..07f80c06824b71 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -2686,34 +2686,6 @@ mod hir_opt_tests { "); } - #[test] - fn string_bytesize_simple() { - eval(" - def test = 'abc'.bytesize - test - test - "); - assert_snapshot!(hir_string("test"), @r" - fn test@:2: - bb0(): - EntryPoint interpreter - v1:BasicObject = LoadSelf - Jump bb2(v1) - bb1(v4:BasicObject): - EntryPoint JIT(0) - Jump bb2(v4) - bb2(v6:BasicObject): - v10:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) - v12:StringExact = StringCopy v10 - PatchPoint MethodRedefined(String@0x1008, bytesize@0x1010, cme:0x1018) - PatchPoint NoSingletonClass(String@0x1008) - IncrCounter inline_cfunc_optimized_send_count - v24:Fixnum = CCall bytesize@0x1040, v12 - CheckInterrupts - Return v24 - "); - } - #[test] fn dont_replace_get_constant_path_with_empty_ic() { eval(" @@ -7161,7 +7133,7 @@ mod hir_opt_tests { } #[test] - fn test_specialize_string_bytesize() { + fn test_inline_string_bytesize() { eval(r#" def test(s) s.bytesize @@ -7182,8 +7154,9 @@ mod hir_opt_tests { PatchPoint MethodRedefined(String@0x1000, bytesize@0x1008, cme:0x1010) PatchPoint NoSingletonClass(String@0x1000) v23:StringExact = GuardType v9, StringExact + v24:CInt64 = LoadField v23, :len@0x1038 + v25:Fixnum = BoxFixnum v24 IncrCounter inline_cfunc_optimized_send_count - v25:Fixnum = CCall bytesize@0x1038, v23 CheckInterrupts Return v25 "); diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 2d7139eec6c053..35af2b1d9d3bea 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -139,6 +139,7 @@ make_counters! 
{ exit_fixnum_sub_overflow, exit_fixnum_mult_overflow, exit_fixnum_mod_by_zero, + exit_box_fixnum_overflow, exit_guard_type_failure, exit_guard_type_not_failure, exit_guard_bit_equals_failure, @@ -378,6 +379,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { FixnumSubOverflow => exit_fixnum_sub_overflow, FixnumMultOverflow => exit_fixnum_mult_overflow, FixnumModByZero => exit_fixnum_mod_by_zero, + BoxFixnumOverflow => exit_box_fixnum_overflow, GuardType(_) => exit_guard_type_failure, GuardTypeNot(_) => exit_guard_type_not_failure, GuardBitEquals(_) => exit_guard_bit_equals_failure, From bac6a25ad3b6ed02ece11a2790cec3a2e6f8c8c9 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Sun, 2 Nov 2025 21:47:42 -0800 Subject: [PATCH 4/7] [ruby/rubygems] Remove open-ended and prerelease dependency warnings when building gems In general, rubygems should provide mechanism and not policy. Pessimistic versioning is not universally better, and in many cases, it can cause more problems than it solves. Rubygems should not be warning against open-ended versioning when building gems. The majority of the default gems with dependencies do not use pessimistic versioning, which indicates that Ruby itself recognizes that open-ended versioning is generally better. In some cases, depending on a prerelease gem is the only choice other than not releasing a gem. If you are building an extension gem for a feature in a prerelease version of another gem, then depending on the prerelease version is the only way to ensure a compatible dependency is installed. 
https://github.com/ruby/rubygems/commit/beba8dd065 --- lib/rubygems/specification_policy.rb | 36 ------------------------- test/rubygems/test_gem_specification.rb | 22 +-------------- 2 files changed, 1 insertion(+), 57 deletions(-) diff --git a/lib/rubygems/specification_policy.rb b/lib/rubygems/specification_policy.rb index d79ee7df9252ae..e5008a24dbf4a6 100644 --- a/lib/rubygems/specification_policy.rb +++ b/lib/rubygems/specification_policy.rb @@ -190,9 +190,6 @@ def validate_duplicate_dependencies # :nodoc: ## # Checks that the gem does not depend on itself. - # Checks that dependencies use requirements as we recommend. Warnings are - # issued when dependencies are open-ended or overly strict for semantic - # versioning. def validate_dependencies # :nodoc: warning_messages = [] @@ -200,39 +197,6 @@ def validate_dependencies # :nodoc: if dep.name == @specification.name # warn on self reference warning_messages << "Self referencing dependency is unnecessary and strongly discouraged." end - - prerelease_dep = dep.requirements_list.any? do |req| - Gem::Requirement.new(req).prerelease? - end - - warning_messages << "prerelease dependency on #{dep} is not recommended" if - prerelease_dep && !@specification.version.prerelease? - - open_ended = dep.requirement.requirements.all? do |op, version| - !version.prerelease? 
&& [">", ">="].include?(op) - end - - next unless open_ended - op, dep_version = dep.requirement.requirements.first - - segments = dep_version.segments - - base = segments.first 2 - - recommendation = if [">", ">="].include?(op) && segments == [0] - " use a bounded requirement, such as \"~> x.y\"" - else - bugfix = if op == ">" - ", \"> #{dep_version}\"" - elsif op == ">=" && base != segments - ", \">= #{dep_version}\"" - end - - " if #{dep.name} is semantically versioned, use:\n" \ - " add_#{dep.type}_dependency \"#{dep.name}\", \"~> #{base.join "."}\"#{bugfix}" - end - - warning_messages << ["open-ended dependency on #{dep} is not recommended", recommendation].join("\n") + "\n" end if warning_messages.any? warning_messages.each {|warning_message| warning warning_message } diff --git a/test/rubygems/test_gem_specification.rb b/test/rubygems/test_gem_specification.rb index af351f4d2e1fc0..3a325c439c1cd8 100644 --- a/test/rubygems/test_gem_specification.rb +++ b/test/rubygems/test_gem_specification.rb @@ -2671,27 +2671,7 @@ def test_validate_dependencies @a1.validate end - expected = <<-EXPECTED -#{w}: prerelease dependency on b (>= 1.0.rc1) is not recommended -#{w}: prerelease dependency on c (>= 2.0.rc2, development) is not recommended -#{w}: open-ended dependency on i (>= 1.2) is not recommended - if i is semantically versioned, use: - add_runtime_dependency "i", "~> 1.2" -#{w}: open-ended dependency on j (>= 1.2.3) is not recommended - if j is semantically versioned, use: - add_runtime_dependency "j", "~> 1.2", ">= 1.2.3" -#{w}: open-ended dependency on k (> 1.2) is not recommended - if k is semantically versioned, use: - add_runtime_dependency "k", "~> 1.2", "> 1.2" -#{w}: open-ended dependency on l (> 1.2.3) is not recommended - if l is semantically versioned, use: - add_runtime_dependency "l", "~> 1.2", "> 1.2.3" -#{w}: open-ended dependency on o (>= 0) is not recommended - use a bounded requirement, such as "~> x.y" -#{w}: See 
https://guides.rubygems.org/specification-reference/ for help - EXPECTED - - assert_equal expected, @ui.error, "warning" + assert_equal "", @ui.error, "warning" end end From 2c2eaa3103e5cf1cbfc2b16d9db975a9b8a0399a Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Fri, 31 Oct 2025 14:49:24 -0700 Subject: [PATCH 5/7] [ruby/rubygems] Fix constants in TAR to be frozen I would like to use the tar implementation inside a Ractor, but two of the constants are not frozen. This patch freezes the constants so we can use it in a Ractor. https://github.com/ruby/rubygems/commit/0ff4790f4c --- lib/rubygems/package/tar_header.rb | 8 ++++---- test/rubygems/test_gem_package_tar_header.rb | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/lib/rubygems/package/tar_header.rb b/lib/rubygems/package/tar_header.rb index 0ebcbd789d4038..dd20d65080ff00 100644 --- a/lib/rubygems/package/tar_header.rb +++ b/lib/rubygems/package/tar_header.rb @@ -56,7 +56,7 @@ class Gem::Package::TarHeader ## # Pack format for a tar header - PACK_FORMAT = "a100" + # name + PACK_FORMAT = ("a100" + # name "a8" + # mode "a8" + # uid "a8" + # gid @@ -71,12 +71,12 @@ class Gem::Package::TarHeader "a32" + # gname "a8" + # devmajor "a8" + # devminor - "a155" # prefix + "a155").freeze # prefix ## # Unpack format for a tar header - UNPACK_FORMAT = "A100" + # name + UNPACK_FORMAT = ("A100" + # name "A8" + # mode "A8" + # uid "A8" + # gid @@ -91,7 +91,7 @@ class Gem::Package::TarHeader "A32" + # gname "A8" + # devmajor "A8" + # devminor - "A155" # prefix + "A155").freeze # prefix attr_reader(*FIELDS) diff --git a/test/rubygems/test_gem_package_tar_header.rb b/test/rubygems/test_gem_package_tar_header.rb index a3f95bb7704f91..34f92967e9905b 100644 --- a/test/rubygems/test_gem_package_tar_header.rb +++ b/test/rubygems/test_gem_package_tar_header.rb @@ -26,6 +26,25 @@ def setup @tar_header = Gem::Package::TarHeader.new header end + def test_decode_in_ractor + new_header = 
Ractor.new(@tar_header.to_s) do |str| + Gem::Package::TarHeader.from StringIO.new str + end.value + + assert_headers_equal @tar_header, new_header + end if defined?(Ractor) && Ractor.instance_methods.include?(:value) + + def test_encode_in_ractor + header_bytes = @tar_header.to_s + + new_header = Ractor.new(header_bytes) do |str| + header = Gem::Package::TarHeader.from StringIO.new str + header.to_s + end.value + + assert_headers_equal header_bytes, new_header + end if defined?(Ractor) && Ractor.instance_methods.include?(:value) + def test_self_from io = TempIO.new @tar_header.to_s From 6695a3b3333069ce220aa4732c75fb75efe90383 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Mon, 3 Nov 2025 18:45:00 -0600 Subject: [PATCH 6/7] [ruby/stringio] [DOC] Tweaks for StringIO#eof? (https://github.com/ruby/stringio/pull/160) https://github.com/ruby/stringio/commit/5034156245 --- ext/stringio/stringio.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index b96010dfbf3f96..9133c24266884c 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -699,10 +699,18 @@ strio_to_read(VALUE self) * call-seq: * eof? -> true or false * - * Returns +true+ if positioned at end-of-stream, +false+ otherwise; - * see {Position}[rdoc-ref:IO@Position]. + * Returns whether +self+ is positioned at end-of-stream: + * + * strio = StringIO.new('foo') + * strio.pos # => 0 + * strio.eof? # => false + * strio.read # => "foo" + * strio.pos # => 3 + * strio.eof? # => true + * strio.close_read + * strio.eof? # Raises IOError: not opened for reading * - * Raises IOError if the stream is not opened for reading. + * Related: StringIO#pos. 
*/ static VALUE strio_eof(VALUE self) From 0d210f4d39c1cff71f34dbe9938345897f3e5eb1 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Mon, 3 Nov 2025 18:46:51 -0600 Subject: [PATCH 7/7] [ruby/stringio] [DOC] Tweaks for StringIO#external_encoding (https://github.com/ruby/stringio/pull/161) https://github.com/ruby/stringio/commit/92656f5c66 --- ext/stringio/stringio.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index 9133c24266884c..cf3e06a71f130e 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -2047,12 +2047,20 @@ strio_truncate(VALUE self, VALUE len) } /* - * call-seq: - * strio.external_encoding => encoding + * call-seq: + * external_encoding -> encoding or nil + * + * Returns an Encoding object that represents the encoding of the string; + * see {Encoding}[rdoc-ref:Encoding]: + * + * strio = StringIO.new('foo') + * strio.external_encoding # => #<Encoding:UTF-8> + * + * Returns +nil+ if +self+ has no string and is in write mode: + * + * strio = StringIO.new(nil, 'w+') + * strio.external_encoding # => nil * - * Returns the Encoding object that represents the encoding of the file. - * If the stream is write mode and no encoding is specified, returns - * +nil+. */ static VALUE