From bf3d6a17ee44fcf1ec5a9335b55eb5b10288b859 Mon Sep 17 00:00:00 2001
From: Takashi Kokubun
Date: Thu, 28 Aug 2025 13:45:35 -0700
Subject: [PATCH 1/8] ZJIT: Add code_region_bytes stat (#14389)

* ZJIT: Add code_region_bytes stat

* Share more logic between --zjit and --zjit-stats
---
 zjit.rb             |  1 +
 zjit/src/asm/mod.rs |  5 +++++
 zjit/src/stats.rs   | 22 +++++++++++++---------
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/zjit.rb b/zjit.rb
index 41ce371e7e9da9..6b079b7b097e0e 100644
--- a/zjit.rb
+++ b/zjit.rb
@@ -46,6 +46,7 @@ def stats_string
       :gc_time_ns,
       :invalidation_time_ns,
 
+      :code_region_bytes,
       :side_exit_count,
       :total_insn_count,
       :vm_insn_count,
diff --git a/zjit/src/asm/mod.rs b/zjit/src/asm/mod.rs
index 5473998f9de99a..5e582ce28203ad 100644
--- a/zjit/src/asm/mod.rs
+++ b/zjit/src/asm/mod.rs
@@ -84,6 +84,11 @@ impl CodeBlock {
         }
     }
 
+    /// Size of the region in bytes that we have allocated physical memory for.
+    pub fn mapped_region_size(&self) -> usize {
+        self.mem_block.borrow().mapped_region_size()
+    }
+
     /// Add an assembly comment if the feature is on.
     pub fn add_comment(&mut self, comment: &str) {
         if !self.keep_comments {
diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs
index 2be581fa2bfdf6..16010a1ea8dc5b 100644
--- a/zjit/src/stats.rs
+++ b/zjit/src/stats.rs
@@ -48,9 +48,8 @@ macro_rules! make_counters {
             $( Counter::$default_counter_name, )+
         ];
 
-        /// List of all counters
-        pub const ALL_COUNTERS: &'static [Counter] = &[
-            $( Counter::$default_counter_name, )+
+        /// List of counters that are available only for --zjit-stats.
+        pub const STATS_ONLY_COUNTERS: &'static [Counter] = &[
             $( Counter::$counter_name, )+
         ];
     }
@@ -187,16 +186,21 @@ pub extern "C" fn rb_zjit_stats(_ec: EcPtr, _self: VALUE, target_key: VALUE) ->
         Qnil
     };
 
-    // If not --zjit-stats, set only default counters
+    // Set default counters
+    for &counter in DEFAULT_COUNTERS {
+        set_stat_usize!(hash, &counter.name(), unsafe { *counter_ptr(counter) });
+    }
+
+    // Memory usage stats
+    set_stat_usize!(hash, "code_region_bytes", ZJITState::get_code_block().mapped_region_size());
+
+    // End of default stats. Every counter beyond this is provided only for --zjit-stats.
     if !get_option!(stats) {
-        for &counter in DEFAULT_COUNTERS {
-            set_stat_usize!(hash, &counter.name(), unsafe { *counter_ptr(counter) });
-        }
         return hash;
     }
 
-    // Set all counters for --zjit-stats
-    for &counter in ALL_COUNTERS {
+    // Set stats-only counters
+    for &counter in STATS_ONLY_COUNTERS {
         set_stat_usize!(hash, &counter.name(), unsafe { *counter_ptr(counter) });
     }
 

From b1dbcd3ce3cb61f7b136f4b1e1bb97a6f3635c3e Mon Sep 17 00:00:00 2001
From: Jean Boussier
Date: Thu, 28 Aug 2025 16:17:46 +0200
Subject: [PATCH 2/8] YJIT: simplify gen_get_ivar and gen_set_ivar

The `shape_id` now includes 3 bits for the `heap_id`, which is always
non-zero for `T_OBJECT` and always zero for all other types. Hence all
these allocator checks are no longer necessary.
---
 yjit/src/codegen.rs | 36 ++++--------------------------------
 1 file changed, 4 insertions(+), 32 deletions(-)

diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index dfd382a173b36b..10aeac2d85a7d6 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -2848,24 +2848,12 @@ fn gen_get_ivar(
     recv: Opnd,
     recv_opnd: YARVOpnd,
 ) -> Option {
-    let comptime_val_klass = comptime_receiver.class_of();
-
     // If recv isn't already a register, load it.
    let recv = match recv {
        Opnd::InsnOut { .. } => recv,
        _ => asm.load(recv),
    };
 
-    // Check if the comptime class uses a custom allocator
-    let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
-    let uses_custom_allocator = match custom_allocator {
-        Some(alloc_fun) => {
-            let allocate_instance = rb_class_allocate_instance as *const u8;
-            alloc_fun as *const u8 != allocate_instance
-        }
-        None => false,
-    };
-
     // Check if the comptime receiver is a T_OBJECT
     let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
     // Use a general C call at the last chain to avoid exits on megamorphic shapes
@@ -2874,12 +2862,9 @@ fn gen_get_ivar(
         gen_counter_incr(jit, asm, Counter::num_getivar_megamorphic);
     }
 
-    // If the class uses the default allocator, instances should all be T_OBJECT
-    // NOTE: This assumes nobody changes the allocator of the class after allocation.
-    // Eventually, we can encode whether an object is T_OBJECT or not
-    // inside object shapes.
+    // NOTE: This assumes T_OBJECT can't ever have the same shape_id as any other type.
     // too-complex shapes can't use index access, so we use rb_ivar_get for them too.
-    if !receiver_t_object || uses_custom_allocator || comptime_receiver.shape_too_complex() || megamorphic {
+    if !receiver_t_object || comptime_receiver.shape_too_complex() || megamorphic {
         // General case. Call rb_ivar_get().
         // VALUE rb_ivar_get(VALUE obj, ID id)
         asm_comment!(asm, "call rb_ivar_get()");
@@ -3073,8 +3058,6 @@ fn gen_set_ivar(
     recv_opnd: YARVOpnd,
     ic: Option<*const iseq_inline_iv_cache_entry>,
 ) -> Option {
-    let comptime_val_klass = comptime_receiver.class_of();
-
     // If the comptime receiver is frozen, writing an IV will raise an exception
     // and we don't want to JIT code to deal with that situation.
     if comptime_receiver.is_frozen() {
@@ -3084,16 +3067,6 @@ fn gen_set_ivar(
 
     let stack_type = asm.ctx.get_opnd_type(StackOpnd(0));
 
-    // Check if the comptime class uses a custom allocator
-    let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
-    let uses_custom_allocator = match custom_allocator {
-        Some(alloc_fun) => {
-            let allocate_instance = rb_class_allocate_instance as *const u8;
-            alloc_fun as *const u8 != allocate_instance
-        }
-        None => false,
-    };
-
     // Check if the comptime receiver is a T_OBJECT
     let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
     // Use a general C call at the last chain to avoid exits on megamorphic shapes
@@ -3149,10 +3122,9 @@ fn gen_set_ivar(
         None
     };
 
-    // If the receiver isn't a T_OBJECT, or uses a custom allocator,
-    // then just write out the IV write as a function call.
+    // If the receiver isn't a T_OBJECT, then just write out the IV write as a function call.
     // too-complex shapes can't use index access, so we use rb_ivar_get for them too.
-    if !receiver_t_object || uses_custom_allocator || shape_too_complex || new_shape_too_complex || megamorphic {
+    if !receiver_t_object || shape_too_complex || new_shape_too_complex || megamorphic {
         // The function could raise FrozenError.
         // Note that this modifies REG_SP, which is why we do it first
         jit_prepare_non_leaf_call(jit, asm);

From b6d4882c05d64aa6cb5ff8761584ae910de67f21 Mon Sep 17 00:00:00 2001
From: Jean Boussier
Date: Thu, 28 Aug 2025 19:29:46 +0200
Subject: [PATCH 3/8] YJIT: getinstancevariable cache indexes for types other
 than T_OBJECT

While accessing the ivars of other types is too complicated to
realistically generate assembly for, we can at least provide the ivar
index so we don't have to look up the shape tree every time.
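
For illustration, here is a minimal C sketch (not part of this patch) of what
the cached fast path amounts to for a non-`T_OBJECT` receiver. The
`cached_ivar_read` helper below is hypothetical and stands in for the generated
code, which guards on the receiver's shape id and side-exits (or chains a new
guard) on a mismatch rather than calling `rb_ivar_get()`; `rb_ivar_get_at()` is
the helper introduced by this patch.

```
/* Hypothetical sketch, not part of this patch. Assumes CRuby's internal
 * headers (shape.h, internal/variable.h) for shape_id_t, attr_index_t,
 * rb_obj_shape_id() and the rb_ivar_get_at() helper added below. */
static VALUE
cached_ivar_read(VALUE obj, shape_id_t cached_shape_id, attr_index_t cached_index, ID id)
{
    if (rb_obj_shape_id(obj) != cached_shape_id) {
        /* Shape changed since compile time: fall back to the generic lookup,
         * which walks the shape tree. (The generated code side-exits or
         * chains a new guard here instead.) */
        return rb_ivar_get(obj, id);
    }
    /* Shape matches: read the field directly at the cached index,
     * with no shape-tree lookup. */
    return rb_ivar_get_at(obj, cached_index, id);
}
```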
``` compare-ruby: ruby 3.5.0dev (2025-08-27T14:58:58Z merge-vm-setivar-d.. 5b749d8e53) +YJIT +PRISM [arm64-darwin24] built-ruby: ruby 3.5.0dev (2025-08-28T17:58:32Z yjit-get-exivar efaa8c9b09) +YJIT +PRISM [arm64-darwin24] | |compare-ruby|built-ruby| |:--------------------------|-----------:|---------:| |vm_ivar_get_on_obj | 930.458| 936.865| | | -| 1.01x| |vm_ivar_get_on_class | 134.471| 431.622| | | -| 3.21x| |vm_ivar_get_on_generic | 146.679| 284.408| | | -| 1.94x| ``` Co-Authored-By: Aaron Patterson --- benchmark/vm_ivar_get.yml | 67 +++++++++++++++++++++++++++++++++- internal/variable.h | 1 + shape.h | 1 + variable.c | 30 +++++++++++++++ yjit/bindgen/src/main.rs | 1 + yjit/src/codegen.rs | 48 ++++++++++++------------ yjit/src/cruby_bindings.inc.rs | 1 + 7 files changed, 124 insertions(+), 25 deletions(-) diff --git a/benchmark/vm_ivar_get.yml b/benchmark/vm_ivar_get.yml index 9174af6965490c..1e0dad665fa2c3 100644 --- a/benchmark/vm_ivar_get.yml +++ b/benchmark/vm_ivar_get.yml @@ -1,17 +1,75 @@ prelude: | class Example def initialize + @levar = 1 @v0 = 1 @v1 = 2 @v3 = 3 + end + + def get_value_loop + sum = 0 + + i = 0 + while i < 100_000 + # 10 times to de-emphasize loop overhead + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + i += 1 + end + + return sum + end + + @levar = 1 + @v0 = 1 + @v1 = 2 + @v3 = 3 + + def self.get_value_loop + sum = 0 + + i = 0 + while i < 100_000 + # 10 times to de-emphasize loop overhead + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + sum += @levar + i += 1 + end + + return sum + end + end + + class GenExample < Time + def initialize @levar = 1 + @v0 = 1 + @v1 = 2 + @v3 = 3 end def get_value_loop sum = 0 i = 0 - while i < 1000000 + while i < 100_000 # 10 times to de-emphasize loop overhead sum += @levar sum += @levar @@ -31,7 +89,12 @@ prelude: | end obj = Example.new + gen = GenExample.new benchmark: - vm_ivar_get: | + vm_ivar_get_on_obj: | obj.get_value_loop + vm_ivar_get_on_class: | + Example.get_value_loop + vm_ivar_get_on_generic: | + gen.get_value_loop loop_count: 100 diff --git a/internal/variable.h b/internal/variable.h index 2cb50f66ae9ef4..d2563301686c03 100644 --- a/internal/variable.h +++ b/internal/variable.h @@ -53,6 +53,7 @@ VALUE rb_obj_field_get(VALUE obj, shape_id_t target_shape_id); void rb_ivar_set_internal(VALUE obj, ID id, VALUE val); attr_index_t rb_ivar_set_index(VALUE obj, ID id, VALUE val); attr_index_t rb_obj_field_set(VALUE obj, shape_id_t target_shape_id, ID field_name, VALUE val); +VALUE rb_ivar_get_at(VALUE obj, attr_index_t index, ID id); RUBY_SYMBOL_EXPORT_BEGIN /* variable.c (export) */ diff --git a/shape.h b/shape.h index 1f444c545f2a3a..fdc2b3ddd6ff80 100644 --- a/shape.h +++ b/shape.h @@ -24,6 +24,7 @@ STATIC_ASSERT(shape_id_num_bits, SHAPE_ID_NUM_BITS == sizeof(shape_id_t) * CHAR_ // index in rb_shape_tree.shape_list. Allow to access `rb_shape_t *`. // 19-21 SHAPE_ID_HEAP_INDEX_MASK // index in rb_shape_tree.capacities. Allow to access slot size. +// Always 0 except for T_OBJECT. // 22 SHAPE_ID_FL_FROZEN // Whether the object is frozen or not. 
// 23 SHAPE_ID_FL_HAS_OBJECT_ID diff --git a/variable.c b/variable.c index f80f1a56a6051a..0a5ec60c25e91f 100644 --- a/variable.c +++ b/variable.c @@ -1463,6 +1463,36 @@ rb_ivar_get(VALUE obj, ID id) return iv; } +VALUE +rb_ivar_get_at(VALUE obj, attr_index_t index, ID id) +{ + RUBY_ASSERT(rb_is_instance_id(id)); + // Used by JITs, but never for T_OBJECT. + + switch (BUILTIN_TYPE(obj)) { + case T_OBJECT: + UNREACHABLE_RETURN(Qundef); + case T_CLASS: + case T_MODULE: + { + VALUE fields_obj = RCLASS_WRITABLE_FIELDS_OBJ(obj); + VALUE val = rb_imemo_fields_ptr(fields_obj)[index]; + + if (UNLIKELY(!rb_ractor_main_p()) && !rb_ractor_shareable_p(val)) { + rb_raise(rb_eRactorIsolationError, + "can not get unshareable values from instance variables of classes/modules from non-main Ractors"); + } + + return val; + } + default: + { + VALUE fields_obj = rb_obj_fields(obj, id); + return rb_imemo_fields_ptr(fields_obj)[index]; + } + } +} + VALUE rb_attr_get(VALUE obj, ID id) { diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index dd0cb6dbf523d7..c1114e8089f25b 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -313,6 +313,7 @@ fn main() { // From yjit.c .allowlist_function("rb_object_shape_count") + .allowlist_function("rb_ivar_get_at") .allowlist_function("rb_iseq_(get|set)_yjit_payload") .allowlist_function("rb_iseq_pc_at_idx") .allowlist_function("rb_iseq_opcode_at_pc") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 10aeac2d85a7d6..f27d09d7d46d0d 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2864,7 +2864,7 @@ fn gen_get_ivar( // NOTE: This assumes T_OBJECT can't ever have the same shape_id as any other type. // too-complex shapes can't use index access, so we use rb_ivar_get for them too. - if !receiver_t_object || comptime_receiver.shape_too_complex() || megamorphic { + if !comptime_receiver.heap_object_p() || comptime_receiver.shape_too_complex() || megamorphic { // General case. Call rb_ivar_get(). // VALUE rb_ivar_get(VALUE obj, ID id) asm_comment!(asm, "call rb_ivar_get()"); @@ -2900,9 +2900,6 @@ fn gen_get_ivar( // Guard heap object (recv_opnd must be used before stack_pop) guard_object_is_heap(asm, recv, recv_opnd, Counter::getivar_not_heap); - // Compile time self is embedded and the ivar index lands within the object - let embed_test_result = comptime_receiver.embedded_p(); - let expected_shape = unsafe { rb_obj_shape_id(comptime_receiver) }; let shape_id_offset = unsafe { rb_shape_id_offset() }; let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); @@ -2931,28 +2928,33 @@ fn gen_get_ivar( asm.mov(out_opnd, Qnil.into()); } Some(ivar_index) => { - if embed_test_result { - // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h - - // Load the variable - let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; - let ivar_opnd = Opnd::mem(64, recv, offs); - - // Push the ivar on the stack - let out_opnd = asm.stack_push(Type::Unknown); - asm.mov(out_opnd, ivar_opnd); + let ivar_opnd = if receiver_t_object { + if comptime_receiver.embedded_p() { + // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h + + // Load the variable + let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; + Opnd::mem(64, recv, offs) + } else { + // Compile time value is *not* embedded. 
+ + // Get a pointer to the extended table + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32)); + + // Read the ivar from the extended table + Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32) + } } else { - // Compile time value is *not* embedded. + asm_comment!(asm, "call rb_ivar_get_at()"); - // Get a pointer to the extended table - let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_FIELDS as i32)); - - // Read the ivar from the extended table - let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); + // The function could raise RactorIsolationError. + jit_prepare_non_leaf_call(jit, asm); + asm.ccall(rb_ivar_get_at as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into()), Opnd::UImm(ivar_name)]) + }; - let out_opnd = asm.stack_push(Type::Unknown); - asm.mov(out_opnd, ivar_opnd); - } + // Push the ivar on the stack + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_opnd); } } diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 4cae138c9581a8..f78354a6116731 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1115,6 +1115,7 @@ extern "C" { pub fn rb_obj_shape_id(obj: VALUE) -> shape_id_t; pub fn rb_shape_get_iv_index(shape_id: shape_id_t, id: ID, value: *mut attr_index_t) -> bool; pub fn rb_shape_transition_add_ivar_no_warnings(obj: VALUE, id: ID) -> shape_id_t; + pub fn rb_ivar_get_at(obj: VALUE, index: attr_index_t, id: ID) -> VALUE; pub fn rb_gvar_get(arg1: ID) -> VALUE; pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE; pub fn rb_ensure_iv_list_size(obj: VALUE, current_len: u32, newsize: u32); From 3646596e5bd4c445b158be5fd3780e7685aa9b7f Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 28 Aug 2025 22:53:11 +0200 Subject: [PATCH 4/8] YJIT: rb_ivar_get_at assume leaf-call when single ractor The only exception it could raise is if we're in multi ractor mode. --- yjit/src/codegen.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index f27d09d7d46d0d..ff001b647641c9 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2947,8 +2947,10 @@ fn gen_get_ivar( } else { asm_comment!(asm, "call rb_ivar_get_at()"); - // The function could raise RactorIsolationError. - jit_prepare_non_leaf_call(jit, asm); + if !assume_single_ractor_mode(jit, asm) { + // The function could raise RactorIsolationError. + jit_prepare_non_leaf_call(jit, asm); + } asm.ccall(rb_ivar_get_at as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into()), Opnd::UImm(ivar_name)]) }; From 4992d2c2980d150cdbfc9750f124c3a9ccc71945 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 28 Aug 2025 23:23:38 +0200 Subject: [PATCH 5/8] YJIT: rb_ivar_get_at skip ractor checks Using `assume_single_ractor_mode` we can skip all ractor safety checks if we're in single ractor mode. ``` compare-ruby: ruby 3.5.0dev (2025-08-27T14:58:58Z merge-vm-setivar-d.. 
5b749d8e53) +YJIT +PRISM [arm64-darwin24] built-ruby: ruby 3.5.0dev (2025-08-28T21:23:38Z yjit-get-exivar 3cc21b76d4) +YJIT +PRISM [arm64-darwin24] | |compare-ruby|built-ruby| |:--------------------------|-----------:|---------:| |vm_ivar_get_on_obj | 975.981| 975.772| | | 1.00x| -| |vm_ivar_get_on_class | 136.214| 470.912| | | -| 3.46x| |vm_ivar_get_on_generic | 148.315| 299.122| | | -| 2.02x| ``` --- internal/variable.h | 1 + variable.c | 20 ++++++++++++++++++++ yjit/bindgen/src/main.rs | 1 + yjit/src/codegen.rs | 6 ++++-- yjit/src/cruby_bindings.inc.rs | 1 + 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/internal/variable.h b/internal/variable.h index d2563301686c03..5e2bcceb61875c 100644 --- a/internal/variable.h +++ b/internal/variable.h @@ -54,6 +54,7 @@ void rb_ivar_set_internal(VALUE obj, ID id, VALUE val); attr_index_t rb_ivar_set_index(VALUE obj, ID id, VALUE val); attr_index_t rb_obj_field_set(VALUE obj, shape_id_t target_shape_id, ID field_name, VALUE val); VALUE rb_ivar_get_at(VALUE obj, attr_index_t index, ID id); +VALUE rb_ivar_get_at_no_ractor_check(VALUE obj, attr_index_t index); RUBY_SYMBOL_EXPORT_BEGIN /* variable.c (export) */ diff --git a/variable.c b/variable.c index 0a5ec60c25e91f..cccb09e329dbd4 100644 --- a/variable.c +++ b/variable.c @@ -1493,6 +1493,26 @@ rb_ivar_get_at(VALUE obj, attr_index_t index, ID id) } } +VALUE +rb_ivar_get_at_no_ractor_check(VALUE obj, attr_index_t index) +{ + // Used by JITs, but never for T_OBJECT. + + VALUE fields_obj; + switch (BUILTIN_TYPE(obj)) { + case T_OBJECT: + UNREACHABLE_RETURN(Qundef); + case T_CLASS: + case T_MODULE: + fields_obj = RCLASS_WRITABLE_FIELDS_OBJ(obj); + break; + default: + fields_obj = rb_obj_fields_no_ractor_check(obj); + break; + } + return rb_imemo_fields_ptr(fields_obj)[index]; +} + VALUE rb_attr_get(VALUE obj, ID id) { diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index c1114e8089f25b..2993e05e2f22ee 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -314,6 +314,7 @@ fn main() { // From yjit.c .allowlist_function("rb_object_shape_count") .allowlist_function("rb_ivar_get_at") + .allowlist_function("rb_ivar_get_at_no_ractor_check") .allowlist_function("rb_iseq_(get|set)_yjit_payload") .allowlist_function("rb_iseq_pc_at_idx") .allowlist_function("rb_iseq_opcode_at_pc") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ff001b647641c9..2dae72280fae69 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2947,11 +2947,13 @@ fn gen_get_ivar( } else { asm_comment!(asm, "call rb_ivar_get_at()"); - if !assume_single_ractor_mode(jit, asm) { + if assume_single_ractor_mode(jit, asm) { + asm.ccall(rb_ivar_get_at_no_ractor_check as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into())]) + } else { // The function could raise RactorIsolationError. 
jit_prepare_non_leaf_call(jit, asm); + asm.ccall(rb_ivar_get_at as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into()), Opnd::UImm(ivar_name)]) } - asm.ccall(rb_ivar_get_at as *const u8, vec![recv, Opnd::UImm((ivar_index as u32).into()), Opnd::UImm(ivar_name)]) }; // Push the ivar on the stack diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index f78354a6116731..ca459ceafbbe7a 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1116,6 +1116,7 @@ extern "C" { pub fn rb_shape_get_iv_index(shape_id: shape_id_t, id: ID, value: *mut attr_index_t) -> bool; pub fn rb_shape_transition_add_ivar_no_warnings(obj: VALUE, id: ID) -> shape_id_t; pub fn rb_ivar_get_at(obj: VALUE, index: attr_index_t, id: ID) -> VALUE; + pub fn rb_ivar_get_at_no_ractor_check(obj: VALUE, index: attr_index_t) -> VALUE; pub fn rb_gvar_get(arg1: ID) -> VALUE; pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE; pub fn rb_ensure_iv_list_size(obj: VALUE, current_len: u32, newsize: u32); From 3cc66977b2675c55a281f923582eadaf2eae3244 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 27 Aug 2025 13:35:20 -0700 Subject: [PATCH 6/8] ZJIT: Compile Insn::Send --- test/ruby/test_zjit.rb | 6 +----- zjit/src/codegen.rs | 34 ++++++++++++++++++++++++++++++++-- zjit/src/hir.rs | 20 ++++++++++++++++---- 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 3ff1392ed71887..2642467d73e9b9 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -251,8 +251,6 @@ def test_nested_local_access end def test_read_local_written_by_children_iseqs - omit "This test fails right now because Send doesn't compile." - assert_compiles '[1, 2]', %q{ def test l1 = nil @@ -1369,15 +1367,13 @@ def test_defined_yield_from_block # This will do some EP hopping to find the local EP, # so it's slightly different than doing it outside of a block. - omit 'Test fails at the moment due to missing Send codegen' - assert_compiles '[nil, nil, "yield"]', %q{ def test yield_self { yield_self { defined?(yield) } } end [test, test, test{}] - }, call_threshold: 2, insns: [:defined] + }, call_threshold: 2 end def test_invokeblock_without_block_after_jit_call diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 6c55b3fb5db5eb..3a7194ec399983 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -355,6 +355,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)), Insn::IfTrue { val, target } => no_output!(gen_if_true(jit, asm, opnd!(val), target)), Insn::IfFalse { val, target } => no_output!(gen_if_false(jit, asm, opnd!(val), target)), + &Insn::Send { cd, blockiseq, state, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state)), Insn::SendWithoutBlock { cd, state, .. } => gen_send_without_block(jit, asm, *cd, &function.frame_state(*state)), // Give up SendWithoutBlockDirect for 6+ args since asm.ccall() doesn't support it. Insn::SendWithoutBlockDirect { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self @@ -407,7 +408,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::ArrayMax { state, .. } | &Insn::FixnumDiv { state, .. } | &Insn::FixnumMod { state, .. } - | &Insn::Send { state, .. } | &Insn::Throw { state, .. 
} => return Err(state), }; @@ -883,6 +883,36 @@ fn gen_if_false(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, branch: asm.write_label(if_true); } +/// Compile a dynamic dispatch with block +fn gen_send( + jit: &mut JITState, + asm: &mut Assembler, + cd: *const rb_call_data, + blockiseq: IseqPtr, + state: &FrameState, +) -> lir::Opnd { + // Save PC and SP + gen_save_pc(asm, state); + gen_save_sp(asm, state.stack().len()); + + // Spill locals and stack + gen_spill_locals(jit, asm, state); + gen_spill_stack(jit, asm, state); + + asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); + unsafe extern "C" { + fn rb_vm_send(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + let ret = asm.ccall( + rb_vm_send as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ); + // TODO: Add a PatchPoint here that can side-exit the function if the callee messed with + // the frame's locals + + ret +} + /// Compile a dynamic dispatch without block fn gen_send_without_block( jit: &mut JITState, @@ -1383,7 +1413,7 @@ fn param_opnd(idx: usize) -> Opnd { } /// Inverse of ep_offset_to_local_idx(). See ep_offset_to_local_idx() for details. -fn local_idx_to_ep_offset(iseq: IseqPtr, local_idx: usize) -> i32 { +pub fn local_idx_to_ep_offset(iseq: IseqPtr, local_idx: usize) -> i32 { let local_size = unsafe { get_iseq_body_local_table_size(iseq) }; local_size_and_idx_to_ep_offset(local_size as usize, local_idx) } diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index ee90032ee870c1..fdefe23e127cb1 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -4,7 +4,7 @@ #![allow(non_upper_case_globals)] use crate::{ - cast::IntoUsize, cruby::*, gc::{get_or_create_iseq_payload, IseqPayload}, options::{get_option, DumpHIR}, state::ZJITState + cast::IntoUsize, codegen::local_idx_to_ep_offset, cruby::*, gc::{get_or_create_iseq_payload, IseqPayload}, options::{get_option, DumpHIR}, state::ZJITState }; use std::{ cell::RefCell, collections::{HashMap, HashSet, VecDeque}, ffi::{c_int, c_void, CStr}, fmt::Display, mem::{align_of, size_of}, ptr, slice::Iter @@ -3138,7 +3138,9 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let ep_offset = get_arg(pc, 0).as_u32(); if iseq_type == ISEQ_TYPE_EVAL || has_send { // On eval, the locals are always on the heap, so read the local using EP. - state.stack_push(fun.push_insn(block, Insn::GetLocal { ep_offset, level: 0 })); + let val = fun.push_insn(block, Insn::GetLocal { ep_offset, level: 0 }); + state.setlocal(ep_offset, val); + state.stack_push(val); } else { // TODO(alan): This implementation doesn't read from EP, so will miss writes // from nested ISeqs. This will need to be amended when we add codegen for @@ -3324,6 +3326,15 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); let send = fun.push_insn(block, Insn::Send { self_val: recv, cd, blockiseq, args, state: exit_id }); state.stack_push(send); + + // Reload locals that may have been modified by the blockiseq. + // TODO: Avoid reloading locals that are not referenced by the blockiseq + // or not used after this. Max thinks we could eventually DCE them. 
+ for local_idx in 0..state.locals.len() { + let ep_offset = local_idx_to_ep_offset(iseq, local_idx) as u32; + let val = fun.push_insn(block, Insn::GetLocal { ep_offset, level: 0 }); + state.setlocal(ep_offset, val); + } } YARVINSN_getglobal => { let id = ID(get_arg(pc, 0).as_u64()); @@ -4698,6 +4709,7 @@ mod tests { bb0(v0:BasicObject, v1:BasicObject): v3:BasicObject = GetLocal l0, EP@3 v5:BasicObject = Send v3, 0x1000, :each + v6:BasicObject = GetLocal l0, EP@3 CheckInterrupts Return v5 "); @@ -7246,9 +7258,9 @@ mod opt_tests { v3:Fixnum[1] = Const Value(1) SetLocal l0, EP@3, v3 v6:BasicObject = Send v0, 0x1000, :foo - v7:BasicObject = GetLocal l0, EP@3 + v8:BasicObject = GetLocal l0, EP@3 CheckInterrupts - Return v7 + Return v8 "); } From 51cd8776367a4f17b77202e171eeaf4681c79389 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Thu, 28 Aug 2025 15:19:04 -0700 Subject: [PATCH 7/8] ZJIT: Add missing rb_zjit_cme_invalidate --- test/ruby/test_zjit.rb | 17 +++++++++++++++++ vm_method.c | 11 +++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 2642467d73e9b9..c11f7bfa9a7106 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1665,6 +1665,23 @@ def foo }, call_threshold: 2 end + def test_method_redefinition_with_module + assert_runs '["original", "redefined"]', %q{ + module Foo + def self.foo = "original" + end + + def test = Foo.foo + test + result1 = test + + def Foo.foo = "redefined" + result2 = test + + [result1, result2] + }, call_threshold: 2 + end + def test_module_name_with_guard_passes assert_compiles '"Integer"', %q{ def test(mod) diff --git a/vm_method.c b/vm_method.c index fb217ef43de617..7295c74c7b6369 100644 --- a/vm_method.c +++ b/vm_method.c @@ -355,6 +355,7 @@ invalidate_method_cache_in_cc_table(VALUE tbl, ID mid) if (tbl && rb_managed_id_table_lookup(tbl, mid, &ccs_data)) { struct rb_class_cc_entries *ccs = (struct rb_class_cc_entries *)ccs_data; rb_yjit_cme_invalidate((rb_callable_method_entry_t *)ccs->cme); + rb_zjit_cme_invalidate((rb_callable_method_entry_t *)ccs->cme); if (NIL_P(ccs->cme->owner)) invalidate_negative_cache(mid); rb_vm_ccs_invalidate_and_free(ccs); rb_managed_id_table_delete(tbl, mid); @@ -367,9 +368,8 @@ invalidate_callable_method_entry_in_callable_m_table(struct rb_id_table *tbl, ID { VALUE cme; if (tbl && rb_id_table_lookup(tbl, mid, &cme)) { - if (rb_yjit_enabled_p) { - rb_yjit_cme_invalidate((rb_callable_method_entry_t *)cme); - } + rb_yjit_cme_invalidate((rb_callable_method_entry_t *)cme); + rb_zjit_cme_invalidate((rb_callable_method_entry_t *)cme); rb_id_table_delete(tbl, mid); RB_DEBUG_COUNTER_INC(cc_invalidate_leaf_callable); } @@ -413,9 +413,8 @@ invalidate_complemented_method_entry_in_callable_m_table(struct rb_id_table *tbl { VALUE cme; if (tbl && rb_id_table_lookup(tbl, mid, &cme)) { - if (rb_yjit_enabled_p) { - rb_yjit_cme_invalidate((rb_callable_method_entry_t *)cme); - } + rb_yjit_cme_invalidate((rb_callable_method_entry_t *)cme); + rb_zjit_cme_invalidate((rb_callable_method_entry_t *)cme); rb_id_table_delete(tbl, mid); RB_DEBUG_COUNTER_INC(cc_invalidate_tree_callable); } From 85eb33d9ffb3f415cefe7f364ff72a238205a46f Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 29 Aug 2025 09:33:17 +0900 Subject: [PATCH 8/8] Relax thresholds for array, object, and string compaction verification These tests failed with RHEL10 https://rubyci.s3.amazonaws.com/rhel10/ruby-master/log/20250828T093003Z.fail.html.gz --- test/ruby/test_gc_compact.rb | 6 
+++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/ruby/test_gc_compact.rb b/test/ruby/test_gc_compact.rb index 3eaa93dfaea0bb..32b0a5def0b8f5 100644 --- a/test/ruby/test_gc_compact.rb +++ b/test/ruby/test_gc_compact.rb @@ -324,7 +324,7 @@ def test_moving_arrays_up_heaps }.resume stats = GC.verify_compaction_references(expand_heap: true, toward: :empty) - assert_operator(stats.dig(:moved_up, :T_ARRAY) || 0, :>=, ARY_COUNT - 10) + assert_operator(stats.dig(:moved_up, :T_ARRAY) || 0, :>=, ARY_COUNT - 15) refute_empty($arys.keep_if { |o| ObjectSpace.dump(o).include?('"embedded":true') }) end; end @@ -356,7 +356,7 @@ def add_ivars stats = GC.verify_compaction_references(expand_heap: true, toward: :empty) - assert_operator(stats.dig(:moved_up, :T_OBJECT) || 0, :>=, OBJ_COUNT - 10) + assert_operator(stats.dig(:moved_up, :T_OBJECT) || 0, :>=, OBJ_COUNT - 15) refute_empty($ary.keep_if { |o| ObjectSpace.dump(o).include?('"embedded":true') }) end; end @@ -377,7 +377,7 @@ def test_moving_strings_up_heaps stats = GC.verify_compaction_references(expand_heap: true, toward: :empty) - assert_operator(stats[:moved_up][:T_STRING], :>=, STR_COUNT - 10) + assert_operator(stats[:moved_up][:T_STRING], :>=, STR_COUNT - 15) refute_empty($ary.keep_if { |o| ObjectSpace.dump(o).include?('"embedded":true') }) end; end