From e7fb87ee3a826a387e5bfb9edea059f1aa065462 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 27 Aug 2025 18:13:16 +0200 Subject: [PATCH 1/2] Populate ivar caches for types other than T_OBJECT `vm_setinstancevariable` had a codepath to try to match the inline cache for types other than T_OBJECT, but the cache population path in `vm_setivar_slowpath` was exclusive to T_OBJECT, so `vm_setivar_default` would never match anything. This commit improves `vm_setivar_slowpath` so that it is capable of filling the cache for all types, and adds a `vm_setivar_class` codepath for `T_CLASS` and `T_MODULE`. `vm_setivar`, `vm_setivar_default` and `vm_setivar_class` could be unified, but based on the very explicit `NOINLINE` I assume they were split to minimize codesize. ``` compare-ruby: ruby 3.5.0dev (2025-08-27T14:58:58Z merge-vm-setivar-d.. 5b749d8e53) +PRISM [arm64-darwin24] built-ruby: ruby 3.5.0dev (2025-08-27T16:30:31Z setivar-cache-gene.. 4fe78ff296) +PRISM [arm64-darwin24] | |compare-ruby|built-ruby| |:------------------------|-----------:|---------:| |vm_ivar_set_on_instance | 161.809| 164.688| | | -| 1.02x| |vm_ivar_set_on_generic | 58.769| 115.638| | | -| 1.97x| |vm_ivar_set_on_class | 70.034| 141.042| | | -| 2.01x| ``` --- benchmark/vm_ivar_set_on_instance.yml | 63 +++++++++++++++++++- internal/variable.h | 4 +- test/ruby/test_variable.rb | 55 +++++++++++++++++ variable.c | 86 ++++++++++++++++----------- vm_insnhelper.c | 76 +++++++++++++++++++---- 5 files changed, 233 insertions(+), 51 deletions(-) diff --git a/benchmark/vm_ivar_set_on_instance.yml b/benchmark/vm_ivar_set_on_instance.yml index 91857b7742e0f2..6ce53a86ecd4b9 100644 --- a/benchmark/vm_ivar_set_on_instance.yml +++ b/benchmark/vm_ivar_set_on_instance.yml @@ -1,16 +1,44 @@ prelude: | class TheClass def initialize + @levar = 1 @v0 = 1 @v1 = 2 @v3 = 3 + end + + def set_value_loop + # 100k + i = 0 + while i < 100_000 + # 10 times to de-emphasize loop overhead + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + i += 1 + end + end + end + + class Generic < Time + def initialize @levar = 1 + @v0 = 1 + @v1 = 2 + @v3 = 3 end def set_value_loop - # 1M + # 100k i = 0 - while i < 1000000 + while i < 100_000 # 10 times to de-emphasize loop overhead @levar = i @levar = i @@ -28,8 +56,39 @@ prelude: | end obj = TheClass.new + gen_obj = Generic.new + + class SomeClass + @levar = 1 + @v0 = 1 + @v1 = 2 + @v3 = 3 + + def self.set_value_loop + # 100k + i = 0 + while i < 100_000 + # 10 times to de-emphasize loop overhead + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + @levar = i + i += 1 + end + end + end benchmark: vm_ivar_set_on_instance: | obj.set_value_loop + vm_ivar_set_on_generic: | + gen_obj.set_value_loop + vm_ivar_set_on_class: | + SomeClass.set_value_loop loop_count: 100 diff --git a/internal/variable.h b/internal/variable.h index bad63981a18b2b..2cb50f66ae9ef4 100644 --- a/internal/variable.h +++ b/internal/variable.h @@ -51,7 +51,8 @@ void rb_obj_init_too_complex(VALUE obj, st_table *table); void rb_evict_ivars_to_hash(VALUE obj); VALUE rb_obj_field_get(VALUE obj, shape_id_t target_shape_id); void rb_ivar_set_internal(VALUE obj, ID id, VALUE val); -void rb_obj_field_set(VALUE obj, shape_id_t target_shape_id, ID field_name, VALUE val); +attr_index_t rb_ivar_set_index(VALUE obj, ID id, VALUE val); +attr_index_t rb_obj_field_set(VALUE obj, shape_id_t target_shape_id, ID field_name, VALUE val); RUBY_SYMBOL_EXPORT_BEGIN /* variable.c (export) */ @@ -67,6 +68,5 @@ VALUE rb_gvar_set(ID, VALUE); VALUE rb_gvar_defined(ID); void rb_const_warn_if_deprecated(const rb_const_entry_t *, VALUE, ID); void rb_ensure_iv_list_size(VALUE obj, uint32_t current_len, uint32_t newsize); -attr_index_t rb_obj_ivar_set(VALUE obj, ID id, VALUE val); #endif /* INTERNAL_VARIABLE_H */ diff --git a/test/ruby/test_variable.rb b/test/ruby/test_variable.rb index 3504b9b7dc3131..df0c6f1f094578 100644 --- a/test/ruby/test_variable.rb +++ b/test/ruby/test_variable.rb @@ -388,6 +388,61 @@ def test_special_constant_ivars end end + class RemoveIvar + class << self + attr_reader :ivar + + def add_ivar + @ivar = 1 + end + end + + attr_reader :ivar + + def add_ivar + @ivar = 1 + end + end + + def add_and_remove_ivar(obj) + assert_nil obj.ivar + assert_equal 1, obj.add_ivar + assert_equal 1, obj.instance_variable_get(:@ivar) + assert_equal 1, obj.ivar + + obj.remove_instance_variable(:@ivar) + assert_nil obj.ivar + + assert_raise NameError do + obj.remove_instance_variable(:@ivar) + end + end + + def test_remove_instance_variables_object + obj = RemoveIvar.new + add_and_remove_ivar(obj) + add_and_remove_ivar(obj) + end + + def test_remove_instance_variables_class + add_and_remove_ivar(RemoveIvar) + add_and_remove_ivar(RemoveIvar) + end + + class RemoveIvarGeneric < Array + attr_reader :ivar + + def add_ivar + @ivar = 1 + end + end + + def test_remove_instance_variables_generic + obj = RemoveIvarGeneric.new + add_and_remove_ivar(obj) + add_and_remove_ivar(obj) + end + class ExIvar < Hash def initialize @a = 1 diff --git a/variable.c b/variable.c index b0c364af595d78..8964b8e5edd9b2 100644 --- a/variable.c +++ b/variable.c @@ -1300,7 +1300,7 @@ rb_free_generic_ivar(VALUE obj) } } -void +static void rb_obj_set_fields(VALUE obj, VALUE fields_obj, ID field_name, VALUE original_fields_obj) { ivar_ractor_check(obj, field_name); @@ -1555,6 +1555,10 @@ rb_ivar_delete(VALUE obj, ID id, VALUE undef) else { val = rb_ivar_delete(fields_obj, id, undef); } + // TODO: What should we set as the T_CLASS shape_id? + // In most case we can replicate the single `fields_obj` shape + // but in namespaced case? Perhaps INVALID_SHAPE_ID? + RBASIC_SET_SHAPE_ID(obj, RBASIC_SHAPE_ID(fields_obj)); } return val; } @@ -1764,7 +1768,7 @@ imemo_fields_set(VALUE owner, VALUE fields_obj, shape_id_t target_shape_id, ID f return fields_obj; } -static void +static attr_index_t generic_field_set(VALUE obj, shape_id_t target_shape_id, ID field_name, VALUE val) { if (!field_name) { @@ -1776,6 +1780,7 @@ generic_field_set(VALUE obj, shape_id_t target_shape_id, ID field_name, VALUE va VALUE fields_obj = imemo_fields_set(obj, original_fields_obj, target_shape_id, field_name, val, false); rb_obj_set_fields(obj, fields_obj, field_name, original_fields_obj); + return rb_shape_too_complex_p(target_shape_id) ? ATTR_INDEX_NOT_SET : RSHAPE_INDEX(target_shape_id); } static shape_id_t @@ -1800,12 +1805,12 @@ generic_shape_ivar(VALUE obj, ID id, bool *new_ivar_out) return target_shape_id; } -static void +static attr_index_t generic_ivar_set(VALUE obj, ID id, VALUE val) { bool dontcare; shape_id_t target_shape_id = generic_shape_ivar(obj, id, &dontcare); - generic_field_set(obj, target_shape_id, id, val); + return generic_field_set(obj, target_shape_id, id, val); } void @@ -1886,8 +1891,8 @@ obj_field_set(VALUE obj, shape_id_t target_shape_id, ID field_name, VALUE val) } } -attr_index_t -rb_obj_ivar_set(VALUE obj, ID id, VALUE val) +static attr_index_t +obj_ivar_set(VALUE obj, ID id, VALUE val) { bool dontcare; shape_id_t target_shape_id = generic_shape_ivar(obj, id, &dontcare); @@ -1902,7 +1907,7 @@ VALUE rb_vm_set_ivar_id(VALUE obj, ID id, VALUE val) { rb_check_frozen(obj); - rb_obj_ivar_set(obj, id, val); + obj_ivar_set(obj, id, val); return val; } @@ -1922,26 +1927,25 @@ void rb_obj_freeze_inline(VALUE x) } } -static void +static attr_index_t class_ivar_set(VALUE obj, ID id, VALUE val, bool *new_ivar); + +static attr_index_t ivar_set(VALUE obj, ID id, VALUE val) { RB_DEBUG_COUNTER_INC(ivar_set_base); switch (BUILTIN_TYPE(obj)) { case T_OBJECT: - { - rb_obj_ivar_set(obj, id, val); - break; - } + return obj_ivar_set(obj, id, val); case T_CLASS: case T_MODULE: - IVAR_ACCESSOR_SHOULD_BE_MAIN_RACTOR(id); - rb_class_ivar_set(obj, id, val); - - break; + { + IVAR_ACCESSOR_SHOULD_BE_MAIN_RACTOR(id); + bool dontcare; + return class_ivar_set(obj, id, val, &dontcare); + } default: - generic_ivar_set(obj, id, val); - break; + return generic_ivar_set(obj, id, val); } } @@ -1953,6 +1957,12 @@ rb_ivar_set(VALUE obj, ID id, VALUE val) return val; } +attr_index_t +rb_ivar_set_index(VALUE obj, ID id, VALUE val) +{ + return ivar_set(obj, id, val); +} + void rb_ivar_set_internal(VALUE obj, ID id, VALUE val) { @@ -1962,21 +1972,19 @@ rb_ivar_set_internal(VALUE obj, ID id, VALUE val) ivar_set(obj, id, val); } -void +attr_index_t rb_obj_field_set(VALUE obj, shape_id_t target_shape_id, ID field_name, VALUE val) { switch (BUILTIN_TYPE(obj)) { case T_OBJECT: - obj_field_set(obj, target_shape_id, field_name, val); - break; + return obj_field_set(obj, target_shape_id, field_name, val); case T_CLASS: case T_MODULE: // The only field is object_id and T_CLASS handle it differently. rb_bug("Unreachable"); break; default: - generic_field_set(obj, target_shape_id, field_name, val); - break; + return generic_field_set(obj, target_shape_id, field_name, val); } } @@ -4483,8 +4491,8 @@ rb_iv_set(VALUE obj, const char *name, VALUE val) return rb_ivar_set(obj, id, val); } -static bool -class_fields_ivar_set(VALUE klass, VALUE fields_obj, ID id, VALUE val, bool concurrent, VALUE *new_fields_obj) +static attr_index_t +class_fields_ivar_set(VALUE klass, VALUE fields_obj, ID id, VALUE val, bool concurrent, VALUE *new_fields_obj, bool *new_ivar_out) { const VALUE original_fields_obj = fields_obj; fields_obj = original_fields_obj ? original_fields_obj : rb_imemo_fields_new(klass, 1); @@ -4531,7 +4539,8 @@ class_fields_ivar_set(VALUE klass, VALUE fields_obj, ID id, VALUE val, bool conc } *new_fields_obj = fields_obj; - return new_ivar; + *new_ivar_out = new_ivar; + return index; too_complex: { @@ -4552,21 +4561,19 @@ class_fields_ivar_set(VALUE klass, VALUE fields_obj, ID id, VALUE val, bool conc } *new_fields_obj = fields_obj; - return new_ivar; + *new_ivar_out = new_ivar; + return ATTR_INDEX_NOT_SET; } -bool -rb_class_ivar_set(VALUE obj, ID id, VALUE val) +static attr_index_t +class_ivar_set(VALUE obj, ID id, VALUE val, bool *new_ivar) { - RUBY_ASSERT(RB_TYPE_P(obj, T_CLASS) || RB_TYPE_P(obj, T_MODULE)); - rb_check_frozen(obj); - rb_class_ensure_writable(obj); const VALUE original_fields_obj = RCLASS_WRITABLE_FIELDS_OBJ(obj); VALUE new_fields_obj = 0; - bool new_ivar = class_fields_ivar_set(obj, original_fields_obj, id, val, rb_multi_ractor_p(), &new_fields_obj); + attr_index_t index = class_fields_ivar_set(obj, original_fields_obj, id, val, rb_multi_ractor_p(), &new_fields_obj, new_ivar); if (new_fields_obj != original_fields_obj) { RCLASS_WRITABLE_SET_FIELDS_OBJ(obj, new_fields_obj); @@ -4574,10 +4581,19 @@ rb_class_ivar_set(VALUE obj, ID id, VALUE val) // TODO: What should we set as the T_CLASS shape_id? // In most case we can replicate the single `fields_obj` shape - // but in namespaced case? - // Perhaps INVALID_SHAPE_ID? + // but in namespaced case? Perhaps INVALID_SHAPE_ID? RBASIC_SET_SHAPE_ID(obj, RBASIC_SHAPE_ID(new_fields_obj)); + return index; +} +bool +rb_class_ivar_set(VALUE obj, ID id, VALUE val) +{ + RUBY_ASSERT(RB_TYPE_P(obj, T_CLASS) || RB_TYPE_P(obj, T_MODULE)); + rb_check_frozen(obj); + + bool new_ivar; + class_ivar_set(obj, id, val, &new_ivar); return new_ivar; } diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 1299de8666fed0..0fb4d086b7fc56 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1401,22 +1401,20 @@ vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, #if OPT_IC_FOR_IVAR RB_DEBUG_COUNTER_INC(ivar_set_ic_miss); - if (BUILTIN_TYPE(obj) == T_OBJECT) { - rb_check_frozen(obj); - - attr_index_t index = rb_obj_ivar_set(obj, id, val); - - shape_id_t next_shape_id = RBASIC_SHAPE_ID(obj); + rb_check_frozen(obj); - if (!rb_shape_too_complex_p(next_shape_id)) { - populate_cache(index, next_shape_id, id, iseq, ic, cc, is_attr); - } + attr_index_t index = rb_ivar_set_index(obj, id, val); + shape_id_t next_shape_id = RBASIC_SHAPE_ID(obj); - RB_DEBUG_COUNTER_INC(ivar_set_obj_miss); - return val; + if (!rb_shape_too_complex_p(next_shape_id)) { + populate_cache(index, next_shape_id, id, iseq, ic, cc, is_attr); } -#endif + + RB_DEBUG_COUNTER_INC(ivar_set_obj_miss); + return val; +#else return rb_ivar_set(obj, id, val); +#endif } static VALUE @@ -1431,6 +1429,49 @@ vm_setivar_slowpath_attr(VALUE obj, ID id, VALUE val, const struct rb_callcache return vm_setivar_slowpath(obj, id, val, NULL, NULL, cc, true); } +NOINLINE(static VALUE vm_setivar_class(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index)); +static VALUE +vm_setivar_class(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index) +{ + if (UNLIKELY(!rb_ractor_main_p())) { + return Qundef; + } + + VALUE fields_obj = RCLASS_WRITABLE_FIELDS_OBJ(obj); + if (UNLIKELY(!fields_obj)) { + return Qundef; + } + + shape_id_t shape_id = RBASIC_SHAPE_ID(fields_obj); + + // Cache hit case + if (shape_id == dest_shape_id) { + RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); + } + else if (dest_shape_id != INVALID_SHAPE_ID) { + if (RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && RSHAPE_EDGE_NAME(dest_shape_id) == id && RSHAPE_CAPACITY(shape_id) == RSHAPE_CAPACITY(dest_shape_id)) { + RUBY_ASSERT(index < RSHAPE_CAPACITY(dest_shape_id)); + } + else { + return Qundef; + } + } + else { + return Qundef; + } + + RB_OBJ_WRITE(fields_obj, &rb_imemo_fields_ptr(fields_obj)[index], val); + + if (shape_id != dest_shape_id) { + RBASIC_SET_SHAPE_ID(obj, dest_shape_id); + RBASIC_SET_SHAPE_ID(fields_obj, dest_shape_id); + } + + RB_DEBUG_COUNTER_INC(ivar_set_ic_hit); + + return val; +} + NOINLINE(static VALUE vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index)); static VALUE vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index) @@ -1627,8 +1668,12 @@ vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC i if (UNLIKELY(UNDEF_P(vm_setivar(obj, id, val, dest_shape_id, index)))) { switch (BUILTIN_TYPE(obj)) { case T_OBJECT: + break; case T_CLASS: case T_MODULE: + if (!UNDEF_P(vm_setivar_class(obj, id, val, dest_shape_id, index))) { + return; + } break; default: if (!UNDEF_P(vm_setivar_default(obj, id, val, dest_shape_id, index))) { @@ -4001,8 +4046,15 @@ vm_call_attrset_direct(rb_execution_context_t *ec, rb_control_frame_t *cfp, cons if (UNDEF_P(res)) { switch (BUILTIN_TYPE(obj)) { case T_OBJECT: + break; case T_CLASS: case T_MODULE: + { + res = vm_setivar_class(obj, id, val, dest_shape_id, index); + if (!UNDEF_P(res)) { + return res; + } + } break; default: { From 6023195fbd7f422e94b9d5cdee39273f71f9078f Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 28 Aug 2025 19:01:48 +0900 Subject: [PATCH 2/2] Abandon `ruby` target on the others than GNU make The default non-transformed name, `ruby` target was added for the case of `--program-transform-name` and similars, but it was occasionally added even when no such option is used. --- configure.ac | 16 +--------------- defs/gmake.mk | 4 ++++ win32/Makefile.sub | 1 + 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/configure.ac b/configure.ac index 82f144a8bbdff5..363748567500a1 100644 --- a/configure.ac +++ b/configure.ac @@ -4699,21 +4699,7 @@ AC_CONFIG_FILES(Makefile:template/Makefile.in, [ echo; echo '$(srcdir)/$(CONFIGURE):RUBY_M4_INCLUDED \ $(empty)' - } > $tmpmk && AS_IF([! grep '^ruby:' $tmpmk > /dev/null], [ - AS_IF([test "${gnumake}" = yes], [ - tmpgmk=confgmk$$.tmp - { - echo "include $tmpmk" - echo "-include uncommon.mk" - } > $tmpgmk - ], [ - tmpgmk=$tmpmk - ]) && - test -z "`${MAKE-make} -f $tmpgmk info-program | grep '^PROGRAM=ruby$'`" && - echo 'ruby: $(PROGRAM);' >> $tmpmk - rm -f uncommon.mk # remove stale uncommon.mk, it should be updated by GNUmakefile - test "$tmpmk" = "$tmpgmk" || rm -f "$tmpgmk" - ]) && mv -f $tmpmk Makefile], + } > $tmpmk && mv -f $tmpmk Makefile], [EXEEXT='$EXEEXT' MAKE='${MAKE-make}' gnumake='$gnumake' GIT='$GIT' YJIT_SUPPORT='$YJIT_SUPPORT']) AC_ARG_WITH([ruby-pc], diff --git a/defs/gmake.mk b/defs/gmake.mk index bd4b8b8e39f201..795320ce2d20aa 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -161,6 +161,10 @@ endif config.status: $(wildcard config.cache) +ifneq (ruby,$(PROGRAM)) +ruby: $(PROGRAM); +endif + STUBPROGRAM = rubystub$(EXEEXT) IGNOREDPATTERNS = %~ .% %.orig %.rej \#%\# SCRIPTBINDIR := $(if $(EXEEXT),,exec/) diff --git a/win32/Makefile.sub b/win32/Makefile.sub index 3312faa7d6c864..ade48d5e7575c2 100644 --- a/win32/Makefile.sub +++ b/win32/Makefile.sub @@ -540,6 +540,7 @@ ECHO_END = all: $(srcdir)/win32/Makefile.sub $(win_srcdir)/Makefile.sub $(srcdir)/common.mk $(srcdir)/depend prog: config +# The default non-transformed names without $(EXEEXT). ruby: $(PROGRAM) rubyw: $(WPROGRAM) stub: $(STUBPROGRAM)