From 06312377adc62ea1c82b5574749ee3704a1e4e9f Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 31 Jul 2025 14:38:16 -0400 Subject: [PATCH 1/7] Make Ractor::Selector write-barrier protected --- ractor_sync.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ractor_sync.c b/ractor_sync.c index a3ed38295b29c6..057448c5f58bfa 100644 --- a/ractor_sync.c +++ b/ractor_sync.c @@ -1273,7 +1273,7 @@ static const rb_data_type_t ractor_selector_data_type = { ractor_selector_memsize, NULL, // update }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, }; static struct ractor_selector * @@ -1318,6 +1318,8 @@ ractor_selector_add(VALUE selv, VALUE rpv) } st_insert(s->ports, (st_data_t)rpv, (st_data_t)rp); + RB_OBJ_WRITTEN(selv, Qundef, rpv); + return selv; } From 9b3ad3449be62c46fbacdb59f43aa526f7da2f79 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 6 Aug 2025 15:23:29 +0200 Subject: [PATCH 2/7] Mark `cross_ractor_require_data_type` as embeddable Nothing prevents it, so might as well. --- ractor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ractor.c b/ractor.c index 91542b940b6104..dc83ccc1b408c8 100644 --- a/ractor.c +++ b/ractor.c @@ -2302,7 +2302,7 @@ static const rb_data_type_t cross_ractor_require_data_type = { NULL, // memsize NULL, // compact }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE }; static VALUE From f3206cc79bec2fd852e81ec56de59f0a67ab32b7 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 6 Aug 2025 14:46:36 +0200 Subject: [PATCH 3/7] Struct: keep direct reference to IMEMO/fields when space allows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not rare for structs to have additional ivars, hence are one of the most common, if not the most common type in the `gen_fields_tbl`. This can cause Ractor contention, but even in single ractor mode means having to do a hash lookup to access the ivars, and increase GC work. Instead, unless the struct is perfectly right sized, we can store a reference to the associated IMEMO/fields object right after the last struct member. ``` compare-ruby: ruby 3.5.0dev (2025-08-06T12:50:36Z struct-ivar-fields-2 9a30d141a1) +PRISM [arm64-darwin24] built-ruby: ruby 3.5.0dev (2025-08-06T12:57:59Z struct-ivar-fields-2 2ff3ec237f) +PRISM [arm64-darwin24] warming up..... | |compare-ruby|built-ruby| |:---------------------|-----------:|---------:| |member_reader | 590.317k| 579.246k| | | 1.02x| -| |member_writer | 543.963k| 527.104k| | | 1.03x| -| |member_reader_method | 213.540k| 213.004k| | | 1.00x| -| |member_writer_method | 192.657k| 191.491k| | | 1.01x| -| |ivar_reader | 403.993k| 569.915k| | | -| 1.41x| ``` Co-Authored-By: Étienne Barrié --- benchmark/struct_accessor.yml | 12 +++++++ depend | 8 +++++ gc.c | 13 +++++++ internal/struct.h | 41 ++++++++++++++++++++++ struct.c | 11 +++++- test/ruby/test_object_id.rb | 49 ++++++++++++++++++++++++++ test/ruby/test_ractor.rb | 18 ++++++++++ variable.c | 65 +++++++++++++++++++++++------------ 8 files changed, 194 insertions(+), 23 deletions(-) diff --git a/benchmark/struct_accessor.yml b/benchmark/struct_accessor.yml index 61176cfdd48d3a..d95240e2dd8e45 100644 --- a/benchmark/struct_accessor.yml +++ b/benchmark/struct_accessor.yml @@ -1,5 +1,12 @@ prelude: | C = Struct.new(:x) do + def initialize(...) + super + @ivar = 42 + end + + attr_accessor :ivar + class_eval <<-END def r #{'x;'*256} @@ -15,11 +22,16 @@ prelude: | m = method(:x=) #{'m.call(nil);'*256} end + def r_ivar + #{'ivar;'*256} + end END end + C.new(nil) # ensure common shape is known obj = C.new(nil) benchmark: member_reader: "obj.r" member_writer: "obj.w" member_reader_method: "obj.rm" member_writer_method: "obj.wm" + ivar_reader: "obj.r_ivar" diff --git a/depend b/depend index ec8c2771c92104..ecaf33b1c7710c 100644 --- a/depend +++ b/depend @@ -6065,6 +6065,7 @@ hash.$(OBJEXT): $(top_srcdir)/internal/set_table.h hash.$(OBJEXT): $(top_srcdir)/internal/st.h hash.$(OBJEXT): $(top_srcdir)/internal/static_assert.h hash.$(OBJEXT): $(top_srcdir)/internal/string.h +hash.$(OBJEXT): $(top_srcdir)/internal/struct.h hash.$(OBJEXT): $(top_srcdir)/internal/symbol.h hash.$(OBJEXT): $(top_srcdir)/internal/thread.h hash.$(OBJEXT): $(top_srcdir)/internal/time.h @@ -6288,6 +6289,7 @@ hash.$(OBJEXT): {$(VPATH)}symbol.h hash.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h hash.$(OBJEXT): {$(VPATH)}thread_native.h hash.$(OBJEXT): {$(VPATH)}util.h +hash.$(OBJEXT): {$(VPATH)}variable.h hash.$(OBJEXT): {$(VPATH)}vm_core.h hash.$(OBJEXT): {$(VPATH)}vm_debug.h hash.$(OBJEXT): {$(VPATH)}vm_opts.h @@ -12926,6 +12928,7 @@ range.$(OBJEXT): $(top_srcdir)/internal/enumerator.h range.$(OBJEXT): $(top_srcdir)/internal/error.h range.$(OBJEXT): $(top_srcdir)/internal/fixnum.h range.$(OBJEXT): $(top_srcdir)/internal/gc.h +range.$(OBJEXT): $(top_srcdir)/internal/imemo.h range.$(OBJEXT): $(top_srcdir)/internal/numeric.h range.$(OBJEXT): $(top_srcdir)/internal/range.h range.$(OBJEXT): $(top_srcdir)/internal/serial.h @@ -12948,6 +12951,7 @@ range.$(OBJEXT): {$(VPATH)}config.h range.$(OBJEXT): {$(VPATH)}defines.h range.$(OBJEXT): {$(VPATH)}encoding.h range.$(OBJEXT): {$(VPATH)}id.h +range.$(OBJEXT): {$(VPATH)}id_table.h range.$(OBJEXT): {$(VPATH)}intern.h range.$(OBJEXT): {$(VPATH)}internal.h range.$(OBJEXT): {$(VPATH)}internal/abi.h @@ -15580,6 +15584,7 @@ shape.$(OBJEXT): $(top_srcdir)/internal/serial.h shape.$(OBJEXT): $(top_srcdir)/internal/set_table.h shape.$(OBJEXT): $(top_srcdir)/internal/static_assert.h shape.$(OBJEXT): $(top_srcdir)/internal/string.h +shape.$(OBJEXT): $(top_srcdir)/internal/struct.h shape.$(OBJEXT): $(top_srcdir)/internal/symbol.h shape.$(OBJEXT): $(top_srcdir)/internal/variable.h shape.$(OBJEXT): $(top_srcdir)/internal/vm.h @@ -16569,6 +16574,7 @@ string.$(OBJEXT): $(top_srcdir)/internal/serial.h string.$(OBJEXT): $(top_srcdir)/internal/set_table.h string.$(OBJEXT): $(top_srcdir)/internal/static_assert.h string.$(OBJEXT): $(top_srcdir)/internal/string.h +string.$(OBJEXT): $(top_srcdir)/internal/struct.h string.$(OBJEXT): $(top_srcdir)/internal/transcode.h string.$(OBJEXT): $(top_srcdir)/internal/variable.h string.$(OBJEXT): $(top_srcdir)/internal/vm.h @@ -16766,6 +16772,7 @@ string.$(OBJEXT): {$(VPATH)}thread.h string.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h string.$(OBJEXT): {$(VPATH)}thread_native.h string.$(OBJEXT): {$(VPATH)}util.h +string.$(OBJEXT): {$(VPATH)}variable.h string.$(OBJEXT): {$(VPATH)}vm_core.h string.$(OBJEXT): {$(VPATH)}vm_debug.h string.$(OBJEXT): {$(VPATH)}vm_opts.h @@ -18103,6 +18110,7 @@ variable.$(OBJEXT): $(top_srcdir)/internal/serial.h variable.$(OBJEXT): $(top_srcdir)/internal/set_table.h variable.$(OBJEXT): $(top_srcdir)/internal/static_assert.h variable.$(OBJEXT): $(top_srcdir)/internal/string.h +variable.$(OBJEXT): $(top_srcdir)/internal/struct.h variable.$(OBJEXT): $(top_srcdir)/internal/symbol.h variable.$(OBJEXT): $(top_srcdir)/internal/thread.h variable.$(OBJEXT): $(top_srcdir)/internal/variable.h diff --git a/gc.c b/gc.c index 4af43edcc4cb4b..64a22cd1b78fff 100644 --- a/gc.c +++ b/gc.c @@ -3260,6 +3260,10 @@ rb_gc_mark_children(void *objspace, VALUE obj) gc_mark_internal(ptr[i]); } + if (!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS)) { + gc_mark_internal(RSTRUCT_FIELDS_OBJ(obj)); + } + break; } @@ -4188,6 +4192,15 @@ rb_gc_update_object_references(void *objspace, VALUE obj) for (i = 0; i < len; i++) { UPDATE_IF_MOVED(objspace, ptr[i]); } + + if (RSTRUCT_EMBED_LEN(obj)) { + if (!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS)) { + UPDATE_IF_MOVED(objspace, ptr[len]); + } + } + else { + UPDATE_IF_MOVED(objspace, RSTRUCT(obj)->as.heap.fields_obj); + } } break; default: diff --git a/internal/struct.h b/internal/struct.h index a8c773b730300e..337f96a336d796 100644 --- a/internal/struct.h +++ b/internal/struct.h @@ -11,10 +11,23 @@ #include "ruby/internal/stdbool.h" /* for bool */ #include "ruby/ruby.h" /* for struct RBasic */ +/* Flags of RStruct + * + * 1-7: RSTRUCT_EMBED_LEN + * If non-zero, the struct is embedded (its contents follow the + * header, rather than being on a separately allocated buffer) and + * these bits are the length of the Struct. + * 8: RSTRUCT_GEN_FIELDS + * The struct is embedded and has no space left to store the + * IMEMO/fields reference. Any ivar this struct may have will be in + * the generic_fields_tbl. This flag doesn't imply the struct has + * ivars. + */ enum { RSTRUCT_EMBED_LEN_MASK = RUBY_FL_USER7 | RUBY_FL_USER6 | RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3 | RUBY_FL_USER2 | RUBY_FL_USER1, RSTRUCT_EMBED_LEN_SHIFT = (RUBY_FL_USHIFT+1), + RSTRUCT_GEN_FIELDS = RUBY_FL_USER8, }; struct RStruct { @@ -23,6 +36,7 @@ struct RStruct { struct { long len; const VALUE *ptr; + VALUE fields_obj; } heap; /* This is a length 1 array because: * 1. GCC has a bug that does not optimize C flexible array members @@ -116,4 +130,31 @@ RSTRUCT_GET(VALUE st, long k) return RSTRUCT_CONST_PTR(st)[k]; } +static inline VALUE +RSTRUCT_FIELDS_OBJ(VALUE st) +{ + const long embed_len = RSTRUCT_EMBED_LEN(st); + VALUE fields_obj; + if (embed_len) { + RUBY_ASSERT(!FL_TEST_RAW(st, RSTRUCT_GEN_FIELDS)); + fields_obj = RSTRUCT_GET(st, embed_len); + } + else { + fields_obj = RSTRUCT(st)->as.heap.fields_obj; + } + return fields_obj; +} + +static inline void +RSTRUCT_SET_FIELDS_OBJ(VALUE st, VALUE fields_obj) +{ + const long embed_len = RSTRUCT_EMBED_LEN(st); + if (embed_len) { + RUBY_ASSERT(!FL_TEST_RAW(st, RSTRUCT_GEN_FIELDS)); + RSTRUCT_SET(st, embed_len, fields_obj); + } + else { + RB_OBJ_WRITE(st, &RSTRUCT(st)->as.heap.fields_obj, fields_obj); + } +} #endif /* INTERNAL_STRUCT_H */ diff --git a/struct.c b/struct.c index 74ca9369a61c82..c53e68b3da7f67 100644 --- a/struct.c +++ b/struct.c @@ -811,13 +811,22 @@ struct_alloc(VALUE klass) { long n = num_members(klass); size_t embedded_size = offsetof(struct RStruct, as.ary) + (sizeof(VALUE) * n); + if (RCLASS_MAX_IV_COUNT(klass) > 0) { + embedded_size += sizeof(VALUE); + } + VALUE flags = T_STRUCT | (RGENGC_WB_PROTECTED_STRUCT ? FL_WB_PROTECTED : 0); if (n > 0 && rb_gc_size_allocatable_p(embedded_size)) { flags |= n << RSTRUCT_EMBED_LEN_SHIFT; NEWOBJ_OF(st, struct RStruct, klass, flags, embedded_size, 0); - + if (RCLASS_MAX_IV_COUNT(klass) == 0 && embedded_size == rb_gc_obj_slot_size((VALUE)st)) { + FL_SET_RAW((VALUE)st, RSTRUCT_GEN_FIELDS); + } + else { + RSTRUCT_SET_FIELDS_OBJ((VALUE)st, 0); + } rb_mem_clear((VALUE *)st->as.ary, n); return (VALUE)st; diff --git a/test/ruby/test_object_id.rb b/test/ruby/test_object_id.rb index 24434f8aba0ab8..adb819febce57c 100644 --- a/test/ruby/test_object_id.rb +++ b/test/ruby/test_object_id.rb @@ -252,3 +252,52 @@ def initialize end; end end + +class TestObjectIdStruct < TestObjectId + EmbeddedStruct = Struct.new(:embedded_field) + + def setup + @obj = EmbeddedStruct.new + end +end + +class TestObjectIdStructGenIvar < TestObjectId + GenIvarStruct = Struct.new(:a, :b, :c) + + def setup + @obj = GenIvarStruct.new + end +end + +class TestObjectIdStructNotEmbed < TestObjectId + MANY_IVS = 80 + + StructNotEmbed = Struct.new(*MANY_IVS.times.map { |i| :"field_#{i}" }) + + def setup + @obj = StructNotEmbed.new + end +end + +class TestObjectIdStructTooComplex < TestObjectId + StructTooComplex = Struct.new(:a) do + def initialize + @too_complex_obj_id_test = 1 + end + end + + def setup + if defined?(RubyVM::Shape::SHAPE_MAX_VARIATIONS) + assert_equal 8, RubyVM::Shape::SHAPE_MAX_VARIATIONS + end + 8.times do |i| + StructTooComplex.new.instance_variable_set("@TestObjectIdStructTooComplex#{i}", 1) + end + @obj = StructTooComplex.new + @obj.instance_variable_set("@a#{rand(10_000)}", 1) + + if defined?(RubyVM::Shape) + assert_predicate(RubyVM::Shape.of(@obj), :too_complex?) + end + end +end diff --git a/test/ruby/test_ractor.rb b/test/ruby/test_ractor.rb index 97af7e7413f44e..0a456a1d0f5e1f 100644 --- a/test/ruby/test_ractor.rb +++ b/test/ruby/test_ractor.rb @@ -99,6 +99,24 @@ class TestClass RUBY end + def test_struct_instance_variables + assert_ractor(<<~'RUBY') + StructIvar = Struct.new(:member) do + def initialize(*) + super + @ivar = "ivar" + end + attr_reader :ivar + end + obj = StructIvar.new("member") + obj_copy = Ractor.new { Ractor.receive }.send(obj).value + assert_equal obj.ivar, obj_copy.ivar + refute_same obj.ivar, obj_copy.ivar + assert_equal obj.member, obj_copy.member + refute_same obj.member, obj_copy.member + RUBY + end + def test_fork_raise_isolation_error assert_ractor(<<~'RUBY') ractor = Ractor.new do diff --git a/variable.c b/variable.c index f504cc57f57fc4..76b16b04cb3968 100644 --- a/variable.c +++ b/variable.c @@ -29,6 +29,7 @@ #include "internal/object.h" #include "internal/gc.h" #include "internal/re.h" +#include "internal/struct.h" #include "internal/symbol.h" #include "internal/thread.h" #include "internal/variable.h" @@ -1228,10 +1229,19 @@ rb_obj_fields(VALUE obj, ID field_name) ivar_ractor_check(obj, field_name); VALUE fields_obj = 0; - if (rb_obj_exivar_p(obj)) { - RB_VM_LOCKING() { - if (!st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&fields_obj)) { - rb_bug("Object is missing entry in generic_fields_tbl"); + if (rb_shape_obj_has_fields(obj)) { + switch (BUILTIN_TYPE(obj)) { + case T_STRUCT: + if (LIKELY(!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS))) { + fields_obj = RSTRUCT_FIELDS_OBJ(obj); + break; + } + // fall through + default: + RB_VM_LOCKING() { + if (!st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&fields_obj)) { + rb_bug("Object is missing entry in generic_fields_tbl"); + } } } } @@ -1243,11 +1253,19 @@ rb_free_generic_ivar(VALUE obj) { if (rb_obj_exivar_p(obj)) { st_data_t key = (st_data_t)obj, value; - - RB_VM_LOCKING() { - st_delete(generic_fields_tbl_no_ractor_check(), &key, &value); - RBASIC_SET_SHAPE_ID(obj, ROOT_SHAPE_ID); + switch (BUILTIN_TYPE(obj)) { + case T_STRUCT: + if (LIKELY(!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS))) { + RSTRUCT_SET_FIELDS_OBJ(obj, 0); + break; + } + // fall through + default: + RB_VM_LOCKING() { + st_delete(generic_fields_tbl_no_ractor_check(), &key, &value); + } } + RBASIC_SET_SHAPE_ID(obj, ROOT_SHAPE_ID); } } @@ -1260,12 +1278,20 @@ rb_obj_set_fields(VALUE obj, VALUE fields_obj, ID field_name, VALUE original_fie RUBY_ASSERT(!original_fields_obj || IMEMO_TYPE_P(original_fields_obj, imemo_fields)); if (fields_obj != original_fields_obj) { - RB_VM_LOCKING() { - st_insert(generic_fields_tbl_, (st_data_t)obj, (st_data_t)fields_obj); + switch (BUILTIN_TYPE(obj)) { + case T_STRUCT: + if (LIKELY(!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS))) { + RSTRUCT_SET_FIELDS_OBJ(obj, fields_obj); + break; + } + // fall through + default: + RB_VM_LOCKING() { + st_insert(generic_fields_tbl_, (st_data_t)obj, (st_data_t)fields_obj); + } + RB_OBJ_WRITTEN(obj, original_fields_obj, fields_obj); } - RB_OBJ_WRITTEN(obj, original_fields_obj, fields_obj); - if (original_fields_obj) { // Clear root shape to avoid triggering cleanup such as free_object_id. rb_imemo_fields_clear(original_fields_obj); @@ -1276,11 +1302,11 @@ rb_obj_set_fields(VALUE obj, VALUE fields_obj, ID field_name, VALUE original_fie } void -rb_obj_replace_fields(VALUE obj, VALUE fields_obj, ID field_name) +rb_obj_replace_fields(VALUE obj, VALUE fields_obj) { RB_VM_LOCKING() { - VALUE original_fields_obj = rb_obj_fields(obj, field_name); - rb_obj_set_fields(obj, fields_obj, field_name, original_fields_obj); + VALUE original_fields_obj = rb_obj_fields_no_ractor_check(obj); + rb_obj_set_fields(obj, fields_obj, 0, original_fields_obj); } } @@ -1608,7 +1634,7 @@ obj_transition_too_complex(VALUE obj, st_table *table) { VALUE fields_obj = rb_imemo_fields_new_complex_tbl(rb_obj_class(obj), table); RBASIC_SET_SHAPE_ID(fields_obj, shape_id); - rb_obj_replace_fields(obj, fields_obj, 0); + rb_obj_replace_fields(obj, fields_obj); } } @@ -2299,12 +2325,7 @@ rb_copy_generic_ivar(VALUE dest, VALUE obj) rb_shape_copy_fields(new_fields_obj, dest_buf, dest_shape_id, src_buf, src_shape_id); RBASIC_SET_SHAPE_ID(new_fields_obj, dest_shape_id); - RB_VM_LOCKING() { - st_insert(generic_fields_tbl_no_ractor_check(), (st_data_t)dest, (st_data_t)new_fields_obj); - RB_OBJ_WRITTEN(dest, Qundef, new_fields_obj); - } - - RBASIC_SET_SHAPE_ID(dest, dest_shape_id); + rb_obj_replace_fields(dest, new_fields_obj); } return; From 7aa0e09bfe9cbb9f9abe8111fa3673a85d86ca9b Mon Sep 17 00:00:00 2001 From: ydah Date: Tue, 5 Aug 2025 21:44:13 +0900 Subject: [PATCH 4/7] Fix typo in comment regarding symlink flags in autogen.sh --- autogen.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogen.sh b/autogen.sh index f11a471a08efa5..6cbc5dddabe65e 100755 --- a/autogen.sh +++ b/autogen.sh @@ -10,7 +10,7 @@ case "$0" in * ) srcdir="";; # Otherwise esac -# If install-only is explicitly requested, disbale symlink flags +# If install-only is explicitly requested, disable symlink flags case " $* " in *" -i "* | *" --install"* ) symlink_flags="" ;; * ) symlink_flags="--install --symlink" ;; From bcd21053f733a93a82a34c6f5c23d6af2a8010ed Mon Sep 17 00:00:00 2001 From: S-H-GAMELINKS Date: Sat, 26 Jul 2025 10:31:23 +0900 Subject: [PATCH 5/7] Add MODULE NODE locations Add `keyword_module` amd `keyword_end` locations to struct `RNode_MODULE`. memo: ``` >ruby --dump=parsetree -e 'module A end' @ ProgramNode (location: (1,0)-(1,12)) +-- locals: [] +-- statements: @ StatementsNode (location: (1,0)-(1,12)) +-- body: (length: 1) +-- @ ModuleNode (location: (1,0)-(1,12)) +-- locals: [] +-- module_keyword_loc: (1,0)-(1,6) = "module" +-- constant_path: | @ ConstantReadNode (location: (1,7)-(1,8)) | +-- name: :A +-- body: nil +-- end_keyword_loc: (1,9)-(1,12) = "end" +-- name: :A ``` --- ast.c | 5 +++++ node_dump.c | 4 +++- parse.y | 10 ++++++---- rubyparser.h | 2 ++ test/ruby/test_ast.rb | 5 +++++ 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/ast.c b/ast.c index 04f2d1384c5edc..bc2adeacd630a4 100644 --- a/ast.c +++ b/ast.c @@ -866,6 +866,11 @@ node_locations(VALUE ast_value, const NODE *node) location_new(&RNODE_IF(node)->if_keyword_loc), location_new(&RNODE_IF(node)->then_keyword_loc), location_new(&RNODE_IF(node)->end_keyword_loc)); + case NODE_MODULE: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_MODULE(node)->module_keyword_loc), + location_new(&RNODE_MODULE(node)->end_keyword_loc)); case NODE_NEXT: return rb_ary_new_from_args(2, location_new(nd_code_loc(node)), diff --git a/node_dump.c b/node_dump.c index 9822ae5fc25a3a..c318baeeede009 100644 --- a/node_dump.c +++ b/node_dump.c @@ -1009,8 +1009,10 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node) ANN("format: module [nd_cpath]; [nd_body]; end"); ANN("example: module M; ..; end"); F_NODE(nd_cpath, RNODE_MODULE, "module path"); - LAST_NODE; F_NODE(nd_body, RNODE_MODULE, "module definition"); + F_LOC(module_keyword_loc, RNODE_MODULE); + LAST_NODE; + F_LOC(end_keyword_loc, RNODE_MODULE); return; case NODE_SCLASS: diff --git a/parse.y b/parse.y index dbe21332b4dd83..e77dc790bcd1cd 100644 --- a/parse.y +++ b/parse.y @@ -1145,7 +1145,7 @@ static rb_node_alias_t *rb_node_alias_new(struct parser_params *p, NODE *nd_1st, static rb_node_valias_t *rb_node_valias_new(struct parser_params *p, ID nd_alias, ID nd_orig, const YYLTYPE *loc, const YYLTYPE *keyword_loc); static rb_node_undef_t *rb_node_undef_new(struct parser_params *p, NODE *nd_undef, const YYLTYPE *loc); static rb_node_class_t *rb_node_class_new(struct parser_params *p, NODE *nd_cpath, NODE *nd_body, NODE *nd_super, const YYLTYPE *loc, const YYLTYPE *class_keyword_loc, const YYLTYPE *inheritance_operator_loc, const YYLTYPE *end_keyword_loc); -static rb_node_module_t *rb_node_module_new(struct parser_params *p, NODE *nd_cpath, NODE *nd_body, const YYLTYPE *loc); +static rb_node_module_t *rb_node_module_new(struct parser_params *p, NODE *nd_cpath, NODE *nd_body, const YYLTYPE *loc, const YYLTYPE *module_keyword_loc, const YYLTYPE *end_keyword_loc); static rb_node_sclass_t *rb_node_sclass_new(struct parser_params *p, NODE *nd_recv, NODE *nd_body, const YYLTYPE *loc); static rb_node_colon2_t *rb_node_colon2_new(struct parser_params *p, NODE *nd_head, ID nd_mid, const YYLTYPE *loc, const YYLTYPE *delimiter_loc, const YYLTYPE *name_loc); static rb_node_colon3_t *rb_node_colon3_new(struct parser_params *p, ID nd_mid, const YYLTYPE *loc, const YYLTYPE *delimiter_loc, const YYLTYPE *name_loc); @@ -1253,7 +1253,7 @@ static rb_node_error_t *rb_node_error_new(struct parser_params *p, const YYLTYPE #define NEW_VALIAS(n,o,loc,k_loc) (NODE *)rb_node_valias_new(p,n,o,loc,k_loc) #define NEW_UNDEF(i,loc) (NODE *)rb_node_undef_new(p,i,loc) #define NEW_CLASS(n,b,s,loc,ck_loc,io_loc,ek_loc) (NODE *)rb_node_class_new(p,n,b,s,loc,ck_loc,io_loc,ek_loc) -#define NEW_MODULE(n,b,loc) (NODE *)rb_node_module_new(p,n,b,loc) +#define NEW_MODULE(n,b,loc,mk_loc,ek_loc) (NODE *)rb_node_module_new(p,n,b,loc,mk_loc,ek_loc) #define NEW_SCLASS(r,b,loc) (NODE *)rb_node_sclass_new(p,r,b,loc) #define NEW_COLON2(c,i,loc,d_loc,n_loc) (NODE *)rb_node_colon2_new(p,c,i,loc,d_loc,n_loc) #define NEW_COLON3(i,loc,d_loc,n_loc) (NODE *)rb_node_colon3_new(p,i,loc,d_loc,n_loc) @@ -4621,7 +4621,7 @@ primary : inline_primary bodystmt k_end { - $$ = NEW_MODULE($cpath, $bodystmt, &@$); + $$ = NEW_MODULE($cpath, $bodystmt, &@$, &@k_module, &@k_end); nd_set_line(RNODE_MODULE($$)->nd_body, @k_end.end_pos.lineno); set_line_body($bodystmt, @cpath.end_pos.lineno); nd_set_line($$, @cpath.end_pos.lineno); @@ -11438,13 +11438,15 @@ rb_node_sclass_new(struct parser_params *p, NODE *nd_recv, NODE *nd_body, const } static rb_node_module_t * -rb_node_module_new(struct parser_params *p, NODE *nd_cpath, NODE *nd_body, const YYLTYPE *loc) +rb_node_module_new(struct parser_params *p, NODE *nd_cpath, NODE *nd_body, const YYLTYPE *loc, const YYLTYPE *module_keyword_loc, const YYLTYPE *end_keyword_loc) { /* Keep the order of node creation */ NODE *scope = NEW_SCOPE(0, nd_body, loc); rb_node_module_t *n = NODE_NEWNODE(NODE_MODULE, rb_node_module_t, loc); n->nd_cpath = nd_cpath; n->nd_body = scope; + n->module_keyword_loc = *module_keyword_loc; + n->end_keyword_loc = *end_keyword_loc; return n; } diff --git a/rubyparser.h b/rubyparser.h index 9fd6906ca60c08..e436d1c404b2a2 100644 --- a/rubyparser.h +++ b/rubyparser.h @@ -901,6 +901,8 @@ typedef struct RNode_MODULE { struct RNode *nd_cpath; struct RNode *nd_body; + rb_code_location_t module_keyword_loc; + rb_code_location_t end_keyword_loc; } rb_node_module_t; typedef struct RNode_SCLASS { diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb index 5524fa7146e035..6372b0d34e5e71 100644 --- a/test/ruby/test_ast.rb +++ b/test/ruby/test_ast.rb @@ -1491,6 +1491,11 @@ def test_lambda_locations assert_locations(node.children[-1].locations, [[1, 0, 1, 20], [1, 0, 1, 2], [1, 10, 1, 12], [1, 17, 1, 20]]) end + def test_module_locations + node = ast_parse('module A end') + assert_locations(node.children[-1].locations, [[1, 0, 1, 12], [1, 0, 1, 6], [1, 9, 1, 12]]) + end + def test_if_locations node = ast_parse("if cond then 1 else 2 end") assert_locations(node.children[-1].locations, [[1, 0, 1, 25], [1, 0, 1, 2], [1, 8, 1, 12], [1, 22, 1, 25]]) From ebb775be8d30e42da66597997bccf57c0d33f58d Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 6 Aug 2025 10:05:20 -0700 Subject: [PATCH 6/7] ZJIT: Fix "immediate value too large" on cmp for x86_64 (#14125) Co-authored-by: Alan Wu --- test/ruby/test_zjit.rb | 8 ++++++++ zjit/src/backend/x86_64/mod.rs | 27 +++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 6db57e18ba729c..e3ef6f65698ad6 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -283,6 +283,14 @@ def test(a, b) = a == b }, insns: [:opt_eq], call_threshold: 2 end + def test_opt_eq_with_minus_one + assert_compiles '[false, true]', %q{ + def test(a) = a == -1 + test(1) # profile opt_eq + [test(0), test(-1)] + }, insns: [:opt_eq], call_threshold: 2 + end + def test_opt_neq_dynamic # TODO(max): Don't split this test; instead, run all tests with and without # profiling. diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index d21c7ee09c82d2..f7aa1acc4dae63 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -381,7 +381,7 @@ impl Assembler mov(cb, Assembler::SCRATCH0, opnd.into()); Assembler::SCRATCH0 } else { - opnd.into() + imm_opnd(*value as i64) } }, _ => opnd.into() @@ -963,7 +963,9 @@ mod tests { asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); asm.compile_with_num_regs(&mut cb, 0); - assert_eq!(format!("{:x}", cb), "4881f8ff000000"); + assert_disasm!(cb, "4881f8ff000000", " + 0x0: cmp rax, 0xff + "); } #[test] @@ -973,7 +975,22 @@ mod tests { asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); asm.compile_with_num_regs(&mut cb, 0); - assert_eq!(format!("{:x}", cb), "49bbffffffffffff00004c39d8"); + assert_disasm!(cb, "49bbffffffffffff00004c39d8", " + 0x0: movabs r11, 0xffffffffffff + 0xa: cmp rax, r11 + "); + } + + #[test] + fn test_emit_cmp_64_bits() { + let (mut asm, mut cb) = setup_asm(); + + asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4883f8ff", " + 0x0: cmp rax, -1 + "); } #[test] @@ -1051,7 +1068,9 @@ mod tests { asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); asm.compile_with_num_regs(&mut cb, 0); - assert_eq!(format!("{:x}", cb), "f6c0ff"); + assert_disasm!(cb, "48f7c0ff000000", " + 0x0: test rax, 0xff + "); } #[test] From 71b46938a7dd61304f012025e06891e8fe2e37fb Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 5 Aug 2025 15:53:02 -0400 Subject: [PATCH 7/7] Fix off-by-one in symbol next_id Symbol last_id was changed to next_id, but it remained to be set to tNEXT_ID - 1 initially, causing the initial static symbol to overlap with the last built-in symbol in id.def. --- symbol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/symbol.c b/symbol.c index abb2c76dc2f758..c337cc288ecded 100644 --- a/symbol.c +++ b/symbol.c @@ -99,7 +99,9 @@ typedef struct { VALUE ids; } rb_symbols_t; -rb_symbols_t ruby_global_symbols = {tNEXT_ID-1}; +rb_symbols_t ruby_global_symbols = { + .next_id = tNEXT_ID, +}; struct sym_set_static_sym_entry { VALUE sym;