diff --git a/.gdbinit b/.gdbinit index f204b3a235dacc..bda544c64136ff 100644 --- a/.gdbinit +++ b/.gdbinit @@ -185,12 +185,19 @@ define rp print (struct RBasic *)($arg0) else if ($flags & RUBY_T_MASK) == RUBY_T_DATA - if ((struct RTypedData *)($arg0))->type & 1 - printf "%sT_DATA%s(%s): ", $color_type, $color_end, ((const rb_data_type_t *)(((struct RTypedData *)($arg0))->type & ~1))->wrap_struct_name - print (struct RTypedData *)($arg0) + if ($flags & RUBY_TYPED_FL_IS_TYPED_DATA) + set $data = (struct RTypedData *)($arg0) + set $type = (const rb_data_type_t *)($data->type & ~1) + printf "%sT_DATA%s(%s): ", $color_type, $color_end, $type->wrap_struct_name + print *$type + if ($data->type & 1) + print (void *)&$data->data + else + print $data + end else printf "%sT_DATA%s: ", $color_type, $color_end - print (struct RData *)($arg0) + print *(struct RData *)($arg0) end else if ($flags & RUBY_T_MASK) == RUBY_T_MATCH diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 3551ac8ef2adae..88c19b6fe60ee3 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -208,7 +208,7 @@ jobs: matrix: include: # Using the same setup as ZJIT jobs - - bench_opts: '--warmup=1 --bench=1' + - bench_opts: '--warmup=1 --bench=1 --excludes=lobsters' runs-on: ubuntu-24.04 diff --git a/.github/workflows/zjit-macos.yml b/.github/workflows/zjit-macos.yml index 6da45a3a42acf8..a638907811c3f1 100644 --- a/.github/workflows/zjit-macos.yml +++ b/.github/workflows/zjit-macos.yml @@ -158,7 +158,7 @@ jobs: include: # Test --call-threshold=2 with 2 iterations in total - ruby_opts: '--zjit-call-threshold=2' - bench_opts: '--warmup=1 --bench=1' + bench_opts: '--warmup=1 --bench=1 --excludes=lobsters' configure: '--enable-zjit=dev_nodebug' # --enable-zjit=dev is too slow runs-on: macos-14 diff --git a/.github/workflows/zjit-ubuntu.yml b/.github/workflows/zjit-ubuntu.yml index d8b5460ed7f703..28bfec963e57f5 100644 --- a/.github/workflows/zjit-ubuntu.yml +++ b/.github/workflows/zjit-ubuntu.yml @@ -215,7 +215,7 @@ jobs: include: # Test --call-threshold=2 with 2 iterations in total - ruby_opts: '--zjit-call-threshold=2' - bench_opts: '--warmup=1 --bench=1' + bench_opts: '--warmup=1 --bench=1 --excludes=lobsters' configure: '--enable-zjit=dev_nodebug' # --enable-zjit=dev is too slow runs-on: ubuntu-24.04 diff --git a/debug.c b/debug.c index 4daee2bd1cbd0d..1a2c27a5be5e15 100644 --- a/debug.c +++ b/debug.c @@ -57,6 +57,7 @@ const union { enum ruby_rstring_flags rstring_flags; enum ruby_rarray_flags rarray_flags; enum ruby_rarray_consts rarray_consts; + enum rbimpl_typeddata_flags rtypeddata_consts; enum { RUBY_FMODE_READABLE = FMODE_READABLE, RUBY_FMODE_WRITABLE = FMODE_WRITABLE, diff --git a/include/ruby/internal/abi.h b/include/ruby/internal/abi.h index e735a67564d885..e6d1fa7e8f3770 100644 --- a/include/ruby/internal/abi.h +++ b/include/ruby/internal/abi.h @@ -24,7 +24,7 @@ * In released versions of Ruby, this number is not defined since teeny * versions of Ruby should guarantee ABI compatibility. */ -#define RUBY_ABI_VERSION 0 +#define RUBY_ABI_VERSION 1 /* Windows does not support weak symbols so ruby_abi_version will not exist * in the shared library. */ diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h index 8c0397d80be56e..ec0794e387396f 100644 --- a/include/ruby/internal/core/rtypeddata.h +++ b/include/ruby/internal/core/rtypeddata.h @@ -265,6 +265,13 @@ struct rb_data_type_struct { * @internal */ void (*handle_weak_references)(void *); + + /** + * This field is reserved for future extension. For now, it must be + * filled with zeros. + */ + void *reserved[7]; /* For future extension. + This array *must* be filled with ZERO. */ } function; /** diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index a901a726921375..6552d2dbb80794 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -78,6 +78,19 @@ def self.lex(src, filename = "-", lineno = 1, raise_errors: false) end end + # Tokenizes the Ruby program and returns an array of strings. + # The +filename+ and +lineno+ arguments are mostly ignored, since the + # return value is just the tokenized input. + # By default, this method does not handle syntax errors in +src+, + # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+. + # + # p Ripper.tokenize("def m(a) nil end") + # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"] + # + def self.tokenize(...) + lex(...).map(&:value) + end + # This contains a table of all of the parser events and their # corresponding arity. PARSER_EVENT_TABLE = { diff --git a/misc/lldb_rb/utils.py b/misc/lldb_rb/utils.py index f4775bc4f97774..1415dd3f333371 100644 --- a/misc/lldb_rb/utils.py +++ b/misc/lldb_rb/utils.py @@ -236,13 +236,26 @@ def inspect(self, val): elif rval.is_type("RUBY_T_DATA"): tRTypedData = self.target.FindFirstType("struct RTypedData").GetPointerType() val = val.Cast(tRTypedData) - flag = val.GetValueForExpressionPath("->typed_flag") - - if flag.GetValueAsUnsigned() == 1: - print("T_DATA: %s" % - val.GetValueForExpressionPath("->type->wrap_struct_name"), + is_typed_data = self.ruby_globals.get("RUBY_TYPED_FL_IS_TYPED_DATA", None) + if is_typed_data: + typed = rval.flags & is_typed_data + else: + typed = val.GetValueForExpressionPath("->typed_flag").GetValueAsUnsigned() == 1 + + if typed: + type = val.GetValueForExpressionPath("->type").GetValueAsUnsigned() + embed = (type & 1) + if embed: + flaginfo += "[EMBED] " + type = self.frame.EvaluateExpression("(rb_data_type_t *)%0#x" % (type & ~1)) + print("T_DATA: %s%s" % + (flaginfo, type.GetValueForExpressionPath("->wrap_struct_name")), file=self.result) - self._append_expression("*(struct RTypedData *) %0#x" % val.GetValueAsUnsigned()) + print("%s", type.Dereference(), file=self.result) + ptr = val.GetValueForExpressionPath("->data") + if embed: + ptr = ptr.AddressOf() + self._append_expression("(void *)%0#x" % ptr.GetValueAsUnsigned()) else: print("T_DATA:", file=self.result) self._append_expression("*(struct RData *) %0#x" % val.GetValueAsUnsigned()) diff --git a/re.c b/re.c index b2c1909c153895..82e9407a0a85ca 100644 --- a/re.c +++ b/re.c @@ -1014,6 +1014,7 @@ update_char_offset(VALUE match) char *s, *p, *q; rb_encoding *enc; pair_t *pairs; + VALUE pairs_obj = Qnil; if (rm->char_offset_num_allocated) return; @@ -1035,7 +1036,7 @@ update_char_offset(VALUE match) return; } - pairs = ALLOCA_N(pair_t, num_regs*2); + pairs = RB_ALLOCV_N(pair_t, pairs_obj, num_regs * 2); num_pos = 0; for (i = 0; i < num_regs; i++) { if (BEG(i) < 0) @@ -1070,6 +1071,8 @@ update_char_offset(VALUE match) found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp); rm->char_offset[i].end = found->char_pos; } + + RB_ALLOCV_END(pairs_obj); } static VALUE @@ -2614,6 +2617,7 @@ match_inspect(VALUE match) struct re_registers *regs = RMATCH_REGS(match); int num_regs = regs->num_regs; struct backref_name_tag *names; + VALUE names_obj = Qnil; VALUE regexp = RMATCH(match)->regexp; if (regexp == 0) { @@ -2624,7 +2628,7 @@ match_inspect(VALUE match) cname, rb_reg_nth_match(0, match)); } - names = ALLOCA_N(struct backref_name_tag, num_regs); + names = RB_ALLOCV_N(struct backref_name_tag, names_obj, num_regs); MEMZERO(names, struct backref_name_tag, num_regs); onig_foreach_name(RREGEXP_PTR(regexp), @@ -2652,6 +2656,7 @@ match_inspect(VALUE match) } rb_str_buf_cat2(str, ">"); + RB_ALLOCV_END(names_obj); return str; } diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index 2bd9c2fe4af5eb..cac20a073db61b 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -84,6 +84,11 @@ class RipperTest < TestCase define_method("#{fixture.test_name}_lexer_parse") { assert_ripper_lexer_parse(fixture.read) } end + def test_tokenize + source = "foo;1;BAZ" + assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source)) + end + # Check that the hardcoded values don't change without us noticing. def test_internals actual = Translation::Ripper.constants.select { |name| name.start_with?("EXPR_") }.sort diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 2d7a67dd549c61..9feababa5387f5 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1669,6 +1669,30 @@ def test_matchdata assert_equal("hoge fuga", h["body"]) end + def test_matchdata_large_capture_groups_stack + env = {"RUBY_THREAD_MACHINE_STACK_SIZE" => (256 * 1024).to_s} + assert_separately([env], <<~'RUBY') + n = 20000 + require "rbconfig/sizeof" + stack = RubyVM::DEFAULT_PARAMS[:thread_machine_stack_size] + size = RbConfig::SIZEOF["long"] + required = (n + 1) * 4 * size + if !stack || stack == 0 || stack >= required + omit "thread machine stack size not reduced (#{stack}:#{required})" + end + + inspect = Thread.new do + str = "\u{3042}" * n + m = Regexp.new("(.)" * n).match(str) + assert_not_nil(m) + assert_equal([n - 1, n], m.offset(n)) + m.inspect + end.value + + assert_include(inspect, "MatchData") + RUBY + end + def test_regexp_popped EnvUtil.suppress_warning do assert_nothing_raised { eval("a = 1; /\#{ a }/; a") } diff --git a/win32/setup.mak b/win32/setup.mak index 77b7d2f406a1dc..6fc28ebafbc996 100644 --- a/win32/setup.mak +++ b/win32/setup.mak @@ -36,7 +36,7 @@ i686-mswin32: -prologue- -i686- -epilogue- alpha-mswin32: -prologue- -alpha- -epilogue- x64-mswin64: -prologue- -x64- -epilogue- --prologue-: -basic-vars- +-prologue-: -basic-vars- -baseruby- -gmp- -generic-: -osname- -basic-vars-: nul @@ -49,9 +49,13 @@ prefix = $(prefix:\=/) << @type $(config_make) >>$(MAKEFILE) @del $(config_make) > nul + +-baseruby-: nul !if "$(HAVE_BASERUBY)" != "no" - @$(BASERUBY:/=\) "$(srcdir)/tool/missing-baseruby.bat" --verbose $(HAVE_BASERUBY:yes=|| exit )|| exit 0 + @cd $(srcdir:/=\)\tool && $(BASERUBY:/=\) missing-baseruby.bat --verbose || exit $(HAVE_BASERUBY:yes=non-)0 !endif + +-gmp-: !if "$(WITH_GMP)" != "no" @($(CC) $(XINCFLAGS) < nul && (echo USE_GMP = yes) || exit /b 0) >>$(MAKEFILE) #include