From 091a1cd880f2f03085c408c5d8fe4b543eee009b Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 5 Nov 2025 04:06:05 +0900 Subject: [PATCH 01/15] Remove tests for obsolete StringScanner methods ruby/strscan#168 --- spec/ruby/library/stringscanner/clear_spec.rb | 18 ---- spec/ruby/library/stringscanner/empty_spec.rb | 18 ---- spec/ruby/library/stringscanner/eos_spec.rb | 17 +++- .../library/stringscanner/get_byte_spec.rb | 85 +++++++++++++++++- .../library/stringscanner/getbyte_spec.rb | 21 ----- spec/ruby/library/stringscanner/peek_spec.rb | 39 ++++++++- spec/ruby/library/stringscanner/peep_spec.rb | 18 ---- .../library/stringscanner/rest_size_spec.rb | 18 +++- spec/ruby/library/stringscanner/rest_spec.rb | 21 ----- .../library/stringscanner/restsize_spec.rb | 18 ---- spec/ruby/library/stringscanner/shared/eos.rb | 17 ---- .../library/stringscanner/shared/get_byte.rb | 87 ------------------- .../ruby/library/stringscanner/shared/peek.rb | 39 --------- .../library/stringscanner/shared/rest_size.rb | 18 ---- .../library/stringscanner/shared/terminate.rb | 8 -- .../library/stringscanner/terminate_spec.rb | 8 +- 16 files changed, 157 insertions(+), 293 deletions(-) delete mode 100644 spec/ruby/library/stringscanner/clear_spec.rb delete mode 100644 spec/ruby/library/stringscanner/empty_spec.rb delete mode 100644 spec/ruby/library/stringscanner/getbyte_spec.rb delete mode 100644 spec/ruby/library/stringscanner/peep_spec.rb delete mode 100644 spec/ruby/library/stringscanner/restsize_spec.rb delete mode 100644 spec/ruby/library/stringscanner/shared/eos.rb delete mode 100644 spec/ruby/library/stringscanner/shared/get_byte.rb delete mode 100644 spec/ruby/library/stringscanner/shared/peek.rb delete mode 100644 spec/ruby/library/stringscanner/shared/rest_size.rb delete mode 100644 spec/ruby/library/stringscanner/shared/terminate.rb diff --git a/spec/ruby/library/stringscanner/clear_spec.rb b/spec/ruby/library/stringscanner/clear_spec.rb deleted file mode 100644 index 
7ae089704a101a..00000000000000 --- a/spec/ruby/library/stringscanner/clear_spec.rb +++ /dev/null @@ -1,18 +0,0 @@ -require_relative '../../spec_helper' -require_relative 'shared/terminate' -require 'strscan' - -describe "StringScanner#clear" do - it_behaves_like :strscan_terminate, :clear - - it "warns in verbose mode that the method is obsolete" do - s = StringScanner.new("abc") - -> { - s.clear - }.should complain(/clear.*obsolete.*terminate/, verbose: true) - - -> { - s.clear - }.should_not complain(verbose: false) - end -end diff --git a/spec/ruby/library/stringscanner/empty_spec.rb b/spec/ruby/library/stringscanner/empty_spec.rb deleted file mode 100644 index d9449bea6ea899..00000000000000 --- a/spec/ruby/library/stringscanner/empty_spec.rb +++ /dev/null @@ -1,18 +0,0 @@ -require_relative '../../spec_helper' -require_relative 'shared/eos' -require 'strscan' - -describe "StringScanner#empty?" do - it_behaves_like :strscan_eos, :empty? - - it "warns in verbose mode that the method is obsolete" do - s = StringScanner.new("abc") - -> { - s.empty? - }.should complain(/empty?.*obsolete.*eos?/, verbose: true) - - -> { - s.empty? - }.should_not complain(verbose: false) - end -end diff --git a/spec/ruby/library/stringscanner/eos_spec.rb b/spec/ruby/library/stringscanner/eos_spec.rb index b58ee1e4737dd5..03c2804e5b647f 100644 --- a/spec/ruby/library/stringscanner/eos_spec.rb +++ b/spec/ruby/library/stringscanner/eos_spec.rb @@ -1,7 +1,20 @@ require_relative '../../spec_helper' -require_relative 'shared/eos' require 'strscan' describe "StringScanner#eos?" do - it_behaves_like :strscan_eos, :eos? + before :each do + @s = StringScanner.new("This is a test") + end + + it "returns true if the scan pointer is at the end of the string" do + @s.terminate + @s.should.eos? + + s = StringScanner.new('') + s.should.eos? + end + + it "returns false if the scan pointer is not at the end of the string" do + @s.should_not.eos? 
+ end end diff --git a/spec/ruby/library/stringscanner/get_byte_spec.rb b/spec/ruby/library/stringscanner/get_byte_spec.rb index 29e2f557de1914..b3c2b7f678edd6 100644 --- a/spec/ruby/library/stringscanner/get_byte_spec.rb +++ b/spec/ruby/library/stringscanner/get_byte_spec.rb @@ -1,7 +1,88 @@ +# encoding: binary require_relative '../../spec_helper' -require_relative 'shared/get_byte' require 'strscan' describe "StringScanner#get_byte" do - it_behaves_like :strscan_get_byte, :get_byte + it "scans one byte and returns it" do + s = StringScanner.new('abc5.') + s.get_byte.should == 'a' + s.get_byte.should == 'b' + s.get_byte.should == 'c' + s.get_byte.should == '5' + s.get_byte.should == '.' + end + + it "is not multi-byte character sensitive" do + s = StringScanner.new("\244\242") + s.get_byte.should == "\244" + s.get_byte.should == "\242" + end + + it "returns nil at the end of the string" do + # empty string case + s = StringScanner.new('') + s.get_byte.should == nil + s.get_byte.should == nil + + # non-empty string case + s = StringScanner.new('a') + s.get_byte # skip one + s.get_byte.should == nil + end + + describe "#[] successive call with a capture group name" do + # https://github.com/ruby/strscan/issues/139 + ruby_version_is ""..."3.5" do # Don't run on 3.5.0dev that already contains not released fixes + version_is StringScanner::Version, "3.1.1"..."3.1.3" do # ruby_version_is "3.4.0"..."3.4.3" + it "returns nil" do + s = StringScanner.new("This is a test") + s.get_byte + s.should.matched? + s[:a].should be_nil + end + end + end + version_is StringScanner::Version, "3.1.3" do # ruby_version_is "3.4.3" + it "raises IndexError" do + s = StringScanner.new("This is a test") + s.get_byte + s.should.matched? 
+ -> { s[:a] }.should raise_error(IndexError) + end + end + + it "returns a matching character when given Integer index" do + s = StringScanner.new("This is a test") + s.get_byte + s[0].should == "T" + end + + # https://github.com/ruby/strscan/issues/135 + ruby_version_is ""..."3.5" do # Don't run on 3.5.0dev that already contains not released fixes + version_is StringScanner::Version, "3.1.1"..."3.1.3" do # ruby_version_is "3.4.0"..."3.4.3" + it "ignores the previous matching with Regexp" do + s = StringScanner.new("This is a test") + s.exist?(/(?<a>This)/) + s.should.matched? + s[:a].should == "This" + + s.get_byte + s.should.matched? + s[:a].should be_nil + end + end + end + version_is StringScanner::Version, "3.1.3" do # ruby_version_is "3.4.3" + it "ignores the previous matching with Regexp" do + s = StringScanner.new("This is a test") + s.exist?(/(?<a>This)/) + s.should.matched? + s[:a].should == "This" + + s.get_byte + s.should.matched? + -> { s[:a] }.should raise_error(IndexError) + end + end + end end diff --git a/spec/ruby/library/stringscanner/getbyte_spec.rb b/spec/ruby/library/stringscanner/getbyte_spec.rb deleted file mode 100644 index e0659a5829a4a7..00000000000000 --- a/spec/ruby/library/stringscanner/getbyte_spec.rb +++ /dev/null @@ -1,21 +0,0 @@ -require_relative '../../spec_helper' -require_relative 'shared/get_byte' -require_relative 'shared/extract_range' -require 'strscan' - -describe "StringScanner#getbyte" do - it_behaves_like :strscan_get_byte, :getbyte - - it "warns in verbose mode that the method is obsolete" do - s = StringScanner.new("abc") - -> { - s.getbyte - }.should complain(/getbyte.*obsolete.*get_byte/, verbose: true) - - -> { - s.getbyte - }.should_not complain(verbose: false) - end - - it_behaves_like :extract_range, :getbyte -end diff --git a/spec/ruby/library/stringscanner/peek_spec.rb b/spec/ruby/library/stringscanner/peek_spec.rb index cbb5630ff9ff03..d490abecf9661e 100644 --- a/spec/ruby/library/stringscanner/peek_spec.rb +++ 
b/spec/ruby/library/stringscanner/peek_spec.rb @@ -1,7 +1,42 @@ require_relative '../../spec_helper' -require_relative 'shared/peek' require 'strscan' describe "StringScanner#peek" do - it_behaves_like :strscan_peek, :peek + before :each do + @s = StringScanner.new('This is a test') + end + + it "returns at most the specified number of bytes from the current position" do + @s.peek(4).should == "This" + @s.pos.should == 0 + @s.pos = 5 + @s.peek(2).should == "is" + @s.peek(1000).should == "is a test" + + s = StringScanner.new("été") + s.peek(2).should == "é" + end + + it "returns an empty string when the passed argument is zero" do + @s.peek(0).should == "" + end + + it "raises a ArgumentError when the passed argument is negative" do + -> { @s.peek(-2) }.should raise_error(ArgumentError) + end + + it "raises a RangeError when the passed argument is a Bignum" do + -> { @s.peek(bignum_value) }.should raise_error(RangeError) + end + + it "returns an instance of String when passed a String subclass" do + cls = Class.new(String) + sub = cls.new("abc") + + s = StringScanner.new(sub) + + ch = s.peek(1) + ch.should_not be_kind_of(cls) + ch.should be_an_instance_of(String) + end end diff --git a/spec/ruby/library/stringscanner/peep_spec.rb b/spec/ruby/library/stringscanner/peep_spec.rb deleted file mode 100644 index bf6d579325aa2c..00000000000000 --- a/spec/ruby/library/stringscanner/peep_spec.rb +++ /dev/null @@ -1,18 +0,0 @@ -require_relative '../../spec_helper' -require_relative 'shared/peek' -require 'strscan' - -describe "StringScanner#peep" do - it_behaves_like :strscan_peek, :peep - - it "warns in verbose mode that the method is obsolete" do - s = StringScanner.new("abc") - -> { - s.peep(1) - }.should complain(/peep.*obsolete.*peek/, verbose: true) - - -> { - s.peep(1) - }.should_not complain(verbose: false) - end -end diff --git a/spec/ruby/library/stringscanner/rest_size_spec.rb b/spec/ruby/library/stringscanner/rest_size_spec.rb index e62e3a8f8c2d36..a5e971631a5303 
100644 --- a/spec/ruby/library/stringscanner/rest_size_spec.rb +++ b/spec/ruby/library/stringscanner/rest_size_spec.rb @@ -1,7 +1,21 @@ require_relative '../../spec_helper' -require_relative 'shared/rest_size' require 'strscan' describe "StringScanner#rest_size" do - it_behaves_like :strscan_rest_size, :rest_size + before :each do + @s = StringScanner.new('This is a test') + end + + it "returns the length of the rest of the string" do + @s.rest_size.should == 14 + @s.scan(/This/) + @s.rest_size.should == 10 + @s.terminate + @s.rest_size.should == 0 + end + + it "is equivalent to rest.size" do + @s.scan(/This/) + @s.rest_size.should == @s.rest.size + end end diff --git a/spec/ruby/library/stringscanner/rest_spec.rb b/spec/ruby/library/stringscanner/rest_spec.rb index 67072f880de2ce..25dcaf30ce4165 100644 --- a/spec/ruby/library/stringscanner/rest_spec.rb +++ b/spec/ruby/library/stringscanner/rest_spec.rb @@ -25,24 +25,3 @@ it_behaves_like :extract_range_matched, :rest end - -describe "StringScanner#rest?" do - before :each do - @s = StringScanner.new("This is a test") - end - - it "returns true if there is more data in the string" do - @s.rest?.should be_true - @s.scan(/This/) - @s.rest?.should be_true - end - - it "returns false if there is no more data in the string" do - @s.terminate - @s.rest?.should be_false - end - - it "is the opposite of eos?" do - @s.rest?.should_not == @s.eos? 
- end -end diff --git a/spec/ruby/library/stringscanner/restsize_spec.rb b/spec/ruby/library/stringscanner/restsize_spec.rb deleted file mode 100644 index 710520afae4772..00000000000000 --- a/spec/ruby/library/stringscanner/restsize_spec.rb +++ /dev/null @@ -1,18 +0,0 @@ -require_relative '../../spec_helper' -require_relative 'shared/rest_size' -require 'strscan' - -describe "StringScanner#restsize" do - it_behaves_like :strscan_rest_size, :restsize - - it "warns in verbose mode that the method is obsolete" do - s = StringScanner.new("abc") - -> { - s.restsize - }.should complain(/restsize.*obsolete.*rest_size/, verbose: true) - - -> { - s.restsize - }.should_not complain(verbose: false) - end -end diff --git a/spec/ruby/library/stringscanner/shared/eos.rb b/spec/ruby/library/stringscanner/shared/eos.rb deleted file mode 100644 index ea04c764a28921..00000000000000 --- a/spec/ruby/library/stringscanner/shared/eos.rb +++ /dev/null @@ -1,17 +0,0 @@ -describe :strscan_eos, shared: true do - before :each do - @s = StringScanner.new("This is a test") - end - - it "returns true if the scan pointer is at the end of the string" do - @s.terminate - @s.send(@method).should be_true - - s = StringScanner.new('') - s.send(@method).should be_true - end - - it "returns false if the scan pointer is not at the end of the string" do - @s.send(@method).should be_false - end -end diff --git a/spec/ruby/library/stringscanner/shared/get_byte.rb b/spec/ruby/library/stringscanner/shared/get_byte.rb deleted file mode 100644 index 1f7378d5c6e72a..00000000000000 --- a/spec/ruby/library/stringscanner/shared/get_byte.rb +++ /dev/null @@ -1,87 +0,0 @@ -# encoding: binary -require 'strscan' - -describe :strscan_get_byte, shared: true do - it "scans one byte and returns it" do - s = StringScanner.new('abc5.') - s.send(@method).should == 'a' - s.send(@method).should == 'b' - s.send(@method).should == 'c' - s.send(@method).should == '5' - s.send(@method).should == '.' 
- end - - it "is not multi-byte character sensitive" do - s = StringScanner.new("\244\242") - s.send(@method).should == "\244" - s.send(@method).should == "\242" - end - - it "returns nil at the end of the string" do - # empty string case - s = StringScanner.new('') - s.send(@method).should == nil - s.send(@method).should == nil - - # non-empty string case - s = StringScanner.new('a') - s.send(@method) # skip one - s.send(@method).should == nil - end - - describe "#[] successive call with a capture group name" do - # https://github.com/ruby/strscan/issues/139 - ruby_version_is ""..."3.5" do # Don't run on 3.5.0dev that already contains not released fixes - version_is StringScanner::Version, "3.1.1"..."3.1.3" do # ruby_version_is "3.4.0"..."3.4.3" - it "returns nil" do - s = StringScanner.new("This is a test") - s.send(@method) - s.should.matched? - s[:a].should be_nil - end - end - end - version_is StringScanner::Version, "3.1.3" do # ruby_version_is "3.4.3" - it "raises IndexError" do - s = StringScanner.new("This is a test") - s.send(@method) - s.should.matched? - -> { s[:a] }.should raise_error(IndexError) - end - end - - it "returns a matching character when given Integer index" do - s = StringScanner.new("This is a test") - s.send(@method) - s[0].should == "T" - end - - # https://github.com/ruby/strscan/issues/135 - ruby_version_is ""..."3.5" do # Don't run on 3.5.0dev that already contains not released fixes - version_is StringScanner::Version, "3.1.1"..."3.1.3" do # ruby_version_is "3.4.0"..."3.4.3" - it "ignores the previous matching with Regexp" do - s = StringScanner.new("This is a test") - s.exist?(/(?<a>This)/) - s.should.matched? - s[:a].should == "This" - - s.send(@method) - s.should.matched? - s[:a].should be_nil - end - end - end - version_is StringScanner::Version, "3.1.3" do # ruby_version_is "3.4.3" - it "ignores the previous matching with Regexp" do - s = StringScanner.new("This is a test") - s.exist?(/(?<a>This)/) - s.should.matched? 
- s[:a].should == "This" - - s.send(@method) - s.should.matched? - -> { s[:a] }.should raise_error(IndexError) - end - end - end -end diff --git a/spec/ruby/library/stringscanner/shared/peek.rb b/spec/ruby/library/stringscanner/shared/peek.rb deleted file mode 100644 index 4c757866c19370..00000000000000 --- a/spec/ruby/library/stringscanner/shared/peek.rb +++ /dev/null @@ -1,39 +0,0 @@ -describe :strscan_peek, shared: true do - before :each do - @s = StringScanner.new('This is a test') - end - - it "returns at most the specified number of bytes from the current position" do - @s.send(@method, 4).should == "This" - @s.pos.should == 0 - @s.pos = 5 - @s.send(@method, 2).should == "is" - @s.send(@method, 1000).should == "is a test" - - s = StringScanner.new("été") - s.send(@method, 2).should == "é" - end - - it "returns an empty string when the passed argument is zero" do - @s.send(@method, 0).should == "" - end - - it "raises a ArgumentError when the passed argument is negative" do - -> { @s.send(@method, -2) }.should raise_error(ArgumentError) - end - - it "raises a RangeError when the passed argument is a Bignum" do - -> { @s.send(@method, bignum_value) }.should raise_error(RangeError) - end - - it "returns an instance of String when passed a String subclass" do - cls = Class.new(String) - sub = cls.new("abc") - - s = StringScanner.new(sub) - - ch = s.send(@method, 1) - ch.should_not be_kind_of(cls) - ch.should be_an_instance_of(String) - end -end diff --git a/spec/ruby/library/stringscanner/shared/rest_size.rb b/spec/ruby/library/stringscanner/shared/rest_size.rb deleted file mode 100644 index 4c4f49e45c0037..00000000000000 --- a/spec/ruby/library/stringscanner/shared/rest_size.rb +++ /dev/null @@ -1,18 +0,0 @@ -describe :strscan_rest_size, shared: true do - before :each do - @s = StringScanner.new('This is a test') - end - - it "returns the length of the rest of the string" do - @s.send(@method).should == 14 - @s.scan(/This/) - @s.send(@method).should == 10 - 
@s.terminate - @s.send(@method).should == 0 - end - - it "is equivalent to rest.size" do - @s.scan(/This/) - @s.send(@method).should == @s.rest.size - end -end diff --git a/spec/ruby/library/stringscanner/shared/terminate.rb b/spec/ruby/library/stringscanner/shared/terminate.rb deleted file mode 100644 index bf41d097e25806..00000000000000 --- a/spec/ruby/library/stringscanner/shared/terminate.rb +++ /dev/null @@ -1,8 +0,0 @@ -describe :strscan_terminate, shared: true do - it "set the scan pointer to the end of the string and clear matching data." do - s = StringScanner.new('This is a test') - s.send(@method) - s.bol?.should be_false - s.eos?.should be_true - end -end diff --git a/spec/ruby/library/stringscanner/terminate_spec.rb b/spec/ruby/library/stringscanner/terminate_spec.rb index 7943efe0f916df..3cff5c010c4c7a 100644 --- a/spec/ruby/library/stringscanner/terminate_spec.rb +++ b/spec/ruby/library/stringscanner/terminate_spec.rb @@ -1,7 +1,11 @@ require_relative '../../spec_helper' -require_relative 'shared/terminate' require 'strscan' describe "StringScanner#terminate" do - it_behaves_like :strscan_terminate, :terminate + it "set the scan pointer to the end of the string and clear matching data." do + s = StringScanner.new('This is a test') + s.terminate + s.should_not.bol? + s.should.eos? 
+ end end From e9e5a4a4541eb2612fd8e5621edd15d964751d06 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 5 Nov 2025 02:17:34 +0900 Subject: [PATCH 02/15] [ruby/strscan] Remove methods have been obsolete over two decades https://github.com/ruby/strscan/commit/1387def685 --- ext/strscan/strscan.c | 113 ------------------------------------------ 1 file changed, 113 deletions(-) diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index d1b5c5e1d3a812..8842bc8e3e7cd3 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -96,7 +96,6 @@ static VALUE strscan_init_copy _((VALUE vself, VALUE vorig)); static VALUE strscan_s_mustc _((VALUE self)); static VALUE strscan_terminate _((VALUE self)); -static VALUE strscan_clear _((VALUE self)); static VALUE strscan_get_string _((VALUE self)); static VALUE strscan_set_string _((VALUE self, VALUE str)); static VALUE strscan_concat _((VALUE self, VALUE str)); @@ -118,15 +117,11 @@ static VALUE strscan_search_full _((VALUE self, VALUE re, static void adjust_registers_to_matched _((struct strscanner *p)); static VALUE strscan_getch _((VALUE self)); static VALUE strscan_get_byte _((VALUE self)); -static VALUE strscan_getbyte _((VALUE self)); static VALUE strscan_peek _((VALUE self, VALUE len)); -static VALUE strscan_peep _((VALUE self, VALUE len)); static VALUE strscan_scan_base10_integer _((VALUE self)); static VALUE strscan_unscan _((VALUE self)); static VALUE strscan_bol_p _((VALUE self)); static VALUE strscan_eos_p _((VALUE self)); -static VALUE strscan_empty_p _((VALUE self)); -static VALUE strscan_rest_p _((VALUE self)); static VALUE strscan_matched_p _((VALUE self)); static VALUE strscan_matched _((VALUE self)); static VALUE strscan_matched_size _((VALUE self)); @@ -384,21 +379,6 @@ strscan_terminate(VALUE self) return self; } -/* - * call-seq: - * clear -> self - * - * This method is obsolete; use the equivalent method StringScanner#terminate. 
- */ - - /* :nodoc: */ -static VALUE -strscan_clear(VALUE self) -{ - rb_warning("StringScanner#clear is obsolete; use #terminate instead"); - return strscan_terminate(self); -} - /* * :markup: markdown * :include: strscan/link_refs.txt @@ -1217,22 +1197,6 @@ strscan_get_byte(VALUE self) adjust_register_position(p, p->regs.end[0])); } -/* - * call-seq: - * getbyte - * - * Equivalent to #get_byte. - * This method is obsolete; use #get_byte instead. - */ - - /* :nodoc: */ -static VALUE -strscan_getbyte(VALUE self) -{ - rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead"); - return strscan_get_byte(self); -} - /* * :markup: markdown * :include: strscan/link_refs.txt @@ -1268,22 +1232,6 @@ strscan_peek(VALUE self, VALUE vlen) return extract_beg_len(p, p->curr, len); } -/* - * call-seq: - * peep - * - * Equivalent to #peek. - * This method is obsolete; use #peek instead. - */ - - /* :nodoc: */ -static VALUE -strscan_peep(VALUE self, VALUE vlen) -{ - rb_warning("StringScanner#peep is obsolete; use #peek instead"); - return strscan_peek(self, vlen); -} - static VALUE strscan_parse_integer(struct strscanner *p, int base, long len) { @@ -1523,45 +1471,6 @@ strscan_eos_p(VALUE self) return EOS_P(p) ? Qtrue : Qfalse; } -/* - * call-seq: - * empty? - * - * Equivalent to #eos?. - * This method is obsolete, use #eos? instead. - */ - - /* :nodoc: */ -static VALUE -strscan_empty_p(VALUE self) -{ - rb_warning("StringScanner#empty? is obsolete; use #eos? instead"); - return strscan_eos_p(self); -} - -/* - * call-seq: - * rest? - * - * Returns true if and only if there is more data in the string. See #eos?. - * This method is obsolete; use #eos? instead. - * - * s = StringScanner.new('test string') - * # These two are opposites - * s.eos? # => false - * s.rest? # => true - */ - - /* :nodoc: */ -static VALUE -strscan_rest_p(VALUE self) -{ - struct strscanner *p; - - GET_SCANNER(self, p); - return EOS_P(p) ? 
Qfalse : Qtrue; -} - /* * :markup: markdown * :include: strscan/link_refs.txt @@ -2052,22 +1961,6 @@ strscan_rest_size(VALUE self) return INT2FIX(i); } -/* - * call-seq: - * restsize - * - * s.restsize is equivalent to s.rest_size. - * This method is obsolete; use #rest_size instead. - */ - - /* :nodoc: */ -static VALUE -strscan_restsize(VALUE self) -{ - rb_warning("StringScanner#restsize is obsolete; use #rest_size instead"); - return strscan_rest_size(self); -} - #define INSPECT_LENGTH 5 /* @@ -2308,7 +2201,6 @@ Init_strscan(void) rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0); rb_define_method(StringScanner, "reset", strscan_reset, 0); rb_define_method(StringScanner, "terminate", strscan_terminate, 0); - rb_define_method(StringScanner, "clear", strscan_clear, 0); rb_define_method(StringScanner, "string", strscan_get_string, 0); rb_define_method(StringScanner, "string=", strscan_set_string, 1); rb_define_method(StringScanner, "concat", strscan_concat, 1); @@ -2333,11 +2225,9 @@ Init_strscan(void) rb_define_method(StringScanner, "getch", strscan_getch, 0); rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0); - rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0); rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0); rb_define_method(StringScanner, "peek", strscan_peek, 1); rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0); - rb_define_method(StringScanner, "peep", strscan_peep, 1); rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0); rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0); @@ -2347,8 +2237,6 @@ Init_strscan(void) rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0); rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?")); rb_define_method(StringScanner, "eos?", strscan_eos_p, 0); - rb_define_method(StringScanner, "empty?", strscan_empty_p, 0); 
- rb_define_method(StringScanner, "rest?", strscan_rest_p, 0); rb_define_method(StringScanner, "matched?", strscan_matched_p, 0); rb_define_method(StringScanner, "matched", strscan_matched, 0); @@ -2362,7 +2250,6 @@ Init_strscan(void) rb_define_method(StringScanner, "rest", strscan_rest, 0); rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0); - rb_define_method(StringScanner, "restsize", strscan_restsize, 0); rb_define_method(StringScanner, "inspect", strscan_inspect, 0); From f1f2dfebe8a3ed770e3263fb9379d1fb51f85feb Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Tue, 4 Nov 2025 14:46:01 -0500 Subject: [PATCH 03/15] Release VM lock before running finalizers (#15050) We shouldn't run any ruby code with the VM lock held. --- gc.c | 1 + gc/default/default.c | 22 ++++++++++++++-------- test/ruby/test_gc.rb | 21 +++++++++++++++++++++ 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/gc.c b/gc.c index a8320246d03d89..d1e542de2c6ecb 100644 --- a/gc.c +++ b/gc.c @@ -290,6 +290,7 @@ rb_gc_run_obj_finalizer(VALUE objid, long count, VALUE (*callback)(long i, void saved.finished = 0; saved.final = Qundef; + ASSERT_vm_unlocking(); rb_ractor_ignore_belonging(true); EC_PUSH_TAG(ec); enum ruby_tag_type state = EC_EXEC_TAG(); diff --git a/gc/default/default.c b/gc/default/default.c index e0a5aade85f223..6045cec59887a0 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -2763,24 +2763,27 @@ rb_gc_impl_define_finalizer(void *objspace_ptr, VALUE obj, VALUE block) RBASIC(obj)->flags |= FL_FINALIZE; - int lev = RB_GC_VM_LOCK(); + unsigned int lev = RB_GC_VM_LOCK(); if (st_lookup(finalizer_table, obj, &data)) { table = (VALUE)data; + VALUE dup_table = rb_ary_dup(table); + RB_GC_VM_UNLOCK(lev); /* avoid duplicate block, table is usually small */ { long len = RARRAY_LEN(table); long i; for (i = 0; i < len; i++) { - VALUE recv = RARRAY_AREF(table, i); - if (rb_equal(recv, block)) { - RB_GC_VM_UNLOCK(lev); + VALUE recv = RARRAY_AREF(dup_table, i); + 
if (rb_equal(recv, block)) { // can't be called with VM lock held return recv; } } } + lev = RB_GC_VM_LOCK(); + RB_GC_GUARD(dup_table); rb_ary_push(table, block); } @@ -2841,8 +2844,8 @@ get_final(long i, void *data) return RARRAY_AREF(table, i + 1); } -static void -run_final(rb_objspace_t *objspace, VALUE zombie) +static unsigned int +run_final(rb_objspace_t *objspace, VALUE zombie, unsigned int lev) { if (RZOMBIE(zombie)->dfree) { RZOMBIE(zombie)->dfree(RZOMBIE(zombie)->data); @@ -2853,7 +2856,9 @@ run_final(rb_objspace_t *objspace, VALUE zombie) FL_UNSET(zombie, FL_FINALIZE); st_data_t table; if (st_delete(finalizer_table, &key, &table)) { + RB_GC_VM_UNLOCK(lev); rb_gc_run_obj_finalizer(RARRAY_AREF(table, 0), RARRAY_LEN(table) - 1, get_final, (void *)table); + lev = RB_GC_VM_LOCK(); } else { rb_bug("FL_FINALIZE flag is set, but finalizers are not found"); @@ -2862,6 +2867,7 @@ run_final(rb_objspace_t *objspace, VALUE zombie) else { GC_ASSERT(!st_lookup(finalizer_table, key, NULL)); } + return lev; } static void @@ -2874,9 +2880,9 @@ finalize_list(rb_objspace_t *objspace, VALUE zombie) next_zombie = RZOMBIE(zombie)->next; page = GET_HEAP_PAGE(zombie); - int lev = RB_GC_VM_LOCK(); + unsigned int lev = RB_GC_VM_LOCK(); - run_final(objspace, zombie); + lev = run_final(objspace, zombie, lev); { GC_ASSERT(BUILTIN_TYPE(zombie) == T_ZOMBIE); GC_ASSERT(page->heap->final_slots_count > 0); diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index a8a937f078a9da..7695fd33cf9945 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -914,4 +914,25 @@ def test_old_to_young_reference assert_include ObjectSpace.dump(young_obj), '"old":true' end end + + def test_finalizer_not_run_with_vm_lock + assert_ractor(<<~'RUBY') + Thread.new do + loop do + Encoding.list.each do |enc| + enc.names + end + end + end + + o = Object.new + ObjectSpace.define_finalizer(o, proc do + sleep 0.5 # finalizer shouldn't be run with VM lock, otherwise this context switch will crash + end) 
+ o = nil + 4.times do + GC.start + end + RUBY + end end From 962aa14f240f43ca3bf3516432f7c3a6fbd1d3ff Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Tue, 4 Nov 2025 13:29:14 -0700 Subject: [PATCH 04/15] ZJIT: Add test to reproduce binarytrees crash (#15054) --- test/ruby/test_zjit.rb | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index de2d1e61528e86..ae1af5c2c038b6 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -3164,6 +3164,37 @@ def test(define) }, call_threshold: 2 end + def test_regression_cfp_sp_set_correctly_before_leaf_gc_call + omit 'reproduction for known, unresolved ZJIT bug' + + assert_compiles ':ok', %q{ + def check(l, r) + return 1 unless l + 1 + check(*l) + check(*r) + end + + def tree(depth) + # This duparray is our leaf-gc target. + return [nil, nil] unless depth > 0 + + # Modify the local and pass it to the following calls. + depth -= 1 + [tree(depth), tree(depth)] + end + + def test + GC.stress = true + 2.times do + t = tree(11) + check(*t) + end + :ok + end + + test + }, call_threshold: 14, num_profiles: 5 + end + private # Assert that every method call in `test_script` can be compiled by ZJIT From fffa4671a4cfaea6e6eb2bc6a5dde14ad1a5a400 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 4 Nov 2025 13:33:32 -0800 Subject: [PATCH 05/15] [ruby/strscan] Resurrect a method that has not been obsolete (https://github.com/ruby/strscan/pull/169) Partially revert https://github.com/ruby/strscan/pull/168 because strscan_rest_p did not have `rb_warning("StringScanner#rest? is obsolete")`. It is actively used by the latest tzinfo.gem, and we shouldn't remove it without deprecating it. 
https://github.com/ruby/strscan/commit/f3fdf21189 --- ext/strscan/strscan.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index 8842bc8e3e7cd3..e2b827c63c9a3f 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -122,6 +122,7 @@ static VALUE strscan_scan_base10_integer _((VALUE self)); static VALUE strscan_unscan _((VALUE self)); static VALUE strscan_bol_p _((VALUE self)); static VALUE strscan_eos_p _((VALUE self)); +static VALUE strscan_rest_p _((VALUE self)); static VALUE strscan_matched_p _((VALUE self)); static VALUE strscan_matched _((VALUE self)); static VALUE strscan_matched_size _((VALUE self)); @@ -1471,6 +1472,29 @@ strscan_eos_p(VALUE self) return EOS_P(p) ? Qtrue : Qfalse; } +/* + * call-seq: + * rest? + * + * Returns true if and only if there is more data in the string. See #eos?. + * This method is obsolete; use #eos? instead. + * + * s = StringScanner.new('test string') + * # These two are opposites + * s.eos? # => false + * s.rest? # => true + */ + + /* :nodoc: */ +static VALUE +strscan_rest_p(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + return EOS_P(p) ? Qfalse : Qtrue; +} + /* * :markup: markdown * :include: strscan/link_refs.txt @@ -2237,6 +2261,7 @@ Init_strscan(void) rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0); rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?")); rb_define_method(StringScanner, "eos?", strscan_eos_p, 0); + rb_define_method(StringScanner, "rest?", strscan_rest_p, 0); rb_define_method(StringScanner, "matched?", strscan_matched_p, 0); rb_define_method(StringScanner, "matched", strscan_matched, 0); From 7a0d730ee320e8b7a46d8fd4719a1ec709fd958c Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 4 Nov 2025 13:39:51 -0800 Subject: [PATCH 06/15] Resurrect tests for StringScanner#rest? that has not been obsolete. 
Partially reverting https://github.com/ruby/ruby/pull/15049. --- spec/ruby/library/stringscanner/rest_spec.rb | 21 ++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/spec/ruby/library/stringscanner/rest_spec.rb b/spec/ruby/library/stringscanner/rest_spec.rb index 25dcaf30ce4165..67072f880de2ce 100644 --- a/spec/ruby/library/stringscanner/rest_spec.rb +++ b/spec/ruby/library/stringscanner/rest_spec.rb @@ -25,3 +25,24 @@ it_behaves_like :extract_range_matched, :rest end + +describe "StringScanner#rest?" do + before :each do + @s = StringScanner.new("This is a test") + end + + it "returns true if there is more data in the string" do + @s.rest?.should be_true + @s.scan(/This/) + @s.rest?.should be_true + end + + it "returns false if there is no more data in the string" do + @s.terminate + @s.rest?.should be_false + end + + it "is the opposite of eos?" do + @s.rest?.should_not == @s.eos? + end +end From a0376eb2ccc8a893905d270c5363b73ccfcacd2d Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 4 Nov 2025 13:56:35 -0800 Subject: [PATCH 07/15] ZJIT: Fix --zjit-mem-size and add --zjit-exec-mem-size (#15041) ZJIT: Fix --zjit-mem-size and resurrect --zjit-exec-mem-size --- zjit/src/options.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/zjit/src/options.rs b/zjit/src/options.rs index f6471b5461497f..cd3a6439719b3f 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -117,7 +117,7 @@ impl Default for Options { /// description in a separate line if the option name is too long. 
80-char limit --> | (any character beyond this `|` column fails the test) pub const ZJIT_OPTIONS: &[(&str, &str)] = &[ ("--zjit-mem-size=num", - "Max amount of memory that ZJIT can use (in MiB)."), + "Max amount of memory that ZJIT can use in MiB (default: 128)."), ("--zjit-call-threshold=num", "Number of calls to trigger JIT (default: 30)."), ("--zjit-num-profiles=num", @@ -175,6 +175,10 @@ const DUMP_LIR_ALL: &[DumpLIR] = &[ DumpLIR::scratch_split, ]; +/// Maximum value for --zjit-mem-size/--zjit-exec-mem-size in MiB. +/// We set 1TiB just to avoid overflow. We could make it smaller. +const MAX_MEM_MIB: usize = 1024 * 1024; + /// Macro to dump LIR if --zjit-dump-lir is specified macro_rules! asm_dump { ($asm:expr, $target:ident) => { @@ -257,17 +261,19 @@ fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { ("", "") => {}, // Simply --zjit ("mem-size", _) => match opt_val.parse::() { - Ok(n) => { - // Reject 0 or too large values that could overflow. - // The upper bound is 1 TiB but we could make it smaller.
- if n == 0 || n > 1024 * 1024 { - return None - } + Ok(n) if (1..=MAX_MEM_MIB).contains(&n) => { + // Convert from MiB to bytes internally for convenience + options.mem_bytes = n * 1024 * 1024; + } + _ => return None, + }, + ("exec-mem-size", _) => match opt_val.parse::() { + Ok(n) if (1..=MAX_MEM_MIB).contains(&n) => { // Convert from MiB to bytes internally for convenience options.exec_mem_bytes = n * 1024 * 1024; } - Err(_) => return None, + _ => return None, }, ("call-threshold", _) => match opt_val.parse() { From 554a78daabbfeb8d8a128d4600f1cc02287cdcd1 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 4 Nov 2025 17:57:25 -0600 Subject: [PATCH 08/15] [ruby/stringio] [DOC] Doc for StringIO.getc (https://github.com/ruby/stringio/pull/163) https://github.com/ruby/stringio/commit/a126fe252f --- doc/stringio/getc.rdoc | 34 ++++++++++++++++++++++++++++++++++ ext/stringio/stringio.c | 6 +++--- 2 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 doc/stringio/getc.rdoc diff --git a/doc/stringio/getc.rdoc b/doc/stringio/getc.rdoc new file mode 100644 index 00000000000000..c021789c911b8d --- /dev/null +++ b/doc/stringio/getc.rdoc @@ -0,0 +1,34 @@ +Reads and returns the next character (or byte; see below) from the stream: + + strio = StringIO.new('foo') + strio.getc # => "f" + strio.getc # => "o" + strio.getc # => "o" + +Returns +nil+ if at end-of-stream: + + strio.eof? # => true + strio.getc # => nil + +Returns characters, not bytes: + + strio = StringIO.new('тест') + strio.getc # => "т" + strio.getc # => "е" + + strio = StringIO.new('こんにちは') + strio.getc # => "こ" + strio.getc # => "ん" + +In each of the examples above, the stream is positioned at the beginning of a character; +in other cases that need not be true: + + strio = StringIO.new('こんにちは') # Five 3-byte characters. + strio.pos = 3 # => 3 # At beginning of second character; returns character. + strio.getc # => "ん" + strio.pos = 4 # => 4 # At second byte of second character; returns byte. 
+ strio.getc # => "\x82" + strio.pos = 5 # => 5 # At third byte of second character; returns byte. + strio.getc # => "\x93" + +Related: StringIO#getbyte. diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index d66768a2c50279..1ceda9dcf01093 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -964,10 +964,10 @@ strio_each_byte(VALUE self) /* * call-seq: - * getc -> character or nil + * getc -> character, byte, or nil + * + * :include: stringio/getc.rdoc * - * Reads and returns the next character from the stream; - * see {Character IO}[rdoc-ref:IO@Character+IO]. */ static VALUE strio_getc(VALUE self) From 9c0f2729c07d553f613d6a65144bb4ce0376e948 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 4 Nov 2025 18:02:38 -0600 Subject: [PATCH 09/15] [ruby/stringio] [DOC] Tweaks for StringIO#internal_encoding (https://github.com/ruby/stringio/pull/166) https://github.com/ruby/stringio/commit/5eeb61df34 --- ext/stringio/stringio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index 1ceda9dcf01093..f61815c657d3e4 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -2071,10 +2071,9 @@ strio_external_encoding(VALUE self) /* * call-seq: - * strio.internal_encoding => encoding + * internal_encoding -> nil * - * Returns the Encoding of the internal string if conversion is - * specified. Otherwise returns +nil+. + * Returns +nil+; for compatibility with IO.
*/ static VALUE From e22d9abad3da623e376a067f98ea62a94ff00887 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 4 Nov 2025 18:04:54 -0600 Subject: [PATCH 10/15] [ruby/stringio] [DOC] Tweaks for StringIO#fileno (https://github.com/ruby/stringio/pull/168) https://github.com/ruby/stringio/commit/9f10c7ae86 --- ext/stringio/stringio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index f61815c657d3e4..f6a639046f12e3 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -434,7 +434,7 @@ strio_false(VALUE self) } /* - * Returns +nil+. Just for compatibility to IO. + * Returns +nil+; for compatibility with IO. */ static VALUE strio_nil(VALUE self) From d5acffba82a2a79a83e0e5dbfa2036f06d497245 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 4 Nov 2025 18:05:06 -0600 Subject: [PATCH 11/15] [ruby/stringio] [DOC] Tweaks for StringIO#fsync (https://github.com/ruby/stringio/pull/170) https://github.com/ruby/stringio/commit/da338d7e5d --- ext/stringio/stringio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index f6a639046f12e3..ffe2ef205f19dd 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -454,7 +454,7 @@ strio_self(VALUE self) } /* - * Returns 0. Just for compatibility to IO. + * Returns 0; for compatibility with IO. 
*/ static VALUE strio_0(VALUE self) From 00b5b3c5637ab26c806400cc7c679d7f9bbd1a2b Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 4 Nov 2025 18:08:17 -0600 Subject: [PATCH 12/15] [ruby/stringio] [DOC] Tweaks for StringIO#isatty (https://github.com/ruby/stringio/pull/167) https://github.com/ruby/stringio/commit/94303ace95 --- ext/stringio/stringio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index ffe2ef205f19dd..6ad526c8e2545a 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -424,7 +424,7 @@ strio_s_new(int argc, VALUE *argv, VALUE klass) } /* - * Returns +false+. Just for compatibility to IO. + * Returns +false+; for compatibility with IO. */ static VALUE strio_false(VALUE self) From be905b2e581540dc2c51a54aed537b19955b7bb0 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 4 Nov 2025 18:08:36 -0600 Subject: [PATCH 13/15] [ruby/stringio] [DOC] Tweaks for StringIO#flush (https://github.com/ruby/stringio/pull/169) https://github.com/ruby/stringio/commit/bef6541b55 --- ext/stringio/stringio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index 6ad526c8e2545a..c5e64a5c0f6808 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -444,7 +444,7 @@ strio_nil(VALUE self) } /* - * Returns an object itself. Just for compatibility to IO. + * Returns +self+; for compatibility with IO. 
*/ static VALUE strio_self(VALUE self) From bd3b44cb0a341878abe0edf65d01b1a48c93f088 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 4 Nov 2025 16:09:13 -0800 Subject: [PATCH 14/15] ZJIT: Use a shared trampoline across all ISEQs (#15042) --- vm.c | 69 ++++++++++++++++++++++------------ vm_core.h | 1 + vm_exec.h | 19 ++++++++-- zjit.h | 6 ++- zjit/src/backend/arm64/mod.rs | 30 +++++++++------ zjit/src/backend/lir.rs | 15 +++++++- zjit/src/backend/x86_64/mod.rs | 10 ++++- zjit/src/codegen.rs | 27 ++++++------- zjit/src/cruby.rs | 6 +-- zjit/src/state.rs | 21 +++++++---- 10 files changed, 133 insertions(+), 71 deletions(-) diff --git a/vm.c b/vm.c index 32785dbcc8cbca..f0aebf08a38694 100644 --- a/vm.c +++ b/vm.c @@ -503,7 +503,7 @@ rb_yjit_threshold_hit(const rb_iseq_t *iseq, uint64_t entry_calls) #define rb_yjit_threshold_hit(iseq, entry_calls) false #endif -#if USE_YJIT || USE_ZJIT +#if USE_YJIT // Generate JIT code that supports the following kinds of ISEQ entries: // * The first ISEQ on vm_exec (e.g.
, or Ruby methods/blocks // called by a C method). The current frame has VM_FRAME_FLAG_FINISH. @@ -513,13 +513,32 @@ rb_yjit_threshold_hit(const rb_iseq_t *iseq, uint64_t entry_calls) // The current frame doesn't have VM_FRAME_FLAG_FINISH. The current // vm_exec does NOT stop whether JIT code returns Qundef or not. static inline rb_jit_func_t -jit_compile(rb_execution_context_t *ec) +yjit_compile(rb_execution_context_t *ec) { const rb_iseq_t *iseq = ec->cfp->iseq; struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); + // Increment the ISEQ's call counter and trigger JIT compilation if not compiled + if (body->jit_entry == NULL) { + body->jit_entry_calls++; + if (rb_yjit_threshold_hit(iseq, body->jit_entry_calls)) { + rb_yjit_compile_iseq(iseq, ec, false); + } + } + return body->jit_entry; +} +#else +# define yjit_compile(ec) ((rb_jit_func_t)0) +#endif + #if USE_ZJIT - if (body->jit_entry == NULL && rb_zjit_enabled_p) { +static inline rb_jit_func_t +zjit_compile(rb_execution_context_t *ec) +{ + const rb_iseq_t *iseq = ec->cfp->iseq; + struct rb_iseq_constant_body *body = ISEQ_BODY(iseq); + + if (body->jit_entry == NULL) { body->jit_entry_calls++; // At profile-threshold, rewrite some of the YARV instructions @@ -533,38 +552,38 @@ jit_compile(rb_execution_context_t *ec) rb_zjit_compile_iseq(iseq, false); } } -#endif - -#if USE_YJIT - // Increment the ISEQ's call counter and trigger JIT compilation if not compiled - if (body->jit_entry == NULL && rb_yjit_enabled_p) { - body->jit_entry_calls++; - if (rb_yjit_threshold_hit(iseq, body->jit_entry_calls)) { - rb_yjit_compile_iseq(iseq, ec, false); - } - } -#endif return body->jit_entry; } +#else +# define zjit_compile(ec) ((rb_jit_func_t)0) +#endif -// Execute JIT code compiled by jit_compile() +// Execute JIT code compiled by yjit_compile() or zjit_compile() static inline VALUE jit_exec(rb_execution_context_t *ec) { - rb_jit_func_t func = jit_compile(ec); - if (func) { - // Call the JIT code - return func(ec, ec->cfp); - 
} - else { +#if USE_YJIT + if (rb_yjit_enabled_p) { + rb_jit_func_t func = yjit_compile(ec); + if (func) { + return func(ec, ec->cfp); + } return Qundef; } -} -#else -# define jit_compile(ec) ((rb_jit_func_t)0) -# define jit_exec(ec) Qundef #endif +#if USE_ZJIT + void *zjit_entry = rb_zjit_entry; + if (zjit_entry) { + rb_jit_func_t func = zjit_compile(ec); + if (func) { + return ((rb_zjit_func_t)zjit_entry)(ec, ec->cfp, func); + } + } +#endif + return Qundef; +} + #if USE_YJIT // Generate JIT code that supports the following kind of ISEQ entry: // * The first ISEQ pushed by vm_exec_handle_exception. The frame would diff --git a/vm_core.h b/vm_core.h index e8e6a6a3a6b3f9..ded0280387b834 100644 --- a/vm_core.h +++ b/vm_core.h @@ -398,6 +398,7 @@ enum rb_builtin_attr { }; typedef VALUE (*rb_jit_func_t)(struct rb_execution_context_struct *, struct rb_control_frame_struct *); +typedef VALUE (*rb_zjit_func_t)(struct rb_execution_context_struct *, struct rb_control_frame_struct *, rb_jit_func_t); struct rb_iseq_constant_body { enum rb_iseq_type type; diff --git a/vm_exec.h b/vm_exec.h index c3b7d4e48882c3..033a48f1e7683c 100644 --- a/vm_exec.h +++ b/vm_exec.h @@ -175,11 +175,22 @@ default: \ // Run the JIT from the interpreter #define JIT_EXEC(ec, val) do { \ - rb_jit_func_t func; \ /* don't run tailcalls since that breaks FINISH */ \ - if (UNDEF_P(val) && GET_CFP() != ec->cfp && (func = jit_compile(ec))) { \ - val = func(ec, ec->cfp); \ - if (ec->tag->state) THROW_EXCEPTION(val); \ + if (UNDEF_P(val) && GET_CFP() != ec->cfp) { \ + rb_zjit_func_t zjit_entry; \ + if (rb_yjit_enabled_p) { \ + rb_jit_func_t func = yjit_compile(ec); \ + if (func) { \ + val = func(ec, ec->cfp); \ + if (ec->tag->state) THROW_EXCEPTION(val); \ + } \ + } \ + else if ((zjit_entry = rb_zjit_entry)) { \ + rb_jit_func_t func = zjit_compile(ec); \ + if (func) { \ + val = zjit_entry(ec, ec->cfp, func); \ + } \ + } \ } \ } while (0) diff --git a/zjit.h b/zjit.h index 7b3e410c91c4b0..47240846ff1db0 
100644 --- a/zjit.h +++ b/zjit.h @@ -10,7 +10,7 @@ #endif #if USE_ZJIT -extern bool rb_zjit_enabled_p; +extern void *rb_zjit_entry; extern uint64_t rb_zjit_call_threshold; extern uint64_t rb_zjit_profile_threshold; void rb_zjit_compile_iseq(const rb_iseq_t *iseq, bool jit_exception); @@ -29,7 +29,7 @@ void rb_zjit_before_ractor_spawn(void); void rb_zjit_tracing_invalidate_all(void); void rb_zjit_invalidate_no_singleton_class(VALUE klass); #else -#define rb_zjit_enabled_p false +#define rb_zjit_entry 0 static inline void rb_zjit_compile_iseq(const rb_iseq_t *iseq, bool jit_exception) {} static inline void rb_zjit_profile_insn(uint32_t insn, rb_execution_context_t *ec) {} static inline void rb_zjit_profile_enable(const rb_iseq_t *iseq) {} @@ -42,4 +42,6 @@ static inline void rb_zjit_tracing_invalidate_all(void) {} static inline void rb_zjit_invalidate_no_singleton_class(VALUE klass) {} #endif // #if USE_ZJIT +#define rb_zjit_enabled_p (rb_zjit_entry != 0) + #endif // #ifndef ZJIT_H diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index acf0576f9c80be..532570d732341e 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -1428,17 +1428,25 @@ impl Assembler { } }, Insn::CCall { fptr, .. } => { - // The offset to the call target in bytes - let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64; - let dst_addr = *fptr as i64; - - // Use BL if the offset is short enough to encode as an immediate. - // Otherwise, use BLR with a register. - if b_offset_fits_bits((dst_addr - src_addr) / 4) { - bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); - } else { - emit_load_value(cb, Self::EMIT_OPND, dst_addr as u64); - blr(cb, Self::EMIT_OPND); + match fptr { + Opnd::UImm(fptr) => { + // The offset to the call target in bytes + let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64; + let dst_addr = *fptr as i64; + + // Use BL if the offset is short enough to encode as an immediate. 
+ // Otherwise, use BLR with a register. + if b_offset_fits_bits((dst_addr - src_addr) / 4) { + bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); + } else { + emit_load_value(cb, Self::EMIT_OPND, dst_addr as u64); + blr(cb, Self::EMIT_OPND); + } + } + Opnd::Reg(_) => { + blr(cb, fptr.into()); + } + _ => unreachable!("unsupported ccall fptr: {fptr:?}") } }, Insn::CRet { .. } => { diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 66e89a1304d715..e2f75e01c8fcba 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -386,7 +386,9 @@ pub enum Insn { // C function call with N arguments (variadic) CCall { opnds: Vec, - fptr: *const u8, + /// The function pointer to be called. This should be Opnd::const_ptr + /// (Opnd::UImm) in most cases. gen_entry_trampoline() uses Opnd::Reg. + fptr: Opnd, /// Optional PosMarker to remember the start address of the C call. /// It's embedded here to insert the PosMarker after push instructions /// that are split from this CCall on alloc_regs(). @@ -1989,11 +1991,20 @@ impl Assembler { pub fn ccall(&mut self, fptr: *const u8, opnds: Vec) -> Opnd { let canary_opnd = self.set_stack_canary(); let out = self.new_vreg(Opnd::match_num_bits(&opnds)); + let fptr = Opnd::const_ptr(fptr); self.push_insn(Insn::CCall { fptr, opnds, start_marker: None, end_marker: None, out }); self.clear_stack_canary(canary_opnd); out } + /// Call a C function stored in a register + pub fn ccall_reg(&mut self, fptr: Opnd, num_bits: u8) -> Opnd { + assert!(matches!(fptr, Opnd::Reg(_)), "ccall_reg must be called with Opnd::Reg: {fptr:?}"); + let out = self.new_vreg(num_bits); + self.push_insn(Insn::CCall { fptr, opnds: vec![], start_marker: None, end_marker: None, out }); + out + } + /// Call a C function with PosMarkers. This is used for recording the start and end /// addresses of the C call and rewriting it with a different function address later. 
pub fn ccall_with_pos_markers( @@ -2005,7 +2016,7 @@ impl Assembler { ) -> Opnd { let out = self.new_vreg(Opnd::match_num_bits(&opnds)); self.push_insn(Insn::CCall { - fptr, + fptr: Opnd::const_ptr(fptr), opnds, start_marker: Some(Rc::new(start_marker)), end_marker: Some(Rc::new(end_marker)), diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 1d5d90a856c92e..aea25ca2a46d35 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -863,7 +863,15 @@ impl Assembler { // C function call Insn::CCall { fptr, .. } => { - call_ptr(cb, RAX, *fptr); + match fptr { + Opnd::UImm(fptr) => { + call_ptr(cb, RAX, *fptr as *const u8); + } + Opnd::Reg(_) => { + call(cb, fptr.into()); + } + _ => unreachable!("unsupported ccall fptr: {fptr:?}") + } }, Insn::CRet(opnd) => { diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 7cd677bde3d5b3..01212ac88cdbfa 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -106,8 +106,7 @@ pub extern "C" fn rb_zjit_iseq_gen_entry_point(iseq: IseqPtr, jit_exception: boo } // Always mark the code region executable if asm.compile() has been used. - // We need to do this even if code_ptr is None because, whether gen_entry() - // fails or not, gen_iseq() may have already used asm.compile(). + // We need to do this even if code_ptr is None because gen_iseq() may have already used asm.compile(). 
cb.mark_all_executable(); code_ptr.map_or(std::ptr::null(), |ptr| ptr.raw_ptr(cb)) @@ -131,10 +130,7 @@ fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr, jit_exception: bool) debug!("{err:?}: gen_iseq failed: {}", iseq_get_location(iseq, 0)); })?; - // Compile an entry point to the JIT code - gen_entry(cb, iseq, start_ptr).inspect_err(|err| { - debug!("{err:?}: gen_entry failed: {}", iseq_get_location(iseq, 0)); - }) + Ok(start_ptr) } /// Stub a branch for a JIT-to-JIT call @@ -170,14 +166,16 @@ fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) { }; } -/// Compile a JIT entry -fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function_ptr: CodePtr) -> Result { +/// Compile a shared JIT entry trampoline +pub fn gen_entry_trampoline(cb: &mut CodeBlock) -> Result { // Set up registers for CFP, EC, SP, and basic block arguments let mut asm = Assembler::new(); - gen_entry_prologue(&mut asm, iseq); + gen_entry_prologue(&mut asm); - // Jump to the first block using a call instruction - asm.ccall(function_ptr.raw_ptr(cb), vec![]); + // Jump to the first block using a call instruction. This trampoline is used + // as rb_zjit_func_t in jit_exec(), which takes (EC, CFP, rb_jit_func_t). + // So C_ARG_OPNDS[2] is rb_jit_func_t, which is (EC, CFP) -> VALUE. 
+ asm.ccall_reg(C_ARG_OPNDS[2], VALUE_BITS); // Restore registers for CFP, EC, and SP after use asm_comment!(asm, "return to the interpreter"); @@ -190,8 +188,7 @@ fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function_ptr: CodePtr) -> Result let start_ptr = code_ptr.raw_addr(cb); let end_ptr = cb.get_write_ptr().raw_addr(cb); let code_size = end_ptr - start_ptr; - let iseq_name = iseq_get_location(iseq, 0); - register_with_perf(format!("entry for {iseq_name}"), start_ptr, code_size); + register_with_perf("ZJIT entry trampoline".into(), start_ptr, code_size); } Ok(code_ptr) } @@ -990,8 +987,8 @@ fn gen_load_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32) -> Opnd } /// Compile an interpreter entry block to be inserted into an ISEQ -fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) { - asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0)); +fn gen_entry_prologue(asm: &mut Assembler) { + asm_comment!(asm, "ZJIT entry trampoline"); // Save the registers we'll use for CFP, EP, SP asm.frame_setup(lir::JIT_PRESERVED_REGS); diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 631acbd8635686..db47385bc88321 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -1071,7 +1071,7 @@ pub use manual_defs::*; pub mod test_utils { use std::{ptr::null, sync::Once}; - use crate::{options::{rb_zjit_call_threshold, rb_zjit_prepare_options, set_call_threshold, DEFAULT_CALL_THRESHOLD}, state::{rb_zjit_enabled_p, ZJITState}}; + use crate::{options::{rb_zjit_call_threshold, rb_zjit_prepare_options, set_call_threshold, DEFAULT_CALL_THRESHOLD}, state::{rb_zjit_entry, ZJITState}}; use super::*; @@ -1114,10 +1114,10 @@ pub mod test_utils { } // Set up globals for convenience - ZJITState::init(); + let zjit_entry = ZJITState::init(); // Enable zjit_* instructions - unsafe { rb_zjit_enabled_p = true; } + unsafe { rb_zjit_entry = zjit_entry; } } /// Make sure the Ruby VM is set up and run a given callback with rb_protect() diff --git 
a/zjit/src/state.rs b/zjit/src/state.rs index c0e9e0b77ca909..3cb60cffcb6c52 100644 --- a/zjit/src/state.rs +++ b/zjit/src/state.rs @@ -1,6 +1,6 @@ //! Runtime state of ZJIT. -use crate::codegen::{gen_exit_trampoline, gen_exit_trampoline_with_counter, gen_function_stub_hit_trampoline}; +use crate::codegen::{gen_entry_trampoline, gen_exit_trampoline, gen_exit_trampoline_with_counter, gen_function_stub_hit_trampoline}; use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, EcPtr, Qnil, rb_vm_insn_addr2opcode, rb_profile_frames, VALUE, VM_INSTRUCTION_SIZE, size_t, rb_gc_mark}; use crate::cruby_methods; use crate::invariants::Invariants; @@ -9,14 +9,16 @@ use crate::options::get_option; use crate::stats::{Counters, InsnCounters, SideExitLocations}; use crate::virtualmem::CodePtr; use std::collections::HashMap; +use std::ptr::null; +/// Shared trampoline to enter ZJIT. Not null when ZJIT is enabled. #[allow(non_upper_case_globals)] #[unsafe(no_mangle)] -pub static mut rb_zjit_enabled_p: bool = false; +pub static mut rb_zjit_entry: *const u8 = null(); /// Like rb_zjit_enabled_p, but for Rust code. pub fn zjit_enabled_p() -> bool { - unsafe { rb_zjit_enabled_p } + unsafe { rb_zjit_entry != null() } } /// Global state needed for code generation @@ -65,8 +67,8 @@ pub struct ZJITState { static mut ZJIT_STATE: Option = None; impl ZJITState { - /// Initialize the ZJIT globals - pub fn init() { + /// Initialize the ZJIT globals. Return the address of the JIT entry trampoline. 
+ pub fn init() -> *const u8 { let mut cb = { use crate::options::*; use crate::virtualmem::*; @@ -79,6 +81,7 @@ impl ZJITState { CodeBlock::new(mem_block.clone(), get_option!(dump_disasm)) }; + let entry_trampoline = gen_entry_trampoline(&mut cb).unwrap().raw_ptr(&cb); let exit_trampoline = gen_exit_trampoline(&mut cb).unwrap(); let function_stub_hit_trampoline = gen_function_stub_hit_trampoline(&mut cb).unwrap(); @@ -114,6 +117,8 @@ impl ZJITState { let code_ptr = gen_exit_trampoline_with_counter(cb, exit_trampoline).unwrap(); ZJITState::get_instance().exit_trampoline_with_counter = code_ptr; } + + entry_trampoline } /// Return true if zjit_state has been initialized @@ -252,7 +257,7 @@ pub extern "C" fn rb_zjit_init() { let result = std::panic::catch_unwind(|| { // Initialize ZJIT states cruby::ids::init(); - ZJITState::init(); + let zjit_entry = ZJITState::init(); // Install a panic hook for ZJIT rb_bug_panic_hook(); @@ -261,8 +266,8 @@ pub extern "C" fn rb_zjit_init() { unsafe { rb_vm_insn_count = 0; } // ZJIT enabled and initialized successfully - assert!(unsafe{ !rb_zjit_enabled_p }); - unsafe { rb_zjit_enabled_p = true; } + assert!(unsafe{ rb_zjit_entry == null() }); + unsafe { rb_zjit_entry = zjit_entry; } }); if result.is_err() { From d24bb1e76155374c82d03e3287f41247a2f04dce Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 4 Nov 2025 18:09:10 -0600 Subject: [PATCH 15/15] [ruby/stringio] [DOC] Tweaks for StringIO#string= (https://github.com/ruby/stringio/pull/172) https://github.com/ruby/stringio/commit/17ae4daf9a --- ext/stringio/stringio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index c5e64a5c0f6808..39e4be58538f9f 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -514,7 +514,7 @@ strio_get_string(VALUE self) * call-seq: * string = other_string -> other_string * - * Assigns the underlying string as +other_string+, and sets position to zero; + * 
Replaces the stored string with +other_string+, and sets the position to zero; * returns +other_string+: * * StringIO.open('foo') do |strio| @@ -528,7 +528,7 @@ strio_get_string(VALUE self) * "foo" * "bar" * - * Related: StringIO#string (returns the underlying string). + * Related: StringIO#string (returns the stored string). */ static VALUE strio_set_string(VALUE self, VALUE string)