From 7e37e4e743a1ca1d5d7bbb87cdd9b943e3a4fe1d Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 13 Nov 2025 11:02:44 +0900 Subject: [PATCH 1/4] [Bug #21683] Respect reading encoding at `each_codepoint` --- io.c | 15 ++++++++------- test/ruby/test_io_m17n.rb | 13 +++++++++++++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/io.c b/io.c index 5366c74c498332..c29ab410a9a071 100644 --- a/io.c +++ b/io.c @@ -4900,7 +4900,7 @@ static VALUE rb_io_each_codepoint(VALUE io) { rb_io_t *fptr; - rb_encoding *enc; + rb_encoding *enc, *read_enc; unsigned int c; int r, n; @@ -4914,12 +4914,13 @@ rb_io_each_codepoint(VALUE io) r = 1; /* no invalid char yet */ for (;;) { make_readconv(fptr, 0); + read_enc = io_read_encoding(fptr); for (;;) { if (fptr->cbuf.len) { - if (fptr->encs.enc) + if (read_enc) r = rb_enc_precise_mbclen(fptr->cbuf.ptr+fptr->cbuf.off, fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len, - fptr->encs.enc); + read_enc); else r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1); if (!MBCLEN_NEEDMORE_P(r)) @@ -4931,21 +4932,21 @@ rb_io_each_codepoint(VALUE io) if (more_char(fptr) == MORE_CHAR_FINISHED) { clear_readconv(fptr); if (!MBCLEN_CHARFOUND_P(r)) { - enc = fptr->encs.enc; + enc = read_enc; goto invalid; } return io; } } if (MBCLEN_INVALID_P(r)) { - enc = fptr->encs.enc; + enc = read_enc; goto invalid; } n = MBCLEN_CHARFOUND_LEN(r); - if (fptr->encs.enc) { + if (read_enc) { c = rb_enc_codepoint(fptr->cbuf.ptr+fptr->cbuf.off, fptr->cbuf.ptr+fptr->cbuf.off+fptr->cbuf.len, - fptr->encs.enc); + read_enc); } else { c = (unsigned char)fptr->cbuf.ptr[fptr->cbuf.off]; diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index 1986026bfb3d9c..3f905aa1d8caa2 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -2806,4 +2806,17 @@ def test_each_codepoint_need_more flunk failure.join("\n---\n") end end + + def test_each_codepoint_encoding_with_ungetc + File.open(File::NULL, "rt:utf-8") do |f| + f.ungetc(%Q[\u{3042}\u{3044}\u{3046}]) + assert_equal [0x3042, 0x3044, 0x3046], f.each_codepoint.to_a + end + File.open(File::NULL, "rt:us-ascii") do |f| + f.ungetc(%Q[\u{3042}\u{3044}\u{3046}]) + assert_raise(ArgumentError) do + f.each_codepoint.to_a + end + end + end end From d80aa36847cb57a785ccaa9b1219fcbd26a74369 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 13 Nov 2025 18:40:11 +0900 Subject: [PATCH 2/4] Ractor support Windows platform. We need to skip only failing tests of RubyGems --- test/rubygems/test_gem_package_tar_header_ractor.rb | 2 +- tool/lib/core_assertions.rb | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/rubygems/test_gem_package_tar_header_ractor.rb b/test/rubygems/test_gem_package_tar_header_ractor.rb index 98fac2802c31d1..a829ec02121b7e 100644 --- a/test/rubygems/test_gem_package_tar_header_ractor.rb +++ b/test/rubygems/test_gem_package_tar_header_ractor.rb @@ -58,4 +58,4 @@ def test_encode_in_ractor assert_headers_equal header_bytes, new_header_bytes RUBY end -end +end unless RUBY_PLATFORM =~ /mingw|mswin/ diff --git a/tool/lib/core_assertions.rb b/tool/lib/core_assertions.rb index a9eb9ec3dceb69..ed38a34f225346 100644 --- a/tool/lib/core_assertions.rb +++ b/tool/lib/core_assertions.rb @@ -391,7 +391,6 @@ def assert_separately(args, file = nil, line = nil, src, ignore_stderr: nil, **o # Run Ractor-related test without influencing the main test suite def assert_ractor(src, args: [], require: nil, require_relative: nil, file: nil, line: nil, ignore_stderr: nil, **opt) omit unless defined?(Ractor) - omit if windows? # https://bugs.ruby-lang.org/issues/21262 shim_value = "class Ractor; alias value take; end" unless Ractor.method_defined?(:value) From 19c2c7e61059f42a29d0d54ee5fd4f021854b2b3 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 13 Nov 2025 19:47:35 +0900 Subject: [PATCH 3/4] [ruby/rubygems] Fixed with Performance/RegexpMatch cop https://github.com/ruby/rubygems/commit/93b8492bc0 --- test/rubygems/test_gem_package_tar_header_ractor.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rubygems/test_gem_package_tar_header_ractor.rb b/test/rubygems/test_gem_package_tar_header_ractor.rb index a829ec02121b7e..8f4cfb0072e265 100644 --- a/test/rubygems/test_gem_package_tar_header_ractor.rb +++ b/test/rubygems/test_gem_package_tar_header_ractor.rb @@ -58,4 +58,4 @@ def test_encode_in_ractor assert_headers_equal header_bytes, new_header_bytes RUBY end -end unless RUBY_PLATFORM =~ /mingw|mswin/ +end unless RUBY_PLATFORM.match?(/mingw|mswin/) From 4a1b88afb82291bd066472e8ad0000b23ed0d4dc Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 13 Nov 2025 20:50:45 +0900 Subject: [PATCH 4/4] Exclude lib/unicode_normalize from lib/un --- tool/sync_default_gems.rb | 6 +++++- tool/test/test_sync_default_gems.rb | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index ff0518b1a599d4..c2f352d797ffc8 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -61,7 +61,10 @@ def lib((upstream, branch), gemspec_in_subdir: false) ]) end - # Note: tool/auto_review_pr.rb also depends on this constant. + # Note: tool/auto_review_pr.rb also depends on these constants. + NO_UPSTREAM = [ + "lib/unicode_normalize", # not to match with "lib/un" + ] REPOSITORIES = { "io-console": repo("ruby/io-console", [ ["ext/io/console", "ext/io/console"], @@ -295,6 +298,7 @@ def lib((upstream, branch), gemspec_in_subdir: false) class << Repository def find_upstream(file) + return if NO_UPSTREAM.any? {|dst| file.start_with?(dst) } REPOSITORIES.find do |repo_name, repository| if repository.mappings.any? {|_src, dst| file.start_with?(dst) } break repo_name diff --git a/tool/test/test_sync_default_gems.rb b/tool/test/test_sync_default_gems.rb index f50be036fe16fd..cdbbb0c5394dc3 100755 --- a/tool/test/test_sync_default_gems.rb +++ b/tool/test/test_sync_default_gems.rb @@ -346,5 +346,20 @@ def test_squash_merge assert_equal("[ruby/#@target] Merge commit", subject, out) assert_includes(body, "Commit in branch", out) end + + def test_no_upstream_file + group = SyncDefaultGems::Repository.group(%w[ + lib/un.rb + lib/unicode_normalize/normalize.rb + lib/unicode_normalize/tables.rb + lib/net/https.rb + ]) + expected = { + "un" => %w[lib/un.rb], + "net-http" => %w[lib/net/https.rb], + nil => %w[lib/unicode_normalize/normalize.rb lib/unicode_normalize/tables.rb], + } + assert_equal(expected, group) + end end if /darwin|linux/ =~ RUBY_PLATFORM end