From 3aa866ca0d95a12f933d11e9b25c17f093c212a4 Mon Sep 17 00:00:00 2001 From: Jason Lunn Date: Tue, 13 Jan 2026 09:07:20 -0500 Subject: [PATCH 1/6] [ruby/rubygems] Remove special behavior for rake Fixes https://github.com/ruby/rubygems/pull/9244 https://github.com/ruby/rubygems/commit/7dc3d43d2e --- lib/rubygems/ext/builder.rb | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/rubygems/ext/builder.rb b/lib/rubygems/ext/builder.rb index 350daf1e16d7b7..62d36bcf48d304 100644 --- a/lib/rubygems/ext/builder.rb +++ b/lib/rubygems/ext/builder.rb @@ -163,8 +163,6 @@ def initialize(spec, build_args = spec.build_args, target_rbconfig = Gem.target_ @gem_dir = spec.full_gem_path @target_rbconfig = target_rbconfig @build_jobs = build_jobs - - @ran_rake = false end ## @@ -177,7 +175,6 @@ def builder_for(extension) # :nodoc: when /configure/ then Gem::Ext::ConfigureBuilder when /rakefile/i, /mkrf_conf/i then - @ran_rake = true Gem::Ext::RakeBuilder when /CMakeLists.txt/ then Gem::Ext::CmakeBuilder.new @@ -250,8 +247,6 @@ def build_extensions FileUtils.rm_f @spec.gem_build_complete_path @spec.extensions.each do |extension| - break if @ran_rake - build_extension extension, dest_path end From c959741695d9cccd5376775596dce643b5a500a1 Mon Sep 17 00:00:00 2001 From: Jason Lunn Date: Thu, 15 Jan 2026 17:07:18 -0500 Subject: [PATCH 2/6] [ruby/rubygems] Add test https://github.com/ruby/rubygems/commit/3d8140b289 --- test/rubygems/test_gem_ext_builder.rb | 52 ++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/test/rubygems/test_gem_ext_builder.rb b/test/rubygems/test_gem_ext_builder.rb index 34f85e6b756983..4ac386c4b48522 100644 --- a/test/rubygems/test_gem_ext_builder.rb +++ b/test/rubygems/test_gem_ext_builder.rb @@ -18,7 +18,7 @@ def setup @spec = util_spec "a" - @builder = Gem::Ext::Builder.new @spec, "" + @builder = Gem::Ext::Builder.new @spec end def teardown @@ -201,6 +201,56 @@ def test_build_extensions_install_ext_only Gem.configuration.install_extension_in_lib = @orig_install_extension_in_lib end +def test_build_multiple_extensions + pend "terminates on mswin" if vc_windows? && ruby_repo? + + extension_in_lib do + @spec.extensions << "ext/Rakefile" + @spec.extensions << "ext/extconf.rb" + + ext_dir = File.join @spec.gem_dir, "ext" + + FileUtils.mkdir_p ext_dir + + extconf_rb = File.join ext_dir, "extconf.rb" + rakefile = File.join ext_dir, "Rakefile" + + File.open extconf_rb, "w" do |f| + f.write <<-'RUBY' + require 'mkmf' + + create_makefile 'a' + RUBY + end + + File.open rakefile, "w" do |f| + f.write <<-RUBY + task :default do + FileUtils.touch File.join "#{ext_dir}", 'foo' + end + RUBY + end + + ext_lib_dir = File.join ext_dir, "lib" + FileUtils.mkdir ext_lib_dir + FileUtils.touch File.join ext_lib_dir, "a.rb" + FileUtils.mkdir File.join ext_lib_dir, "a" + FileUtils.touch File.join ext_lib_dir, "a", "b.rb" + + use_ui @ui do + @builder.build_extensions + end + + assert_path_exist @spec.extension_dir + assert_path_exist @spec.gem_build_complete_path + assert_path_exist File.join @spec.gem_dir, "ext", "foo" + assert_path_exist File.join @spec.extension_dir, "gem_make.out" + assert_path_exist File.join @spec.extension_dir, "a.rb" + assert_path_exist File.join @spec.gem_dir, "lib", "a.rb" + assert_path_exist File.join @spec.gem_dir, "lib", "a", "b.rb" + end + end + def test_build_extensions_none use_ui @ui do @builder.build_extensions From 2efffe09e10232dbe886285470e85b77a9ac7d7a Mon Sep 17 00:00:00 2001 From: Jason Lunn Date: Fri, 16 Jan 2026 10:50:16 -0500 Subject: [PATCH 3/6] [ruby/rubygems] Appease the linter https://github.com/ruby/rubygems/commit/aba307411a --- test/rubygems/test_gem_ext_builder.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rubygems/test_gem_ext_builder.rb b/test/rubygems/test_gem_ext_builder.rb index 4ac386c4b48522..c77f8cf89427e0 100644 --- a/test/rubygems/test_gem_ext_builder.rb +++ b/test/rubygems/test_gem_ext_builder.rb @@ -201,7 +201,7 @@ def test_build_extensions_install_ext_only Gem.configuration.install_extension_in_lib = @orig_install_extension_in_lib end -def test_build_multiple_extensions + def test_build_multiple_extensions pend "terminates on mswin" if vc_windows? && ruby_repo? extension_in_lib do From d93235874ab87cccd41762b8fbb0c4e7f5b4db16 Mon Sep 17 00:00:00 2001 From: Jason Lunn Date: Mon, 19 Jan 2026 12:19:29 -0500 Subject: [PATCH 4/6] [ruby/rubygems] Appease the linter again https://github.com/ruby/rubygems/commit/92f35e9684 --- test/rubygems/test_gem_ext_builder.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rubygems/test_gem_ext_builder.rb b/test/rubygems/test_gem_ext_builder.rb index c77f8cf89427e0..9d96593013d24f 100644 --- a/test/rubygems/test_gem_ext_builder.rb +++ b/test/rubygems/test_gem_ext_builder.rb @@ -250,7 +250,7 @@ def test_build_multiple_extensions assert_path_exist File.join @spec.gem_dir, "lib", "a", "b.rb" end end - + def test_build_extensions_none use_ui @ui do @builder.build_extensions From 0f1eea09490287cf9ed907f7cd84398b03074d98 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Tue, 20 Jan 2026 19:07:11 +0900 Subject: [PATCH 5/6] [ruby/rubygems] Skip failing tests https://github.com/ruby/rubygems/commit/ee68fd8d20 --- test/rubygems/test_gem_ext_builder.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/rubygems/test_gem_ext_builder.rb b/test/rubygems/test_gem_ext_builder.rb index 9d96593013d24f..5fcbc3e2acfdec 100644 --- a/test/rubygems/test_gem_ext_builder.rb +++ b/test/rubygems/test_gem_ext_builder.rb @@ -202,7 +202,8 @@ def test_build_extensions_install_ext_only end def test_build_multiple_extensions - pend "terminates on mswin" if vc_windows? && ruby_repo? + pend if RUBY_ENGINE == "truffleruby" + pend "terminates on ruby/ruby" if ruby_repo? extension_in_lib do @spec.extensions << "ext/Rakefile" From 519a4bdbc19693be9020419eb4dea9e66a256a41 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 20 Jan 2026 17:59:15 +0100 Subject: [PATCH 6/6] Optimize File.basename The actual algorithm is largely unchanged, just allowed to use singlebyte checks for common encodings. It could certainly be optimized much further, as here again it often scans from the front of the string when we're interested in the back of it. But the algorithm as many Windows only corner cases so I'd rather ship a good improvement now and eventually come back to it later. Most of improvement here is from the reduced setup cost (avodi double null checks, avoid duping the argument, etc), and skipping the multi-byte checks. ``` compare-ruby: ruby 4.1.0dev (2026-01-19T03:51:30Z master 631bf19b37) +PRISM [arm64-darwin25] built-ruby: ruby 4.1.0dev (2026-01-21T08:21:05Z opt-basename 7eb11745b2) +PRISM [arm64-darwin25] ``` | |compare-ruby|built-ruby| |:----------|-----------:|---------:| |long | 3.412M| 18.158M| | | -| 5.32x| |long_name | 1.981M| 8.580M| | | -| 4.33x| |withext | 3.200M| 12.986M| | | -| 4.06x| --- benchmark/file_basename.yml | 6 ++ file.c | 88 ++++++++++++++++++---------- internal/string.h | 8 ++- spec/ruby/core/file/basename_spec.rb | 26 ++++++++ 4 files changed, 97 insertions(+), 31 deletions(-) create mode 100644 benchmark/file_basename.yml diff --git a/benchmark/file_basename.yml b/benchmark/file_basename.yml new file mode 100644 index 00000000000000..fbd78785aa2262 --- /dev/null +++ b/benchmark/file_basename.yml @@ -0,0 +1,6 @@ +prelude: | + # frozen_string_literal: true +benchmark: + long: File.basename("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml") + long_name: File.basename("Users_george_src_github.com_ruby_ruby_benchmark_file_dirname.yml") + withext: File.basename("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml", ".yml") diff --git a/file.c b/file.c index a98bb9728e00db..c23e92d22e029f 100644 --- a/file.c +++ b/file.c @@ -3749,7 +3749,7 @@ strrdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc) } static char * -chompdirsep(const char *path, const char *end, rb_encoding *enc) +chompdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc) { while (path < end) { if (isdirsep(*path)) { @@ -3758,7 +3758,7 @@ chompdirsep(const char *path, const char *end, rb_encoding *enc) if (path >= end) return (char *)last; } else { - Inc(path, end, true, enc); + Inc(path, end, mb_enc, enc); } } return (char *)path; @@ -3768,7 +3768,7 @@ char * rb_enc_path_end(const char *path, const char *end, rb_encoding *enc) { if (path < end && isdirsep(*path)) path++; - return chompdirsep(path, end, enc); + return chompdirsep(path, end, true, enc); } static rb_encoding * @@ -4088,7 +4088,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na rb_enc_associate(result, enc = fs_enc_check(result, fname)); p = pend; } - p = chompdirsep(skiproot(buf, p), p, enc); + p = chompdirsep(skiproot(buf, p), p, true, enc); s += 2; } } @@ -4113,7 +4113,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } else #endif /* defined DOSISH || defined __CYGWIN__ */ - p = chompdirsep(skiproot(buf, p), p, enc); + p = chompdirsep(skiproot(buf, p), p, true, enc); } else { size_t len; @@ -4656,7 +4656,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum root_found: RSTRING_GETMEM(resolved, prefixptr, prefixlen); pend = prefixptr + prefixlen; - ptr = chompdirsep(prefixptr, pend, enc); + ptr = chompdirsep(prefixptr, pend, true, enc); if (ptr < pend) { prefixlen = ++ptr - prefixptr; rb_str_set_len(resolved, prefixlen); @@ -4910,8 +4910,8 @@ rmext(const char *p, long l0, long l1, const char *e, long l2, rb_encoding *enc) return 0; } -const char * -ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc) +static inline const char * +enc_find_basename(const char *name, long *baselen, long *alllen, bool mb_enc, rb_encoding *enc) { const char *p, *q, *e, *end; #if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC @@ -4919,13 +4919,22 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin #endif long f = 0, n = -1; - end = name + (alllen ? (size_t)*alllen : strlen(name)); - name = skipprefix(name, end, true, enc); + long len = (alllen ? (size_t)*alllen : strlen(name)); + + if (len <= 0) { + return name; + } + + end = name + len; + name = skipprefix(name, end, mb_enc, enc); #if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC root = name; #endif - while (isdirsep(*name)) + + while (isdirsep(*name)) { name++; + } + if (!*name) { p = name - 1; f = 1; @@ -4947,32 +4956,47 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin #endif /* defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC */ } else { - if (!(p = strrdirsep(name, end, true, enc))) { + p = strrdirsep(name, end, mb_enc, enc); + if (!p) { p = name; } else { - while (isdirsep(*p)) p++; /* skip last / */ + while (isdirsep(*p)) { + p++; /* skip last / */ + } } #if USE_NTFS n = ntfs_tail(p, end, enc) - p; #else - n = chompdirsep(p, end, enc) - p; + n = chompdirsep(p, end, mb_enc, enc) - p; #endif for (q = p; q - p < n && *q == '.'; q++); - for (e = 0; q - p < n; Inc(q, end, true, enc)) { + for (e = 0; q - p < n; Inc(q, end, mb_enc, enc)) { if (*q == '.') e = q; } - if (e) f = e - p; - else f = n; + if (e) { + f = e - p; + } + else { + f = n; + } } - if (baselen) + if (baselen) { *baselen = f; - if (alllen) + } + if (alllen) { *alllen = n; + } return p; } +const char * +ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc) +{ + return enc_find_basename(name, baselen, alllen, true, enc); +} + /* * call-seq: * File.basename(file_name [, suffix] ) -> base_name @@ -4993,7 +5017,7 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin static VALUE rb_file_s_basename(int argc, VALUE *argv, VALUE _) { - VALUE fname, fext, basename; + VALUE fname, fext; const char *name, *p; long f, n; rb_encoding *enc; @@ -5006,15 +5030,19 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _) enc = rb_str_enc_get(fext); } fname = argv[0]; - FilePathStringValue(fname); + CheckPath(fname, name); if (NIL_P(fext) || !(enc = rb_enc_compatible(fname, fext))) { - enc = rb_enc_get(fname); + enc = rb_str_enc_get(fname); fext = Qnil; } - if ((n = RSTRING_LEN(fname)) == 0 || !*(name = RSTRING_PTR(fname))) - return rb_str_new_shared(fname); - p = ruby_enc_find_basename(name, &f, &n, enc); + n = RSTRING_LEN(fname); + if (n == 0 || !*name) { + rb_enc_str_new(0, 0, enc); + } + + bool mb_enc = !rb_str_encindex_fastpath(rb_enc_to_index(enc)); + p = enc_find_basename(name, &f, &n, mb_enc, enc); if (n >= 0) { if (NIL_P(fext)) { f = n; @@ -5027,12 +5055,12 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _) } RB_GC_GUARD(fext); } - if (f == RSTRING_LEN(fname)) return rb_str_new_shared(fname); + if (f == RSTRING_LEN(fname)) { + return rb_str_new_shared(fname); + } } - basename = rb_str_new(p, f); - rb_enc_copy(basename, fname); - return basename; + return rb_enc_str_new(p, f, enc); } static VALUE rb_file_dirname_n(VALUE fname, int n); @@ -5350,7 +5378,7 @@ rb_file_join_ary(VALUE ary) rb_enc_copy(result, tmp); } else { - tail = chompdirsep(name, name + len, rb_enc_get(result)); + tail = chompdirsep(name, name + len, true, rb_enc_get(result)); if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) { rb_str_set_len(result, tail - name); } diff --git a/internal/string.h b/internal/string.h index dd5e20c0c68ed1..9212ce898653be 100644 --- a/internal/string.h +++ b/internal/string.h @@ -33,7 +33,13 @@ enum ruby_rstring_private_flags { static inline bool rb_str_encindex_fastpath(int encindex) { - // The overwhelming majority of strings are in one of these 3 encodings. + // The overwhelming majority of strings are in one of these 3 encodings, + // which are all either ASCII or perfect ASCII supersets. + // Hence you can use fast, single byte algorithms on them, such as `memchr` etc, + // without all the overhead of fetching the rb_encoding and using functions such as + // rb_enc_mbminlen etc. + // Many other encodings could qualify, but they are expected to be rare occurences, + // so it's better to keep that list small. switch (encindex) { case ENCINDEX_ASCII_8BIT: case ENCINDEX_UTF_8: diff --git a/spec/ruby/core/file/basename_spec.rb b/spec/ruby/core/file/basename_spec.rb index 989409d76b91de..87695ab97be3ca 100644 --- a/spec/ruby/core/file/basename_spec.rb +++ b/spec/ruby/core/file/basename_spec.rb @@ -151,8 +151,34 @@ File.basename("c:\\bar.txt", ".*").should == "bar" File.basename("c:\\bar.txt.exe", ".*").should == "bar.txt" end + + it "handles Shift JIS 0x5C (\\) as second byte of a multi-byte sequence" do + # dir\fileソname.txt + path = "dir\\file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + path.valid_encoding?.should be_true + File.basename(path).should == "file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + end end + it "rejects strings encoded with non ASCII-compatible encodings" do + Encoding.list.reject(&:ascii_compatible?).reject(&:dummy?).each do |enc| + begin + path = "/foo/bar".encode(enc) + rescue Encoding::ConverterNotFoundError + next + end + + -> { + File.basename(path) + }.should raise_error(Encoding::CompatibilityError) + end + end + + it "works with all ASCII-compatible encodings" do + Encoding.list.select(&:ascii_compatible?).each do |enc| + File.basename("/foo/bar".encode(enc)).should == "bar".encode(enc) + end + end it "returns the extension for a multibyte filename" do File.basename('/path/Офис.m4a').should == "Офис.m4a"