diff --git a/benchmark/file_basename.yml b/benchmark/file_basename.yml new file mode 100644 index 00000000000000..fbd78785aa2262 --- /dev/null +++ b/benchmark/file_basename.yml @@ -0,0 +1,6 @@ +prelude: | + # frozen_string_literal: true +benchmark: + long: File.basename("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml") + long_name: File.basename("Users_george_src_github.com_ruby_ruby_benchmark_file_dirname.yml") + withext: File.basename("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml", ".yml") diff --git a/file.c b/file.c index a98bb9728e00db..c23e92d22e029f 100644 --- a/file.c +++ b/file.c @@ -3749,7 +3749,7 @@ strrdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc) } static char * -chompdirsep(const char *path, const char *end, rb_encoding *enc) +chompdirsep(const char *path, const char *end, bool mb_enc, rb_encoding *enc) { while (path < end) { if (isdirsep(*path)) { @@ -3758,7 +3758,7 @@ chompdirsep(const char *path, const char *end, rb_encoding *enc) if (path >= end) return (char *)last; } else { - Inc(path, end, true, enc); + Inc(path, end, mb_enc, enc); } } return (char *)path; @@ -3768,7 +3768,7 @@ char * rb_enc_path_end(const char *path, const char *end, rb_encoding *enc) { if (path < end && isdirsep(*path)) path++; - return chompdirsep(path, end, enc); + return chompdirsep(path, end, true, enc); } static rb_encoding * @@ -4088,7 +4088,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na rb_enc_associate(result, enc = fs_enc_check(result, fname)); p = pend; } - p = chompdirsep(skiproot(buf, p), p, enc); + p = chompdirsep(skiproot(buf, p), p, true, enc); s += 2; } } @@ -4113,7 +4113,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } else #endif /* defined DOSISH || defined __CYGWIN__ */ - p = chompdirsep(skiproot(buf, p), p, enc); + p = chompdirsep(skiproot(buf, p), p, true, enc); } else { size_t len; @@ -4656,7 +4656,7 @@ 
rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum root_found: RSTRING_GETMEM(resolved, prefixptr, prefixlen); pend = prefixptr + prefixlen; - ptr = chompdirsep(prefixptr, pend, enc); + ptr = chompdirsep(prefixptr, pend, true, enc); if (ptr < pend) { prefixlen = ++ptr - prefixptr; rb_str_set_len(resolved, prefixlen); @@ -4910,8 +4910,8 @@ rmext(const char *p, long l0, long l1, const char *e, long l2, rb_encoding *enc) return 0; } -const char * -ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc) +static inline const char * +enc_find_basename(const char *name, long *baselen, long *alllen, bool mb_enc, rb_encoding *enc) { const char *p, *q, *e, *end; #if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC @@ -4919,13 +4919,22 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin #endif long f = 0, n = -1; - end = name + (alllen ? (size_t)*alllen : strlen(name)); - name = skipprefix(name, end, true, enc); + long len = (alllen ? 
(size_t)*alllen : strlen(name)); + + if (len <= 0) { + return name; + } + + end = name + len; + name = skipprefix(name, end, mb_enc, enc); #if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC root = name; #endif - while (isdirsep(*name)) + + while (isdirsep(*name)) { name++; + } + if (!*name) { p = name - 1; f = 1; @@ -4947,32 +4956,47 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin #endif /* defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC */ } else { - if (!(p = strrdirsep(name, end, true, enc))) { + p = strrdirsep(name, end, mb_enc, enc); + if (!p) { p = name; } else { - while (isdirsep(*p)) p++; /* skip last / */ + while (isdirsep(*p)) { + p++; /* skip last / */ + } } #if USE_NTFS n = ntfs_tail(p, end, enc) - p; #else - n = chompdirsep(p, end, enc) - p; + n = chompdirsep(p, end, mb_enc, enc) - p; #endif for (q = p; q - p < n && *q == '.'; q++); - for (e = 0; q - p < n; Inc(q, end, true, enc)) { + for (e = 0; q - p < n; Inc(q, end, mb_enc, enc)) { if (*q == '.') e = q; } - if (e) f = e - p; - else f = n; + if (e) { + f = e - p; + } + else { + f = n; + } } - if (baselen) + if (baselen) { *baselen = f; - if (alllen) + } + if (alllen) { *alllen = n; + } return p; } +const char * +ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc) +{ + return enc_find_basename(name, baselen, alllen, true, enc); +} + /* * call-seq: * File.basename(file_name [, suffix] ) -> base_name @@ -4993,7 +5017,7 @@ ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encodin static VALUE rb_file_s_basename(int argc, VALUE *argv, VALUE _) { - VALUE fname, fext, basename; + VALUE fname, fext; const char *name, *p; long f, n; rb_encoding *enc; @@ -5006,15 +5030,19 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _) enc = rb_str_enc_get(fext); } fname = argv[0]; - FilePathStringValue(fname); + CheckPath(fname, name); if (NIL_P(fext) || !(enc = rb_enc_compatible(fname, fext))) { - enc = 
rb_enc_get(fname);
+        enc = rb_str_enc_get(fname);
         fext = Qnil;
     }
-    if ((n = RSTRING_LEN(fname)) == 0 || !*(name = RSTRING_PTR(fname)))
-        return rb_str_new_shared(fname);
 
-    p = ruby_enc_find_basename(name, &f, &n, enc);
+    n = RSTRING_LEN(fname);
+    if (n == 0 || !*name) {
+        return rb_enc_str_new(0, 0, enc); /* empty path: return an empty string in enc instead of scanning with f unset */
+    }
+
+    bool mb_enc = !rb_str_encindex_fastpath(rb_enc_to_index(enc));
+    p = enc_find_basename(name, &f, &n, mb_enc, enc);
     if (n >= 0) {
         if (NIL_P(fext)) {
             f = n;
@@ -5027,12 +5055,12 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _)
             }
             RB_GC_GUARD(fext);
         }
-        if (f == RSTRING_LEN(fname)) return rb_str_new_shared(fname);
+        if (f == RSTRING_LEN(fname)) {
+            return rb_str_new_shared(fname);
+        }
     }
 
-    basename = rb_str_new(p, f);
-    rb_enc_copy(basename, fname);
-    return basename;
+    return rb_enc_str_new(p, f, enc);
 }
 
 static VALUE rb_file_dirname_n(VALUE fname, int n);
@@ -5350,7 +5378,7 @@ rb_file_join_ary(VALUE ary)
             rb_enc_copy(result, tmp);
         }
         else {
-            tail = chompdirsep(name, name + len, rb_enc_get(result));
+            tail = chompdirsep(name, name + len, true, rb_enc_get(result));
             if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) {
                 rb_str_set_len(result, tail - name);
             }
diff --git a/internal/string.h b/internal/string.h
index dd5e20c0c68ed1..9212ce898653be 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -33,7 +33,13 @@ enum ruby_rstring_private_flags {
 static inline bool
 rb_str_encindex_fastpath(int encindex)
 {
-    // The overwhelming majority of strings are in one of these 3 encodings.
+    // The overwhelming majority of strings are in one of these 3 encodings,
+    // which are all either ASCII or perfect ASCII supersets.
+    // Hence you can use fast, single byte algorithms on them, such as `memchr` etc,
+    // without all the overhead of fetching the rb_encoding and using functions such as
+    // rb_enc_mbminlen etc.
+    // Many other encodings could qualify, but they are expected to be rare occurrences,
+    // so it's better to keep that list small.
switch (encindex) { case ENCINDEX_ASCII_8BIT: case ENCINDEX_UTF_8: diff --git a/lib/rubygems/ext/builder.rb b/lib/rubygems/ext/builder.rb index 350daf1e16d7b7..62d36bcf48d304 100644 --- a/lib/rubygems/ext/builder.rb +++ b/lib/rubygems/ext/builder.rb @@ -163,8 +163,6 @@ def initialize(spec, build_args = spec.build_args, target_rbconfig = Gem.target_ @gem_dir = spec.full_gem_path @target_rbconfig = target_rbconfig @build_jobs = build_jobs - - @ran_rake = false end ## @@ -177,7 +175,6 @@ def builder_for(extension) # :nodoc: when /configure/ then Gem::Ext::ConfigureBuilder when /rakefile/i, /mkrf_conf/i then - @ran_rake = true Gem::Ext::RakeBuilder when /CMakeLists.txt/ then Gem::Ext::CmakeBuilder.new @@ -250,8 +247,6 @@ def build_extensions FileUtils.rm_f @spec.gem_build_complete_path @spec.extensions.each do |extension| - break if @ran_rake - build_extension extension, dest_path end diff --git a/spec/ruby/core/file/basename_spec.rb b/spec/ruby/core/file/basename_spec.rb index 989409d76b91de..87695ab97be3ca 100644 --- a/spec/ruby/core/file/basename_spec.rb +++ b/spec/ruby/core/file/basename_spec.rb @@ -151,8 +151,34 @@ File.basename("c:\\bar.txt", ".*").should == "bar" File.basename("c:\\bar.txt.exe", ".*").should == "bar.txt" end + + it "handles Shift JIS 0x5C (\\) as second byte of a multi-byte sequence" do + # dir\fileソname.txt + path = "dir\\file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + path.valid_encoding?.should be_true + File.basename(path).should == "file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + end end + it "rejects strings encoded with non ASCII-compatible encodings" do + Encoding.list.reject(&:ascii_compatible?).reject(&:dummy?).each do |enc| + begin + path = "/foo/bar".encode(enc) + rescue Encoding::ConverterNotFoundError + next + end + + -> { + File.basename(path) + }.should raise_error(Encoding::CompatibilityError) + end + end + + it "works with all ASCII-compatible encodings" do + 
Encoding.list.select(&:ascii_compatible?).each do |enc| + File.basename("/foo/bar".encode(enc)).should == "bar".encode(enc) + end + end it "returns the extension for a multibyte filename" do File.basename('/path/Офис.m4a').should == "Офис.m4a" diff --git a/test/rubygems/test_gem_ext_builder.rb b/test/rubygems/test_gem_ext_builder.rb index 34f85e6b756983..5fcbc3e2acfdec 100644 --- a/test/rubygems/test_gem_ext_builder.rb +++ b/test/rubygems/test_gem_ext_builder.rb @@ -18,7 +18,7 @@ def setup @spec = util_spec "a" - @builder = Gem::Ext::Builder.new @spec, "" + @builder = Gem::Ext::Builder.new @spec end def teardown @@ -201,6 +201,57 @@ def test_build_extensions_install_ext_only Gem.configuration.install_extension_in_lib = @orig_install_extension_in_lib end + def test_build_multiple_extensions + pend if RUBY_ENGINE == "truffleruby" + pend "terminates on ruby/ruby" if ruby_repo? + + extension_in_lib do + @spec.extensions << "ext/Rakefile" + @spec.extensions << "ext/extconf.rb" + + ext_dir = File.join @spec.gem_dir, "ext" + + FileUtils.mkdir_p ext_dir + + extconf_rb = File.join ext_dir, "extconf.rb" + rakefile = File.join ext_dir, "Rakefile" + + File.open extconf_rb, "w" do |f| + f.write <<-'RUBY' + require 'mkmf' + + create_makefile 'a' + RUBY + end + + File.open rakefile, "w" do |f| + f.write <<-RUBY + task :default do + FileUtils.touch File.join "#{ext_dir}", 'foo' + end + RUBY + end + + ext_lib_dir = File.join ext_dir, "lib" + FileUtils.mkdir ext_lib_dir + FileUtils.touch File.join ext_lib_dir, "a.rb" + FileUtils.mkdir File.join ext_lib_dir, "a" + FileUtils.touch File.join ext_lib_dir, "a", "b.rb" + + use_ui @ui do + @builder.build_extensions + end + + assert_path_exist @spec.extension_dir + assert_path_exist @spec.gem_build_complete_path + assert_path_exist File.join @spec.gem_dir, "ext", "foo" + assert_path_exist File.join @spec.extension_dir, "gem_make.out" + assert_path_exist File.join @spec.extension_dir, "a.rb" + assert_path_exist File.join @spec.gem_dir, 
"lib", "a.rb" + assert_path_exist File.join @spec.gem_dir, "lib", "a", "b.rb" + end + end + def test_build_extensions_none use_ui @ui do @builder.build_extensions