From d89b45339223035433a8036c371f86e12134d36a Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Mon, 15 Sep 2025 08:10:13 -0500 Subject: [PATCH 1/9] [DOC] Tweaks for String#oct --- string.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 10 deletions(-) diff --git a/string.c b/string.c index 8d27248020cded..834641cfdbf0c1 100644 --- a/string.c +++ b/string.c @@ -10744,20 +10744,79 @@ rb_str_hex(VALUE str) * call-seq: * oct -> integer * - * Interprets the leading substring of +self+ as a string of octal digits - * (with an optional sign) and returns the corresponding number; - * returns zero if there is no such leading substring: + * Interprets the leading substring of +self+ as octal, binary, decimal, or hexadecimal, possibly signed; + * returns their value as an integer. * - * '123'.oct # => 83 - * '-377'.oct # => -255 - * '0377non-numeric'.oct # => 255 - * 'non-numeric'.oct # => 0 + * In brief: * - * If +self+ starts with 0, radix indicators are honored; - * see Kernel#Integer. + * # Interpreted as octal. + * '777'.oct # => 511 + * '777x'.oct # => 511 + * '0777'.oct # => 511 + * '0o777'.oct # => 511 + * '-777'.oct # => -511 + * # Not interpreted as octal. + * '0b111'.oct # => 7 # Interpreted as binary. + * '0d999'.oct # => 999 # Interpreted as decimal. + * '0xfff'.oct # => 4095 # Interpreted as hexadecimal. * - * Related: String#hex. 
+ * The leading substring is interpreted as octal when it begins with: * + * - One or more characters representing octal digits + * (each in the range '0'..'7'); + * the string to be interpreted ends at the first character that does not represent an octal digit: + * + * '7'.oct # => 7 + * '11'.oct # => 9 + * '777'.oct # => 511 + * '0777'.oct # => 511 + * '7778'.oct # => 511 + * '777x'.oct # => 511 + * + * - '0o', followed by one or more octal digits: + * + * '0o777'.oct # => 511 + * '0o7778'.oct # => 511 + * + * The leading substring is _not_ interpreted as octal when it begins with: + * + * - '0b', followed by one or more characters representing binary digits + * (each in the range '0'..'1'); + * the string to be interpreted ends at the first character that does not represent a binary digit; + * the string is interpreted as binary digits (base 2): + * + * '0b111'.oct # => 7 + * '0b1112'.oct # => 7 + * + * - '0d', followed by one or more characters representing decimal digits + * (each in the range '0'..'9'); + * the string to be interpreted ends at the first character that does not represent a decimal digit; + * the string is interpreted as decimal digits (base 10): + * + * '0d999'.oct # => 999 + * '0d999x'.oct # => 999 + * + * - '0x', followed by one or more characters representing hexadecimal digits + * (each in one of the ranges '0'..'9', 'a'..'f', or 'A'..'F'); + * the string to be interpreted ends at the first character that does not represent a hexadecimal digit; 
+ * the string is interpreted as hexadecimal digits (base 16): + * + * '0xfff'.oct # => 4095 + * '0xfffg'.oct # => 4095 + * + * Any of the above may be prefixed with '-', which negates the interpreted value: + * + * '-777'.oct # => -511 + * '-0777'.oct # => -511 + * '-0b111'.oct # => -7 + * '-0xfff'.oct # => -4095 + * + * For any substring not described above, returns zero: + * + * 'foo'.oct # => 0 + * ''.oct # => 0 + * + * Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. */ static VALUE From 70210acab046239beedf880edf0330e3cb389ce8 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 15 Sep 2025 22:10:14 +0900 Subject: [PATCH 2/9] [ruby/optparse] Prefer `Proc` over `Method` The performances are: block > proc > method object. https://github.com/ruby/optparse/commit/9ec5d1d582 --- lib/optparse.rb | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/lib/optparse.rb b/lib/optparse.rb index 06e33db1f533c4..ea6844b9558e5c 100644 --- a/lib/optparse.rb +++ b/lib/optparse.rb @@ -1855,7 +1855,7 @@ def permute(*argv, **keywords) # def permute!(argv = default_argv, **keywords) nonopts = [] - order!(argv, **keywords, &nonopts.method(:<<)) + order!(argv, **keywords) {|nonopt| nonopts << nonopt} argv[0, 0] = nonopts argv end @@ -1908,13 +1908,16 @@ def getopts(*args, symbolize_names: false, **keywords) single_options, *long_options = *args result = {} + setter = (symbolize_names ? + ->(name, val) {result[name.to_sym] = val} + : ->(name, val) {result[name] = val}) single_options.scan(/(.)(:)?/) do |opt, val| if val - result[opt] = nil + setter[opt, nil] define("-#{opt} VAL") else - result[opt] = false + setter[opt, false] define("-#{opt}") end end if single_options @@ -1923,16 +1926,16 @@ def getopts(*args, symbolize_names: false, **keywords) arg, desc = arg.split(';', 2) opt, val = arg.split(':', 2) if val - result[opt] = val.empty? ? 
nil : val + setter[opt, (val unless val.empty?)] define("--#{opt}=#{result[opt] || "VAL"}", *[desc].compact) else - result[opt] = false + setter[opt, false] define("--#{opt}", *[desc].compact) end end - parse_in_order(argv, result.method(:[]=), **keywords) - symbolize_names ? result.transform_keys(&:to_sym) : result + parse_in_order(argv, setter, **keywords) + result end # @@ -1982,7 +1985,7 @@ def complete(typ, opt, icase = false, *pat) # :nodoc: visit(:complete, typ, opt, icase, *pat) {|o, *sw| return sw} } exc = ambiguous ? AmbiguousOption : InvalidOption - raise exc.new(opt, additional: self.method(:additional_message).curry[typ]) + raise exc.new(opt, additional: proc {|o| additional_message(typ, o)}) end private :complete @@ -2273,9 +2276,10 @@ def recover(argv) argv end + DIR = File.join(__dir__, '') def self.filter_backtrace(array) unless $DEBUG - array.delete_if(&%r"\A#{Regexp.quote(__FILE__)}:"o.method(:=~)) + array.delete_if {|bt| bt.start_with?(DIR)} end array end From b0ce1fd549a3227ef4d9f65e4cdf0df93e4adeb0 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Sat, 13 Sep 2025 14:10:57 -0400 Subject: [PATCH 3/9] Combine rb_imemo_tmpbuf_auto_free_pointer and rb_imemo_tmpbuf_new --- dir.c | 2 +- imemo.c | 15 +++------------ internal/imemo.h | 8 +++----- process.c | 6 +++--- util.c | 2 +- 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/dir.c b/dir.c index b934f2795c90e0..25ed59c668fab6 100644 --- a/dir.c +++ b/dir.c @@ -1480,7 +1480,7 @@ rb_dir_getwd_ospath(void) VALUE cwd; VALUE path_guard; - path_guard = rb_imemo_tmpbuf_auto_free_pointer(); + path_guard = rb_imemo_tmpbuf_new(); path = ruby_getcwd(); rb_imemo_tmpbuf_set_ptr(path_guard, path); #ifdef __APPLE__ diff --git a/imemo.c b/imemo.c index 02cba387bd22be..1bef7a71a69e77 100644 --- a/imemo.c +++ b/imemo.c @@ -48,23 +48,14 @@ rb_imemo_new(enum imemo_type type, VALUE v0, size_t size) return (VALUE)obj; } -static rb_imemo_tmpbuf_t * -rb_imemo_tmpbuf_new(void) -{ - return 
IMEMO_NEW(rb_imemo_tmpbuf_t, imemo_tmpbuf, 0); -} - void * rb_alloc_tmp_buffer_with_count(volatile VALUE *store, size_t size, size_t cnt) { - void *ptr; - rb_imemo_tmpbuf_t *tmpbuf; - /* Keep the order; allocate an empty imemo first then xmalloc, to * get rid of potential memory leak */ - tmpbuf = rb_imemo_tmpbuf_new(); + rb_imemo_tmpbuf_t *tmpbuf = (rb_imemo_tmpbuf_t *)rb_imemo_tmpbuf_new(); *store = (VALUE)tmpbuf; - ptr = ruby_xmalloc(size); + void *ptr = ruby_xmalloc(size); tmpbuf->ptr = ptr; tmpbuf->cnt = cnt; @@ -97,7 +88,7 @@ rb_free_tmp_buffer(volatile VALUE *store) rb_imemo_tmpbuf_t * rb_imemo_tmpbuf_parser_heap(void *buf, rb_imemo_tmpbuf_t *old_heap, size_t cnt) { - rb_imemo_tmpbuf_t *tmpbuf = rb_imemo_tmpbuf_new(); + rb_imemo_tmpbuf_t *tmpbuf = (rb_imemo_tmpbuf_t *)rb_imemo_tmpbuf_new(); tmpbuf->ptr = buf; tmpbuf->next = old_heap; tmpbuf->cnt = cnt; diff --git a/internal/imemo.h b/internal/imemo.h index de39102432ea84..3673190809e86a 100644 --- a/internal/imemo.h +++ b/internal/imemo.h @@ -138,10 +138,8 @@ static inline enum imemo_type imemo_type(VALUE imemo); static inline int imemo_type_p(VALUE imemo, enum imemo_type imemo_type); static inline bool imemo_throw_data_p(VALUE imemo); static inline struct vm_ifunc *rb_vm_ifunc_proc_new(rb_block_call_func_t func, const void *data); -static inline VALUE rb_imemo_tmpbuf_auto_free_pointer(void); static inline void *RB_IMEMO_TMPBUF_PTR(VALUE v); static inline void *rb_imemo_tmpbuf_set_ptr(VALUE v, void *ptr); -static inline VALUE rb_imemo_tmpbuf_auto_free_pointer_new_from_an_RString(VALUE str); static inline void MEMO_V1_SET(struct MEMO *m, VALUE v); static inline void MEMO_V2_SET(struct MEMO *m, VALUE v); @@ -201,7 +199,7 @@ rb_vm_ifunc_proc_new(rb_block_call_func_t func, const void *data) } static inline VALUE -rb_imemo_tmpbuf_auto_free_pointer(void) +rb_imemo_tmpbuf_new(void) { return rb_imemo_new(imemo_tmpbuf, 0, sizeof(rb_imemo_tmpbuf_t)); } @@ -220,7 +218,7 @@ rb_imemo_tmpbuf_set_ptr(VALUE v, void *ptr) } 
static inline VALUE -rb_imemo_tmpbuf_auto_free_pointer_new_from_an_RString(VALUE str) +rb_imemo_tmpbuf_new_from_an_RString(VALUE str) { const void *src; VALUE imemo; @@ -230,7 +228,7 @@ rb_imemo_tmpbuf_auto_free_pointer_new_from_an_RString(VALUE str) StringValue(str); /* create tmpbuf to keep the pointer before xmalloc */ - imemo = rb_imemo_tmpbuf_auto_free_pointer(); + imemo = rb_imemo_tmpbuf_new(); tmpbuf = (rb_imemo_tmpbuf_t *)imemo; len = RSTRING_LEN(str); src = RSTRING_PTR(str); diff --git a/process.c b/process.c index da9ce74027ce3c..0fb727db8af9f8 100644 --- a/process.c +++ b/process.c @@ -2635,7 +2635,7 @@ rb_exec_fillarg(VALUE prog, int argc, VALUE *argv, VALUE env, VALUE opthash, VAL } rb_str_buf_cat(argv_str, (char *)&null, sizeof(null)); /* terminator for execve. */ eargp->invoke.cmd.argv_str = - rb_imemo_tmpbuf_auto_free_pointer_new_from_an_RString(argv_str); + rb_imemo_tmpbuf_new_from_an_RString(argv_str); } RB_GC_GUARD(execarg_obj); } @@ -2726,7 +2726,7 @@ open_func(void *ptr) static void rb_execarg_allocate_dup2_tmpbuf(struct rb_execarg *eargp, long len) { - VALUE tmpbuf = rb_imemo_tmpbuf_auto_free_pointer(); + VALUE tmpbuf = rb_imemo_tmpbuf_new(); rb_imemo_tmpbuf_set_ptr(tmpbuf, ruby_xmalloc(run_exec_dup2_tmpbuf_size(len))); eargp->dup2_tmpbuf = tmpbuf; } @@ -2830,7 +2830,7 @@ rb_execarg_parent_start1(VALUE execarg_obj) p = NULL; rb_str_buf_cat(envp_str, (char *)&p, sizeof(p)); eargp->envp_str = - rb_imemo_tmpbuf_auto_free_pointer_new_from_an_RString(envp_str); + rb_imemo_tmpbuf_new_from_an_RString(envp_str); eargp->envp_buf = envp_buf; /* diff --git a/util.c b/util.c index 3e8ae590a8d54e..4caa324849af31 100644 --- a/util.c +++ b/util.c @@ -529,7 +529,7 @@ ruby_strdup(const char *str) char * ruby_getcwd(void) { - VALUE guard = rb_imemo_tmpbuf_auto_free_pointer(); + VALUE guard = rb_imemo_tmpbuf_new(); int size = 200; char *buf = xmalloc(size); From 23c04a9be7a842c3897ee7a8c926353b607bf7dc Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 
15 Sep 2025 23:18:17 +0900 Subject: [PATCH 4/9] [DOC] Markup as markdown --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 7157700816c3e8..7534539a21d2c9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -63,7 +63,7 @@ Note: We're only listing outstanding class updates. * IO - * `IO.select` accepts +Float::INFINITY+ as a timeout argument. + * `IO.select` accepts `Float::INFINITY` as a timeout argument. [[Feature #20610]] * Math From 6c34880111a0978407224fc353c35f2a3a0a981a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 15 Sep 2025 10:18:46 -0400 Subject: [PATCH 5/9] [ruby/prism] Fix character literal forced encoding If a character literal was followed by a string concatenation, then the forced encoding of the string concatenation could accidentally overwrite the explicit encoding of the character literal. We now handle this properly. https://github.com/ruby/prism/commit/125c375d74 --- prism/prism.c | 30 ++++++++++++++--------- test/prism/fixtures/character_literal.txt | 2 ++ test/prism/ruby/ruby_parser_test.rb | 1 + 3 files changed, 22 insertions(+), 11 deletions(-) create mode 100644 test/prism/fixtures/character_literal.txt diff --git a/prism/prism.c b/prism/prism.c index 06419d13789e45..2e202c37456ea5 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -18491,20 +18491,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b return (pm_node_t *) node; } case PM_TOKEN_CHARACTER_LITERAL: { - parser_lex(parser); - - pm_token_t opening = parser->previous; - opening.type = PM_TOKEN_STRING_BEGIN; - opening.end = opening.start + 1; - - pm_token_t content = parser->previous; - content.type = PM_TOKEN_STRING_CONTENT; - content.start = content.start + 1; - pm_token_t closing = not_provided(parser); - pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing); + pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string( + parser, + 
&(pm_token_t) { + .type = PM_TOKEN_STRING_BEGIN, + .start = parser->current.start, + .end = parser->current.start + 1 + }, + &(pm_token_t) { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->current.start + 1, + .end = parser->current.end + }, + &closing + ); + pm_node_flag_set(node, parse_unescaped_encoding(parser)); + // Skip past the character literal here, since now we have handled + // parser->explicit_encoding correctly. + parser_lex(parser); + // Characters can be followed by strings in which case they are // automatically concatenated. if (match1(parser, PM_TOKEN_STRING_BEGIN)) { diff --git a/test/prism/fixtures/character_literal.txt b/test/prism/fixtures/character_literal.txt new file mode 100644 index 00000000000000..920332123f13ac --- /dev/null +++ b/test/prism/fixtures/character_literal.txt @@ -0,0 +1,2 @@ +# encoding: Windows-31J +p ?\u3042"" diff --git a/test/prism/ruby/ruby_parser_test.rb b/test/prism/ruby/ruby_parser_test.rb index bcaed7979150bc..b21ad81391ed1e 100644 --- a/test/prism/ruby/ruby_parser_test.rb +++ b/test/prism/ruby/ruby_parser_test.rb @@ -16,6 +16,7 @@ module Prism class RubyParserTest < TestCase todos = [ + "character_literal.txt", "encoding_euc_jp.txt", "regex_char_width.txt", "seattlerb/masgn_colon3.txt", From b08573c8150c97822b05b743d5ebb8c4fff5315f Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Sun, 14 Sep 2025 11:43:34 +0200 Subject: [PATCH 6/9] [ruby/prism] Fix back reference for ruby_parser on Ruby 2.7 Symbol#name is only a thing since Ruby 3.0 https://github.com/ruby/prism/commit/2de82b15fc --- lib/prism/translation/ruby_parser.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb index ac538a2e97ae43..2ca7da0bf2a5d5 100644 --- a/lib/prism/translation/ruby_parser.rb +++ b/lib/prism/translation/ruby_parser.rb @@ -152,7 +152,7 @@ def visit_assoc_splat_node(node) # ^^ # ``` def 
visit_back_reference_read_node(node) - s(node, :back_ref, node.name.name.delete_prefix("$").to_sym) + s(node, :back_ref, node.name.to_s.delete_prefix("$").to_sym) end # ``` From 61df125325c4f5cad0fd63a831f7afd4c3e71dba Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 16 Sep 2025 00:11:14 +0900 Subject: [PATCH 7/9] [DOC] Markup code in `Float::MIN` document --- numeric.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numeric.c b/numeric.c index 89cff8a730fc9c..de5b02aaf9eb41 100644 --- a/numeric.c +++ b/numeric.c @@ -6455,7 +6455,7 @@ Init_Numeric(void) * * If the platform supports denormalized numbers, * there are numbers between zero and Float::MIN. - * 0.0.next_float returns the smallest positive floating point number + * +0.0.next_float+ returns the smallest positive floating point number * including denormalized numbers. */ rb_define_const(rb_cFloat, "MIN", DBL2NUM(DBL_MIN)); From 1e3e04cd657c35fdd8d95096195d6b72b64e516c Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 15 Sep 2025 10:47:14 -0400 Subject: [PATCH 8/9] Move rb_imemo_tmpbuf_new to imemo.c --- imemo.c | 6 ++++++ internal/imemo.h | 7 +------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/imemo.c b/imemo.c index 1bef7a71a69e77..1fc49c434af75e 100644 --- a/imemo.c +++ b/imemo.c @@ -48,6 +48,12 @@ rb_imemo_new(enum imemo_type type, VALUE v0, size_t size) return (VALUE)obj; } +VALUE +rb_imemo_tmpbuf_new(void) +{ + return rb_imemo_new(imemo_tmpbuf, 0, sizeof(rb_imemo_tmpbuf_t)); +} + void * rb_alloc_tmp_buffer_with_count(volatile VALUE *store, size_t size, size_t cnt) { diff --git a/internal/imemo.h b/internal/imemo.h index 3673190809e86a..de617d94c1bf5c 100644 --- a/internal/imemo.h +++ b/internal/imemo.h @@ -132,6 +132,7 @@ struct MEMO { #ifndef RUBY_RUBYPARSER_H typedef struct rb_imemo_tmpbuf_struct rb_imemo_tmpbuf_t; #endif +VALUE rb_imemo_tmpbuf_new(void); rb_imemo_tmpbuf_t *rb_imemo_tmpbuf_parser_heap(void *buf, rb_imemo_tmpbuf_t *old_heap, size_t cnt); 
struct vm_ifunc *rb_vm_ifunc_new(rb_block_call_func_t func, const void *data, int min_argc, int max_argc); static inline enum imemo_type imemo_type(VALUE imemo); @@ -198,12 +199,6 @@ rb_vm_ifunc_proc_new(rb_block_call_func_t func, const void *data) return rb_vm_ifunc_new(func, data, 0, UNLIMITED_ARGUMENTS); } -static inline VALUE -rb_imemo_tmpbuf_new(void) -{ - return rb_imemo_new(imemo_tmpbuf, 0, sizeof(rb_imemo_tmpbuf_t)); -} - static inline void * RB_IMEMO_TMPBUF_PTR(VALUE v) { From 7dd9c76ad46af63f76e0df243f76a1720f54d50d Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 15 Sep 2025 10:50:01 -0400 Subject: [PATCH 9/9] Make imemo_tmpbuf not write-barrier protected imemo_tmpbuf is not write-barrier protected and uses mark maybe to mark the buffer it holds. The normal rb_imemo_new creates a write-barrier protected object which can make the tmpbuf miss marking references. --- imemo.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/imemo.c b/imemo.c index 1fc49c434af75e..7cec33bc1edf00 100644 --- a/imemo.c +++ b/imemo.c @@ -51,7 +51,10 @@ rb_imemo_new(enum imemo_type type, VALUE v0, size_t size) VALUE rb_imemo_tmpbuf_new(void) { - return rb_imemo_new(imemo_tmpbuf, 0, sizeof(rb_imemo_tmpbuf_t)); + VALUE flags = T_IMEMO | (imemo_tmpbuf << FL_USHIFT); + NEWOBJ_OF(obj, rb_imemo_tmpbuf_t, 0, flags, sizeof(rb_imemo_tmpbuf_t), NULL); + + return (VALUE)obj; } void *