From 3d8af5df11e8dba8ccd37b03f9f5b92b2bc66dcb Mon Sep 17 00:00:00 2001 From: ydah Date: Tue, 5 Aug 2025 21:46:41 +0900 Subject: [PATCH 01/21] Fix typo in documentation comment for exc_inspect method in error.c --- error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/error.c b/error.c index e14ecd2393dcb6..9a758a7dd711b8 100644 --- a/error.c +++ b/error.c @@ -1883,7 +1883,7 @@ exc_inspect(VALUE exc) * # String * end * - * The value returned by this method migth be adjusted when raising (see Kernel#raise), + * The value returned by this method might be adjusted when raising (see Kernel#raise), * or during intermediate handling by #set_backtrace. * * See also #backtrace_locations that provide the same value, as structured objects. From a6aaeb9acfa47ebfafed051069f7ea870ded4b99 Mon Sep 17 00:00:00 2001 From: ArtSin Date: Tue, 5 Aug 2025 16:39:26 +0400 Subject: [PATCH 02/21] load.c: fix `prev_ext_config` clobbering in `require_internal` The variable `prev_ext_config` is modified by `ext_config_push` between `setjmp` and `longjmp` calls. Since `ext_config_push` and `ext_config_pop` are small and likely to be inlined, `prev_ext_config` can be allocated on a register and get clobbered. Fix by making it `volatile`. This bug can be observed by adding a check for values greater than 1 in `th2->ext_config.ractor_safe` after `ext_config_pop` and building with Clang. 
--- load.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/load.c b/load.c index 329b0f4b3b21ac..017c2364835188 100644 --- a/load.c +++ b/load.c @@ -1346,14 +1346,14 @@ rb_resolve_feature_path(VALUE klass, VALUE fname) } static void -ext_config_push(rb_thread_t *th, struct rb_ext_config *prev) +ext_config_push(rb_thread_t *th, volatile struct rb_ext_config *prev) { *prev = th->ext_config; th->ext_config = (struct rb_ext_config){0}; } static void -ext_config_pop(rb_thread_t *th, struct rb_ext_config *prev) +ext_config_pop(rb_thread_t *th, volatile struct rb_ext_config *prev) { th->ext_config = *prev; } @@ -1407,7 +1407,7 @@ require_internal(rb_execution_context_t *ec, VALUE fname, int exception, bool wa VALUE realpaths = get_loaded_features_realpaths(vm_ns); VALUE realpath_map = get_loaded_features_realpath_map(vm_ns); volatile bool reset_ext_config = false; - struct rb_ext_config prev_ext_config; + volatile struct rb_ext_config prev_ext_config; path = rb_str_encode_ospath(fname); RUBY_DTRACE_HOOK(REQUIRE_ENTRY, RSTRING_PTR(fname)); From b7f65f01eea8810ac8be64865e3415e634c3633a Mon Sep 17 00:00:00 2001 From: BurdetteLamar Date: Fri, 1 Aug 2025 09:19:18 -0500 Subject: [PATCH 03/21] [DOC] Tweaks for String#grapheme_clusters --- doc/string/grapheme_clusters.rdoc | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/string/grapheme_clusters.rdoc b/doc/string/grapheme_clusters.rdoc index 8c7f5a7259c69d..07ea1e318b5573 100644 --- a/doc/string/grapheme_clusters.rdoc +++ b/doc/string/grapheme_clusters.rdoc @@ -1,6 +1,19 @@ Returns an array of the grapheme clusters in +self+ (see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]): - s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈" + s = "ä-pqr-b̈-xyz-c̈" + s.size # => 16 + s.bytesize # => 19 + s.grapheme_clusters.size # => 13 s.grapheme_clusters # => ["ä", "-", "p", "q", "r", "-", "b̈", 
"-", "x", "y", "z", "-", "c̈"] + +Details: + + s = "ä" + s.grapheme_clusters # => ["ä"] # One grapheme cluster. + s.bytes # => [97, 204, 136] # Three bytes. + s.chars # => ["a", "̈"] # Two characters. + s.chars.map {|char| char.ord } # => [97, 776] # Their values. + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. From 409da39afbcd927577801be1626193e719f04005 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 5 Aug 2025 09:06:47 -0500 Subject: [PATCH 04/21] [DOC] Tweaks for String#gsub --- string.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/string.c b/string.c index 68c4f5f1d7948c..54e719623ebf0b 100644 --- a/string.c +++ b/string.c @@ -6601,14 +6601,41 @@ rb_str_gsub_bang(int argc, VALUE *argv, VALUE str) * gsub(pattern) {|match| ... } -> new_string * gsub(pattern) -> enumerator * - * Returns a copy of +self+ with all occurrences of the given +pattern+ replaced. + * Returns a copy of +self+ with zero or more substrings replaced. * - * See {Substitution Methods}[rdoc-ref:String@Substitution+Methods]. + * Argument +pattern+ may be a string or a Regexp; + * argument +replacement+ may be a string or a Hash. + * Varying types for the argument values makes this method very versatile. * - * Returns an Enumerator if no +replacement+ and no block given. + * Below are some simple examples; + * for many more examples, see {Substitution Methods}[rdoc-ref:String@Substitution+Methods]. + * + * With arguments +pattern+ and string +replacement+ given, + * replaces each matching substring with the given +replacement+ string: + * + * s = 'abracadabra' + * s.gsub('ab', 'AB') # => "ABracadABra" + * s.gsub(/[a-c]/, 'X') # => "XXrXXXdXXrX" + * + * With arguments +pattern+ and hash +replacement+ given, + * replaces each matching substring with a value from the given +replacement+ hash, + * or removes it: * - * Related: String#sub, String#sub!, String#gsub!. 
+ * h = {'a' => 'A', 'b' => 'B', 'c' => 'C'} + * s.gsub(/[a-c]/, h) # => "ABrACAdABrA" # 'a', 'b', 'c' replaced. + * s.gsub(/[a-d]/, h) # => "ABrACAABrA" # 'd' removed. * + * With argument +pattern+ and a block given, + * calls the block with each matching substring; + * replaces that substring with the block's return value: + * + * s.gsub(/[a-d]/) {|substring| substring.upcase } + * # => "ABrACADABrA" + * + * With argument +pattern+ and no block given, + * returns a new Enumerator. + * + * Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. */ static VALUE From 72b8bb4cafe2512c8c5273b4614eba2028a4c350 Mon Sep 17 00:00:00 2001 From: BurdetteLamar Date: Sun, 3 Aug 2025 09:13:44 -0500 Subject: [PATCH 05/21] [DOC] Tweaks for String#gsub! --- string.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/string.c b/string.c index 54e719623ebf0b..fe848d6a4a821c 100644 --- a/string.c +++ b/string.c @@ -6576,15 +6576,12 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) * gsub!(pattern) {|match| ... } -> self or nil * gsub!(pattern) -> an_enumerator * - * Performs the specified substring replacement(s) on +self+; - * returns +self+ if any replacement occurred, +nil+ otherwise. + * Like String#gsub, except that: * - * See {Substitution Methods}[rdoc-ref:String@Substitution+Methods]. - * - * Returns an Enumerator if no +replacement+ and no block given. - * - * Related: String#sub, String#gsub, String#sub!. * + * - Performs substitutions in +self+ (not in a copy of +self+). + * - Returns +self+ if any substitutions are performed, +nil+ otherwise. * + * Related: see {Modifying}[rdoc-ref:String@Modifying]. 
*/ static VALUE From 8e9ea4c202fb104d7c17ad1f3cc59d697120501a Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 5 Aug 2025 17:07:37 +0900 Subject: [PATCH 06/21] Convert `PKG_CONFIG_PATH` to msys/cygwin path --- test/mkmf/test_pkg_config.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mkmf/test_pkg_config.rb b/test/mkmf/test_pkg_config.rb index adf5fa6e92b9f5..d0a2dc130ab11a 100644 --- a/test/mkmf/test_pkg_config.rb +++ b/test/mkmf/test_pkg_config.rb @@ -26,7 +26,7 @@ def setup Cflags: -I${includedir}/cflags-I --cflags-other EOF - @pkg_config_path, ENV["PKG_CONFIG_PATH"] = ENV["PKG_CONFIG_PATH"], @fixtures_dir + @pkg_config_path, ENV["PKG_CONFIG_PATH"] = ENV["PKG_CONFIG_PATH"], mkintpath(@fixtures_dir) end end From 79d8a3159f60d32396c8281fe438e86ab97e3daa Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 5 Aug 2025 21:34:03 +0900 Subject: [PATCH 07/21] Check if the found pkg-config is usable actually --- test/mkmf/test_pkg_config.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/mkmf/test_pkg_config.rb b/test/mkmf/test_pkg_config.rb index d0a2dc130ab11a..abeaf548f8cbe4 100644 --- a/test/mkmf/test_pkg_config.rb +++ b/test/mkmf/test_pkg_config.rb @@ -3,7 +3,9 @@ require 'shellwords' class TestMkmfPkgConfig < TestMkmf - PKG_CONFIG = config_string("PKG_CONFIG") {|path| find_executable0(path)} + PKG_CONFIG = config_string("PKG_CONFIG") do |path| + find_executable0(path, "--version") {$?.success?} + end def setup super From 4cfe5baf3d988425ad5e50984f6388a1165f92e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Barri=C3=A9?= Date: Fri, 1 Aug 2025 11:15:21 +0200 Subject: [PATCH 08/21] Use snprintf instead of deprecated sprintf When compiling with -fsanitize=address on macOS, the deprecation of sprintf is effective and prevents compiling yjit.c. More details: https://openradar.appspot.com/FB11761475. 
--- yjit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yjit.c b/yjit.c index f83a330bd6f927..44788eaf2c02fd 100644 --- a/yjit.c +++ b/yjit.c @@ -622,8 +622,9 @@ rb_yjit_iseq_inspect(const rb_iseq_t *iseq) const char *path = RSTRING_PTR(rb_iseq_path(iseq)); int lineno = iseq->body->location.code_location.beg_pos.lineno; - char *buf = ZALLOC_N(char, strlen(label) + strlen(path) + num_digits(lineno) + 3); - sprintf(buf, "%s@%s:%d", label, path, lineno); + const size_t size = strlen(label) + strlen(path) + num_digits(lineno) + 3; + char *buf = ZALLOC_N(char, size); + snprintf(buf, size, "%s@%s:%d", label, path, lineno); return buf; } From 0e33256c8e921e67682d6475634771576ae14748 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 6 Aug 2025 00:22:43 +0900 Subject: [PATCH 09/21] CI: Use `\e` instead of `\033` [ci skip] --- .github/workflows/mingw.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mingw.yml b/.github/workflows/mingw.yml index 72656fa7665f5c..7f8d05a634b4d8 100644 --- a/.github/workflows/mingw.yml +++ b/.github/workflows/mingw.yml @@ -76,19 +76,19 @@ jobs: # show where result=true for e in gcc.exe ragel.exe make.exe libcrypto-3-x64.dll libssl-3-x64.dll; do - echo ::group::$'\033[93m'$e$'\033[m' + echo ::group::$'\e[93m'$e$'\e[m' where $e || result=false echo ::endgroup:: done # show version for e in gcc ragel make "openssl version"; do case "$e" in *" "*) ;; *) e="$e --version";; esac - echo ::group::$'\033[93m'$e$'\033[m' + echo ::group::$'\e[93m'$e$'\e[m' $e || result=false echo ::endgroup:: done # show packages - echo ::group::$'\033[93m'Packages$'\033[m' + echo ::group::$'\e[93m'Packages$'\e[m' pacman -Qs mingw-w64-ucrt-x86_64-* | sed -n "s,local/mingw-w64-ucrt-x86_64-,,p" echo ::endgroup:: $result From 95320f1ddfd0d17ddad3c0a20b43636601b6bb55 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 5 Aug 2025 11:05:23 -0400 Subject: [PATCH 10/21] Fix RUBY_FREE_AT_EXIT for static 
symbols Since static symbols allocate memory, we should deallocate them at shutdown to prevent memory leaks from being reported with RUBY_FREE_AT_EXIT. --- internal/symbol.h | 1 + symbol.c | 14 ++++++++++++++ vm.c | 1 + 3 files changed, 16 insertions(+) diff --git a/internal/symbol.h b/internal/symbol.h index 131cddef906c57..8571c002896554 100644 --- a/internal/symbol.h +++ b/internal/symbol.h @@ -35,6 +35,7 @@ bool rb_obj_is_symbol_table(VALUE obj); void rb_sym_global_symbol_table_foreach_weak_reference(int (*callback)(VALUE *key, void *data), void *data); void rb_gc_free_dsymbol(VALUE); int rb_static_id_valid_p(ID id); +void rb_free_global_symbol_table(void); #if __has_builtin(__builtin_constant_p) #define rb_sym_intern_ascii_cstr(ptr) \ diff --git a/symbol.c b/symbol.c index 43ab0ffa3271c5..abb2c76dc2f758 100644 --- a/symbol.c +++ b/symbol.c @@ -386,6 +386,20 @@ rb_sym_global_symbols_update_references(void) symbols->ids = rb_gc_location(symbols->ids); } +static int +rb_free_global_symbol_table_i(VALUE *sym_ptr, void *data) +{ + sym_set_free(*sym_ptr); + + return ST_DELETE; +} + +void +rb_free_global_symbol_table(void) +{ + rb_concurrent_set_foreach_with_replace(ruby_global_symbols.sym_set, rb_free_global_symbol_table_i, NULL); +} + WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str)); WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id)); diff --git a/vm.c b/vm.c index 9284a2ce69ffe3..4223c2d2ac6f31 100644 --- a/vm.c +++ b/vm.c @@ -3146,6 +3146,7 @@ ruby_vm_destruct(rb_vm_t *vm) rb_free_encoded_insn_data(); rb_free_global_enc_table(); rb_free_loaded_builtin_table(); + rb_free_global_symbol_table(); rb_free_shared_fiber_pool(); rb_free_transcoder_table(); From 3ef8d833ab6e803cdff714ee454d7a4d47ee1c47 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 1 Aug 2025 18:17:33 +0200 Subject: [PATCH 11/21] rb_gc_impl_mark_and_move: avoid needless writes Assuming not all objects are moved during compaction, it is preferable to avoid rewriting references that 
haven't moved as to avoid invalidating potentially shared memory pages. --- gc/default/default.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gc/default/default.c b/gc/default/default.c index 47cfe3fb3baff3..9038a01e4e88a8 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4420,7 +4420,10 @@ rb_gc_impl_mark_and_move(void *objspace_ptr, VALUE *ptr) GC_ASSERT(objspace->flags.during_compacting); GC_ASSERT(during_gc); - *ptr = rb_gc_impl_location(objspace, *ptr); + VALUE destination = rb_gc_impl_location(objspace, *ptr); + if (destination != *ptr) { + *ptr = destination; + } } else { gc_mark(objspace, *ptr); From 18e37ac430e02d89738406c52d1faaaa08c2e0cf Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 6 May 2025 08:57:30 -0400 Subject: [PATCH 12/21] [ruby/prism] Optimize context_terminator with a lookup table https://github.com/ruby/prism/commit/483aa89234 --- prism/config.yml | 71 ++++++++++++------------ prism/prism.c | 137 ++++++++++++++++++++--------------------------- 2 files changed, 96 insertions(+), 112 deletions(-) diff --git a/prism/config.yml b/prism/config.yml index 257bd389ed02ab..b37b98cbdfe252 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -322,13 +322,42 @@ warnings: - UNUSED_LOCAL_VARIABLE - VOID_STATEMENT tokens: + # The order of the tokens at the beginning is important, because we use them + # for a lookup table. 
- name: EOF value: 1 comment: final token in the file - - name: MISSING - comment: "a token that was expected but not found" - - name: NOT_PROVIDED - comment: "a token that was not present but it is okay" + - name: BRACE_RIGHT + comment: "}" + - name: COMMA + comment: "," + - name: EMBEXPR_END + comment: "}" + - name: KEYWORD_DO + comment: "do" + - name: KEYWORD_ELSE + comment: "else" + - name: KEYWORD_ELSIF + comment: "elsif" + - name: KEYWORD_END + comment: "end" + - name: KEYWORD_ENSURE + comment: "ensure" + - name: KEYWORD_IN + comment: "in" + - name: KEYWORD_RESCUE + comment: "rescue" + - name: KEYWORD_THEN + comment: "then" + - name: KEYWORD_WHEN + comment: "when" + - name: NEWLINE + comment: "a newline character outside of other tokens" + - name: PARENTHESIS_RIGHT + comment: ")" + - name: SEMICOLON + comment: ";" + # Tokens from here on are not used for lookup, and can be in any order. - name: AMPERSAND comment: "&" - name: AMPERSAND_AMPERSAND @@ -351,8 +380,6 @@ tokens: comment: "!~" - name: BRACE_LEFT comment: "{" - - name: BRACE_RIGHT - comment: "}" - name: BRACKET_LEFT comment: "[" - name: BRACKET_LEFT_ARRAY @@ -375,8 +402,6 @@ tokens: comment: ":" - name: COLON_COLON comment: "::" - - name: COMMA - comment: "," - name: COMMENT comment: "a comment" - name: CONSTANT @@ -395,8 +420,6 @@ tokens: comment: "a line inside of embedded documentation" - name: EMBEXPR_BEGIN comment: "#{" - - name: EMBEXPR_END - comment: "}" - name: EMBVAR comment: "#" - name: EQUAL @@ -463,20 +486,10 @@ tokens: comment: "def" - name: KEYWORD_DEFINED comment: "defined?" 
- - name: KEYWORD_DO - comment: "do" - name: KEYWORD_DO_LOOP comment: "do keyword for a predicate in a while, until, or for loop" - - name: KEYWORD_ELSE - comment: "else" - - name: KEYWORD_ELSIF - comment: "elsif" - - name: KEYWORD_END - comment: "end" - name: KEYWORD_END_UPCASE comment: "END" - - name: KEYWORD_ENSURE - comment: "ensure" - name: KEYWORD_FALSE comment: "false" - name: KEYWORD_FOR @@ -485,8 +498,6 @@ tokens: comment: "if" - name: KEYWORD_IF_MODIFIER comment: "if in the modifier form" - - name: KEYWORD_IN - comment: "in" - name: KEYWORD_MODULE comment: "module" - name: KEYWORD_NEXT @@ -499,8 +510,6 @@ tokens: comment: "or" - name: KEYWORD_REDO comment: "redo" - - name: KEYWORD_RESCUE - comment: "rescue" - name: KEYWORD_RESCUE_MODIFIER comment: "rescue in the modifier form" - name: KEYWORD_RETRY @@ -511,8 +520,6 @@ tokens: comment: "self" - name: KEYWORD_SUPER comment: "super" - - name: KEYWORD_THEN - comment: "then" - name: KEYWORD_TRUE comment: "true" - name: KEYWORD_UNDEF @@ -525,8 +532,6 @@ tokens: comment: "until" - name: KEYWORD_UNTIL_MODIFIER comment: "until in the modifier form" - - name: KEYWORD_WHEN - comment: "when" - name: KEYWORD_WHILE comment: "while" - name: KEYWORD_WHILE_MODIFIER @@ -563,16 +568,12 @@ tokens: comment: "-=" - name: MINUS_GREATER comment: "->" - - name: NEWLINE - comment: "a newline character outside of other tokens" - name: NUMBERED_REFERENCE comment: "a numbered reference to a capture group in the previous regular expression match" - name: PARENTHESIS_LEFT comment: "(" - name: PARENTHESIS_LEFT_PARENTHESES comment: "( for a parentheses node" - - name: PARENTHESIS_RIGHT - comment: ")" - name: PERCENT comment: "%" - name: PERCENT_EQUAL @@ -605,8 +606,6 @@ tokens: comment: "the beginning of a regular expression" - name: REGEXP_END comment: "the end of a regular expression" - - name: SEMICOLON - comment: ";" - name: SLASH comment: "/" - name: SLASH_EQUAL @@ -651,6 +650,10 @@ tokens: comment: "a separator between words in a 
list" - name: __END__ comment: "marker for the point in the file at which the parser should stop" + - name: MISSING + comment: "a token that was expected but not found" + - name: NOT_PROVIDED + comment: "a token that was not present but it is okay" flags: - name: ArgumentsNodeFlags values: diff --git a/prism/prism.c b/prism/prism.c index d01c2a0766619c..9d2598e7465a0e 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -8586,85 +8586,66 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { /* Context manipulations */ /******************************************************************************/ -static bool -context_terminator(pm_context_t context, pm_token_t *token) { - switch (context) { - case PM_CONTEXT_MAIN: - case PM_CONTEXT_DEF_PARAMS: - case PM_CONTEXT_DEFINED: - case PM_CONTEXT_MULTI_TARGET: - case PM_CONTEXT_TERNARY: - case PM_CONTEXT_RESCUE_MODIFIER: - return token->type == PM_TOKEN_EOF; - case PM_CONTEXT_DEFAULT_PARAMS: - return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT; - case PM_CONTEXT_PREEXE: - case PM_CONTEXT_POSTEXE: - return token->type == PM_TOKEN_BRACE_RIGHT; - case PM_CONTEXT_MODULE: - case PM_CONTEXT_CLASS: - case PM_CONTEXT_SCLASS: - case PM_CONTEXT_LAMBDA_DO_END: - case PM_CONTEXT_DEF: - case PM_CONTEXT_BLOCK_KEYWORDS: - return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE; - case PM_CONTEXT_WHILE: - case PM_CONTEXT_UNTIL: - case PM_CONTEXT_ELSE: - case PM_CONTEXT_FOR: - case PM_CONTEXT_BEGIN_ENSURE: - case PM_CONTEXT_BLOCK_ENSURE: - case PM_CONTEXT_CLASS_ENSURE: - case PM_CONTEXT_DEF_ENSURE: - case PM_CONTEXT_LAMBDA_ENSURE: - case PM_CONTEXT_MODULE_ENSURE: - case PM_CONTEXT_SCLASS_ENSURE: - return token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_LOOP_PREDICATE: - return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN; - case PM_CONTEXT_FOR_INDEX: - return token->type == 
PM_TOKEN_KEYWORD_IN; - case PM_CONTEXT_CASE_WHEN: - return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE; - case PM_CONTEXT_CASE_IN: - return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE; - case PM_CONTEXT_IF: - case PM_CONTEXT_ELSIF: - return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_UNLESS: - return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_EMBEXPR: - return token->type == PM_TOKEN_EMBEXPR_END; - case PM_CONTEXT_BLOCK_BRACES: - return token->type == PM_TOKEN_BRACE_RIGHT; - case PM_CONTEXT_PARENS: - return token->type == PM_TOKEN_PARENTHESIS_RIGHT; - case PM_CONTEXT_BEGIN: - case PM_CONTEXT_BEGIN_RESCUE: - case PM_CONTEXT_BLOCK_RESCUE: - case PM_CONTEXT_CLASS_RESCUE: - case PM_CONTEXT_DEF_RESCUE: - case PM_CONTEXT_LAMBDA_RESCUE: - case PM_CONTEXT_MODULE_RESCUE: - case PM_CONTEXT_SCLASS_RESCUE: - return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_BEGIN_ELSE: - case PM_CONTEXT_BLOCK_ELSE: - case PM_CONTEXT_CLASS_ELSE: - case PM_CONTEXT_DEF_ELSE: - case PM_CONTEXT_LAMBDA_ELSE: - case PM_CONTEXT_MODULE_ELSE: - case PM_CONTEXT_SCLASS_ELSE: - return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_LAMBDA_BRACES: - return token->type == PM_TOKEN_BRACE_RIGHT; - case PM_CONTEXT_PREDICATE: - return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON; - case PM_CONTEXT_NONE: - return false; - } +static const uint32_t context_terminators[] = { + [PM_CONTEXT_NONE] = 0, + [PM_CONTEXT_BEGIN] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 
<< PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BEGIN_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BEGIN_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BEGIN_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BLOCK_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_BLOCK_KEYWORDS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_BLOCK_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BLOCK_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BLOCK_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_CASE_WHEN] = (1 << PM_TOKEN_KEYWORD_WHEN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE), + [PM_CONTEXT_CASE_IN] = (1 << PM_TOKEN_KEYWORD_IN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE), + [PM_CONTEXT_CLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_CLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_CLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_CLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_DEF_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF_PARAMS] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_DEFINED] = (1 
<< PM_TOKEN_EOF), + [PM_CONTEXT_DEFAULT_PARAMS] = (1 << PM_TOKEN_COMMA) | (1 << PM_TOKEN_PARENTHESIS_RIGHT), + [PM_CONTEXT_ELSE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_ELSIF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_EMBEXPR] = (1 << PM_TOKEN_EMBEXPR_END), + [PM_CONTEXT_FOR] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_FOR_INDEX] = (1 << PM_TOKEN_KEYWORD_IN), + [PM_CONTEXT_IF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LAMBDA_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_LAMBDA_DO_END] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_LAMBDA_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LAMBDA_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LAMBDA_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LOOP_PREDICATE] = (1 << PM_TOKEN_KEYWORD_DO) | (1 << PM_TOKEN_KEYWORD_THEN), + [PM_CONTEXT_MAIN] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_MODULE] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_MODULE_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_MODULE_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_MODULE_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_MULTI_TARGET] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_PARENS] = (1 << PM_TOKEN_PARENTHESIS_RIGHT), + [PM_CONTEXT_POSTEXE] = (1 << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_PREDICATE] = (1 << PM_TOKEN_KEYWORD_THEN) | (1 << PM_TOKEN_NEWLINE) | (1 << PM_TOKEN_SEMICOLON), + [PM_CONTEXT_PREEXE] = (1 << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_RESCUE_MODIFIER] = (1 << PM_TOKEN_EOF), + 
[PM_CONTEXT_SCLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_SCLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_SCLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_SCLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_TERNARY] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_UNLESS] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_UNTIL] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_WHILE] = (1 << PM_TOKEN_KEYWORD_END), +}; - return false; +static inline bool +context_terminator(pm_context_t context, pm_token_t *token) { + return token->type < 32 && (context_terminators[context] & (1 << token->type)); } /** From f814a777550ee39865f8fe26d0061cba8b715509 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Fri, 13 Jun 2025 12:44:08 +0900 Subject: [PATCH 13/21] [ruby/prism] Reject `true && not true` A command-call-like `not true` must be rejected after `&&` and `||`. 
https://bugs.ruby-lang.org/issues/21337 https://github.com/ruby/prism/commit/0513cf22ad --- prism/templates/src/diagnostic.c.erb | 3 +-- test/prism/errors/command_calls_31.txt | 7 +------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index 9a30a57e3b0eca..389b1dc484e1e9 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -184,8 +184,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_EXPECT_FOR_DELIMITER] = { "unexpected %s; expected a 'do', newline, or ';' after the 'for' loop collection", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = { "expected an identifier for the required parameter", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_IN_DELIMITER] = { "expected a delimiter after the patterns of an `in` clause", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN] = { "expected a `(` immediately after `not`", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER] = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_EXPECT_LPAREN_AFTER_NOT] = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = { "expected a `(` to start a required parameter", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_MESSAGE] = { "unexpected %s; expecting a message to send to the receiver", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_RBRACKET] = { "expected a matching `]`", PM_ERROR_LEVEL_SYNTAX }, diff --git a/test/prism/errors/command_calls_31.txt b/test/prism/errors/command_calls_31.txt index e662b254444821..72d5fc588f8a5c 100644 --- a/test/prism/errors/command_calls_31.txt +++ b/test/prism/errors/command_calls_31.txt @@ -7,11 +7,6 @@ true || not true ^~~~ unexpected 'true', expecting end-of-input true && not (true) - ^ expected a `(` immediately after `not` + ^ expected a `(` after `not` ^ unexpected '(', expecting end-of-input -true 
&& not -true -^~~~ expected a `(` after `not` -^~~~ unexpected 'true', expecting end-of-input - From 087190fcd21973eb34b600fa82e6567189f4bbd7 Mon Sep 17 00:00:00 2001 From: ydah Date: Mon, 7 Jul 2025 19:06:26 +0900 Subject: [PATCH 14/21] [ruby/prism] Improve error handling for missing parentheses after 'not' in command calls https://github.com/ruby/prism/commit/d9151b8a82 --- prism/templates/src/diagnostic.c.erb | 3 ++- test/prism/errors/command_calls_31.txt | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index 389b1dc484e1e9..9a30a57e3b0eca 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -184,7 +184,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_EXPECT_FOR_DELIMITER] = { "unexpected %s; expected a 'do', newline, or ';' after the 'for' loop collection", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = { "expected an identifier for the required parameter", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_IN_DELIMITER] = { "expected a delimiter after the patterns of an `in` clause", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_EXPECT_LPAREN_AFTER_NOT] = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN] = { "expected a `(` immediately after `not`", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER] = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = { "expected a `(` to start a required parameter", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_MESSAGE] = { "unexpected %s; expecting a message to send to the receiver", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_RBRACKET] = { "expected a matching `]`", PM_ERROR_LEVEL_SYNTAX }, diff --git a/test/prism/errors/command_calls_31.txt b/test/prism/errors/command_calls_31.txt index 72d5fc588f8a5c..e662b254444821 100644 --- 
a/test/prism/errors/command_calls_31.txt +++ b/test/prism/errors/command_calls_31.txt @@ -7,6 +7,11 @@ true || not true ^~~~ unexpected 'true', expecting end-of-input true && not (true) - ^ expected a `(` after `not` + ^ expected a `(` immediately after `not` ^ unexpected '(', expecting end-of-input +true && not +true +^~~~ expected a `(` after `not` +^~~~ unexpected 'true', expecting end-of-input + From 2936da902cadc3e9c5737469892df9c116f24b77 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 2 Jun 2025 22:21:25 -0400 Subject: [PATCH 15/21] [ruby/prism] Handle new ractor stuff https://github.com/ruby/prism/commit/f5ded5104d --- test/prism/ractor_test.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/prism/ractor_test.rb b/test/prism/ractor_test.rb index fba10dbfe257a5..6169940bebc558 100644 --- a/test/prism/ractor_test.rb +++ b/test/prism/ractor_test.rb @@ -62,7 +62,11 @@ def with_ractor(*arguments, &block) if reader reader.gets.chomp else - puts(ignore_warnings { Ractor.new(*arguments, &block) }.value) + ractor = ignore_warnings { Ractor.new(*arguments, &block) } + + # Somewhere in the Ruby 3.5.* series, Ractor#take was removed and + # Ractor#value was added. + puts(ractor.respond_to?(:value) ? 
ractor.value : ractor.take) end end end From 2e672fdee0a81d21b877b7561a2f24f5d57c234d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Apr 2025 16:07:03 -0400 Subject: [PATCH 16/21] [ruby/prism] Bump JRuby version https://github.com/ruby/prism/commit/27d284bbb8 --- test/prism/ruby/parameters_signature_test.rb | 2 +- test/prism/ruby/parser_test.rb | 12 +----------- test/prism/ruby/ripper_test.rb | 2 +- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/test/prism/ruby/parameters_signature_test.rb b/test/prism/ruby/parameters_signature_test.rb index af5b54ed91eac9..ea1eea106ba81c 100644 --- a/test/prism/ruby/parameters_signature_test.rb +++ b/test/prism/ruby/parameters_signature_test.rb @@ -54,7 +54,7 @@ def test_keyrest_anonymous assert_parameters([[:keyrest, :**]], "**") end - if RUBY_ENGINE != "truffleruby" + if RUBY_ENGINE == "ruby" def test_key_ordering assert_parameters([[:keyreq, :a], [:keyreq, :b], [:key, :c], [:key, :d]], "a:, c: 1, b:, d: 2") end diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb index 2396f4186cec0e..156e8f9e9fad4d 100644 --- a/test/prism/ruby/parser_test.rb +++ b/test/prism/ruby/parser_test.rb @@ -99,16 +99,6 @@ class ParserTest < TestCase "seattlerb/regexp_esc_C_slash.txt", ] - # These files are either failing to parse or failing to translate, so we'll - # skip them for now. - skip_all = skip_incorrect | [ - ] - - # Not sure why these files are failing on JRuby, but skipping them for now. - if RUBY_ENGINE == "jruby" - skip_all.push("emoji_method_calls.txt", "symbols.txt") - end - # These files are failing to translate their lexer output into the lexer # output expected by the parser gem, so we'll skip them for now. 
skip_tokens = [ @@ -147,7 +137,7 @@ class ParserTest < TestCase define_method(fixture.test_name) do assert_equal_parses( fixture, - compare_asts: !skip_all.include?(fixture.path), + compare_asts: !skip_incorrect.include?(fixture.path), compare_tokens: !skip_tokens.include?(fixture.path), compare_comments: fixture.path != "embdoc_no_newline_at_end.txt" ) diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index d4b278c28e1366..5c37178889ecf5 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -return if RUBY_VERSION < "3.3" || RUBY_ENGINE == "truffleruby" +return if RUBY_VERSION < "3.3" || RUBY_ENGINE != "ruby" require_relative "../test_helper" From 6e2b139d6ac1bcbae26c06a4e3022e8b2be8307e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 6 May 2025 09:30:03 -0400 Subject: [PATCH 17/21] [ruby/prism] Ensure context terminators terminate expressions https://github.com/ruby/prism/commit/915f6b3ae9 --- prism/prism.c | 6 ++++++ test/prism/fixtures/case_in_hash_key.txt | 6 ++++++ 2 files changed, 12 insertions(+) create mode 100644 test/prism/fixtures/case_in_hash_key.txt diff --git a/prism/prism.c b/prism/prism.c index 9d2598e7465a0e..4d2c372d5dcbaa 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -22176,6 +22176,12 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc ) { node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1)); + if (context_terminator(parser->current_context->context, &parser->current)) { + // If this token terminates the current context, then we need to + // stop parsing the expression, as it has become a statement. 
+ return node; + } + switch (PM_NODE_TYPE(node)) { case PM_MULTI_WRITE_NODE: // Multi-write nodes are statements, and cannot be followed by diff --git a/test/prism/fixtures/case_in_hash_key.txt b/test/prism/fixtures/case_in_hash_key.txt new file mode 100644 index 00000000000000..75ac8a846f7380 --- /dev/null +++ b/test/prism/fixtures/case_in_hash_key.txt @@ -0,0 +1,6 @@ +case 1 +in 2 + A.print message: +in 3 + A.print message: +end From b482e3d7cd77c688ed0e38e1c95c1f0b2b205cd6 Mon Sep 17 00:00:00 2001 From: S-H-GAMELINKS Date: Mon, 21 Jul 2025 11:00:33 +0900 Subject: [PATCH 18/21] [ruby/prism] Make `it = it` assign `nil` to match parse.y behavior [Bug #21139] Currently Prism returns `42` for code like this: ```ruby 42.tap { it = it; p it } # => 42 ``` But parse.y returns `nil`: ```ruby 42.tap { it = it; p it } # => nil ``` In parse.y, it on the right-hand side is parsed as a local variable. In Prism, it was parsed as the implicit block parameter it, which caused this inconsistent behavior. This change makes the right-hand side it to be parsed as a local variable, aligning with parse.y's behavior. 
Bug ticket: https://bugs.ruby-lang.org/issues/21139 https://github.com/ruby/prism/commit/cf3bbf9d2c --- prism/prism.c | 9 ++++++++- test/prism/fixtures/it_assignment.txt | 1 + test/prism/ruby/parser_test.rb | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test/prism/fixtures/it_assignment.txt diff --git a/prism/prism.c b/prism/prism.c index 4d2c372d5dcbaa..85098c52d8b2fe 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -16459,7 +16459,14 @@ parse_variable(pm_parser_t *parser) { pm_node_list_append(&current_scope->implicit_parameters, node); return node; - } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) { + } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) { + if (match1(parser, PM_TOKEN_EQUAL)) { + pm_constant_id_t name_id = pm_parser_local_add_location(parser, parser->previous.start, parser->previous.end, 0); + pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false); + + return node; + } + pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous); pm_node_list_append(&current_scope->implicit_parameters, node); diff --git a/test/prism/fixtures/it_assignment.txt b/test/prism/fixtures/it_assignment.txt new file mode 100644 index 00000000000000..523b0ffe1e1250 --- /dev/null +++ b/test/prism/fixtures/it_assignment.txt @@ -0,0 +1 @@ +42.tap { it = it; p it } diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb index 156e8f9e9fad4d..7bf2e59a86c35c 100644 --- a/test/prism/ruby/parser_test.rb +++ b/test/prism/ruby/parser_test.rb @@ -183,6 +183,25 @@ def test_it_block_parameter_syntax assert_equal(it_block_parameter_sexp, actual_ast.to_sexp) end + def test_it_assignment_syntax + it_assignment_fixture_path = 
Pathname(__dir__).join('../../../test/prism/fixtures/it_assignment.txt') + + buffer = Parser::Source::Buffer.new(it_assignment_fixture_path) + buffer.source = it_assignment_fixture_path.read + actual_ast = Prism::Translation::Parser34.new.tokenize(buffer)[0] + + it_assignment_sexp = parse_sexp { + s(:block, + s(:send, s(:int, 42), :tap), + s(:args), + s(:begin, + s(:lvasgn, :it, s(:lvar, :it)), + s(:send, nil, :p, s(:lvar, :it)))) + } + + assert_equal(it_assignment_sexp, actual_ast.to_sexp) + end + private def assert_equal_parses(fixture, compare_asts: true, compare_tokens: true, compare_comments: true) From 02200ac81cd6712759995d920e354a0c3e96488c Mon Sep 17 00:00:00 2001 From: S-H-GAMELINKS Date: Wed, 23 Jul 2025 21:23:47 +0900 Subject: [PATCH 19/21] [ruby/prism] Add it read and assignment test https://github.com/ruby/prism/commit/659d769621 --- test/prism/fixtures/it_read_and_assignment.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 test/prism/fixtures/it_read_and_assignment.txt diff --git a/test/prism/fixtures/it_read_and_assignment.txt b/test/prism/fixtures/it_read_and_assignment.txt new file mode 100644 index 00000000000000..2cceeb2a548710 --- /dev/null +++ b/test/prism/fixtures/it_read_and_assignment.txt @@ -0,0 +1 @@ +42.tap { p it; it = it; p it } From a12e0c1db1d33525b904e2c0c801a22d42658d31 Mon Sep 17 00:00:00 2001 From: S-H-GAMELINKS Date: Wed, 23 Jul 2025 21:20:50 +0900 Subject: [PATCH 20/21] [ruby/prism] Remove uneeded test https://github.com/ruby/prism/commit/a6b448b10f --- test/prism/ruby/parser_test.rb | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb index 7bf2e59a86c35c..156e8f9e9fad4d 100644 --- a/test/prism/ruby/parser_test.rb +++ b/test/prism/ruby/parser_test.rb @@ -183,25 +183,6 @@ def test_it_block_parameter_syntax assert_equal(it_block_parameter_sexp, actual_ast.to_sexp) end - def test_it_assignment_syntax - it_assignment_fixture_path = 
Pathname(__dir__).join('../../../test/prism/fixtures/it_assignment.txt') - - buffer = Parser::Source::Buffer.new(it_assignment_fixture_path) - buffer.source = it_assignment_fixture_path.read - actual_ast = Prism::Translation::Parser34.new.tokenize(buffer)[0] - - it_assignment_sexp = parse_sexp { - s(:block, - s(:send, s(:int, 42), :tap), - s(:args), - s(:begin, - s(:lvasgn, :it, s(:lvar, :it)), - s(:send, nil, :p, s(:lvar, :it)))) - } - - assert_equal(it_assignment_sexp, actual_ast.to_sexp) - end - private def assert_equal_parses(fixture, compare_asts: true, compare_tokens: true, compare_comments: true) From 6bc07f142233759e747f04db82115593014af1fe Mon Sep 17 00:00:00 2001 From: S-H-GAMELINKS Date: Mon, 28 Jul 2025 23:25:49 +0900 Subject: [PATCH 21/21] [ruby/prism] Convert implicit parameter `it` to local variable in `parse_expression_infix` function https://github.com/ruby/prism/commit/fb136c6eb5 --- prism/prism.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/prism/prism.c b/prism/prism.c index 85098c52d8b2fe..afd767b84c8666 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -16459,14 +16459,7 @@ parse_variable(pm_parser_t *parser) { pm_node_list_append(&current_scope->implicit_parameters, node); return node; - } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) { - if (match1(parser, PM_TOKEN_EQUAL)) { - pm_constant_id_t name_id = pm_parser_local_add_location(parser, parser->previous.start, parser->previous.end, 0); - pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false); - - return node; - } - + } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) { pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous); pm_node_list_append(&current_scope->implicit_parameters, node); @@ -21190,6 
+21183,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t } PRISM_FALLTHROUGH case PM_CASE_WRITABLE: { + // When we have `it = value`, we need to add `it` as a local + // variable before parsing the value, in case the value + // references the variable. + if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) { + pm_parser_local_add_location(parser, node->location.start, node->location.end, 0); + } + parser_lex(parser); pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));