diff --git a/.github/workflows/mingw.yml b/.github/workflows/mingw.yml index 72656fa7665f5c..7f8d05a634b4d8 100644 --- a/.github/workflows/mingw.yml +++ b/.github/workflows/mingw.yml @@ -76,19 +76,19 @@ jobs: # show where result=true for e in gcc.exe ragel.exe make.exe libcrypto-3-x64.dll libssl-3-x64.dll; do - echo ::group::$'\033[93m'$e$'\033[m' + echo ::group::$'\e[93m'$e$'\e[m' where $e || result=false echo ::endgroup:: done # show version for e in gcc ragel make "openssl version"; do case "$e" in *" "*) ;; *) e="$e --version";; esac - echo ::group::$'\033[93m'$e$'\033[m' + echo ::group::$'\e[93m'$e$'\e[m' $e || result=false echo ::endgroup:: done # show packages - echo ::group::$'\033[93m'Packages$'\033[m' + echo ::group::$'\e[93m'Packages$'\e[m' pacman -Qs mingw-w64-ucrt-x86_64-* | sed -n "s,local/mingw-w64-ucrt-x86_64-,,p" echo ::endgroup:: $result diff --git a/doc/string/grapheme_clusters.rdoc b/doc/string/grapheme_clusters.rdoc index 8c7f5a7259c69d..07ea1e318b5573 100644 --- a/doc/string/grapheme_clusters.rdoc +++ b/doc/string/grapheme_clusters.rdoc @@ -1,6 +1,19 @@ Returns an array of the grapheme clusters in +self+ (see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]): - s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈" + s = "ä-pqr-b̈-xyz-c̈" + s.size # => 16 + s.bytesize # => 19 + s.grapheme_clusters.size # => 13 s.grapheme_clusters # => ["ä", "-", "p", "q", "r", "-", "b̈", "-", "x", "y", "z", "-", "c̈"] + +Details: + + s = "ä" + s.grapheme_clusters # => ["ä"] # One grapheme cluster. + s.bytes # => [97, 204, 136] # Three bytes. + s.chars # => ["a", "̈"] # Two characters. + s.chars.map {|char| char.ord } # => [97, 776] # Their values. + +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. 
diff --git a/error.c b/error.c index e14ecd2393dcb6..9a758a7dd711b8 100644 --- a/error.c +++ b/error.c @@ -1883,7 +1883,7 @@ exc_inspect(VALUE exc) * # String * end * - * The value returned by this method migth be adjusted when raising (see Kernel#raise), + * The value returned by this method might be adjusted when raising (see Kernel#raise), * or during intermediate handling by #set_backtrace. * * See also #backtrace_locations that provide the same value, as structured objects. diff --git a/gc/default/default.c b/gc/default/default.c index 47cfe3fb3baff3..9038a01e4e88a8 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4420,7 +4420,10 @@ rb_gc_impl_mark_and_move(void *objspace_ptr, VALUE *ptr) GC_ASSERT(objspace->flags.during_compacting); GC_ASSERT(during_gc); - *ptr = rb_gc_impl_location(objspace, *ptr); + VALUE destination = rb_gc_impl_location(objspace, *ptr); + if (destination != *ptr) { + *ptr = destination; + } } else { gc_mark(objspace, *ptr); diff --git a/internal/symbol.h b/internal/symbol.h index 131cddef906c57..8571c002896554 100644 --- a/internal/symbol.h +++ b/internal/symbol.h @@ -35,6 +35,7 @@ bool rb_obj_is_symbol_table(VALUE obj); void rb_sym_global_symbol_table_foreach_weak_reference(int (*callback)(VALUE *key, void *data), void *data); void rb_gc_free_dsymbol(VALUE); int rb_static_id_valid_p(ID id); +void rb_free_global_symbol_table(void); #if __has_builtin(__builtin_constant_p) #define rb_sym_intern_ascii_cstr(ptr) \ diff --git a/load.c b/load.c index 329b0f4b3b21ac..017c2364835188 100644 --- a/load.c +++ b/load.c @@ -1346,14 +1346,14 @@ rb_resolve_feature_path(VALUE klass, VALUE fname) } static void -ext_config_push(rb_thread_t *th, struct rb_ext_config *prev) +ext_config_push(rb_thread_t *th, volatile struct rb_ext_config *prev) { *prev = th->ext_config; th->ext_config = (struct rb_ext_config){0}; } static void -ext_config_pop(rb_thread_t *th, struct rb_ext_config *prev) +ext_config_pop(rb_thread_t *th, volatile struct 
rb_ext_config *prev) { th->ext_config = *prev; } @@ -1407,7 +1407,7 @@ require_internal(rb_execution_context_t *ec, VALUE fname, int exception, bool wa VALUE realpaths = get_loaded_features_realpaths(vm_ns); VALUE realpath_map = get_loaded_features_realpath_map(vm_ns); volatile bool reset_ext_config = false; - struct rb_ext_config prev_ext_config; + volatile struct rb_ext_config prev_ext_config; path = rb_str_encode_ospath(fname); RUBY_DTRACE_HOOK(REQUIRE_ENTRY, RSTRING_PTR(fname)); diff --git a/prism/config.yml b/prism/config.yml index 257bd389ed02ab..b37b98cbdfe252 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -322,13 +322,42 @@ warnings: - UNUSED_LOCAL_VARIABLE - VOID_STATEMENT tokens: + # The order of the tokens at the beginning is important, because we use them + # for a lookup table. - name: EOF value: 1 comment: final token in the file - - name: MISSING - comment: "a token that was expected but not found" - - name: NOT_PROVIDED - comment: "a token that was not present but it is okay" + - name: BRACE_RIGHT + comment: "}" + - name: COMMA + comment: "," + - name: EMBEXPR_END + comment: "}" + - name: KEYWORD_DO + comment: "do" + - name: KEYWORD_ELSE + comment: "else" + - name: KEYWORD_ELSIF + comment: "elsif" + - name: KEYWORD_END + comment: "end" + - name: KEYWORD_ENSURE + comment: "ensure" + - name: KEYWORD_IN + comment: "in" + - name: KEYWORD_RESCUE + comment: "rescue" + - name: KEYWORD_THEN + comment: "then" + - name: KEYWORD_WHEN + comment: "when" + - name: NEWLINE + comment: "a newline character outside of other tokens" + - name: PARENTHESIS_RIGHT + comment: ")" + - name: SEMICOLON + comment: ";" + # Tokens from here on are not used for lookup, and can be in any order. 
- name: AMPERSAND comment: "&" - name: AMPERSAND_AMPERSAND @@ -351,8 +380,6 @@ tokens: comment: "!~" - name: BRACE_LEFT comment: "{" - - name: BRACE_RIGHT - comment: "}" - name: BRACKET_LEFT comment: "[" - name: BRACKET_LEFT_ARRAY @@ -375,8 +402,6 @@ tokens: comment: ":" - name: COLON_COLON comment: "::" - - name: COMMA - comment: "," - name: COMMENT comment: "a comment" - name: CONSTANT @@ -395,8 +420,6 @@ tokens: comment: "a line inside of embedded documentation" - name: EMBEXPR_BEGIN comment: "#{" - - name: EMBEXPR_END - comment: "}" - name: EMBVAR comment: "#" - name: EQUAL @@ -463,20 +486,10 @@ tokens: comment: "def" - name: KEYWORD_DEFINED comment: "defined?" - - name: KEYWORD_DO - comment: "do" - name: KEYWORD_DO_LOOP comment: "do keyword for a predicate in a while, until, or for loop" - - name: KEYWORD_ELSE - comment: "else" - - name: KEYWORD_ELSIF - comment: "elsif" - - name: KEYWORD_END - comment: "end" - name: KEYWORD_END_UPCASE comment: "END" - - name: KEYWORD_ENSURE - comment: "ensure" - name: KEYWORD_FALSE comment: "false" - name: KEYWORD_FOR @@ -485,8 +498,6 @@ tokens: comment: "if" - name: KEYWORD_IF_MODIFIER comment: "if in the modifier form" - - name: KEYWORD_IN - comment: "in" - name: KEYWORD_MODULE comment: "module" - name: KEYWORD_NEXT @@ -499,8 +510,6 @@ tokens: comment: "or" - name: KEYWORD_REDO comment: "redo" - - name: KEYWORD_RESCUE - comment: "rescue" - name: KEYWORD_RESCUE_MODIFIER comment: "rescue in the modifier form" - name: KEYWORD_RETRY @@ -511,8 +520,6 @@ tokens: comment: "self" - name: KEYWORD_SUPER comment: "super" - - name: KEYWORD_THEN - comment: "then" - name: KEYWORD_TRUE comment: "true" - name: KEYWORD_UNDEF @@ -525,8 +532,6 @@ tokens: comment: "until" - name: KEYWORD_UNTIL_MODIFIER comment: "until in the modifier form" - - name: KEYWORD_WHEN - comment: "when" - name: KEYWORD_WHILE comment: "while" - name: KEYWORD_WHILE_MODIFIER @@ -563,16 +568,12 @@ tokens: comment: "-=" - name: MINUS_GREATER comment: "->" - - name: NEWLINE 
- comment: "a newline character outside of other tokens" - name: NUMBERED_REFERENCE comment: "a numbered reference to a capture group in the previous regular expression match" - name: PARENTHESIS_LEFT comment: "(" - name: PARENTHESIS_LEFT_PARENTHESES comment: "( for a parentheses node" - - name: PARENTHESIS_RIGHT - comment: ")" - name: PERCENT comment: "%" - name: PERCENT_EQUAL @@ -605,8 +606,6 @@ tokens: comment: "the beginning of a regular expression" - name: REGEXP_END comment: "the end of a regular expression" - - name: SEMICOLON - comment: ";" - name: SLASH comment: "/" - name: SLASH_EQUAL @@ -651,6 +650,10 @@ tokens: comment: "a separator between words in a list" - name: __END__ comment: "marker for the point in the file at which the parser should stop" + - name: MISSING + comment: "a token that was expected but not found" + - name: NOT_PROVIDED + comment: "a token that was not present but it is okay" flags: - name: ArgumentsNodeFlags values: diff --git a/prism/prism.c b/prism/prism.c index d01c2a0766619c..afd767b84c8666 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -8586,85 +8586,66 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { /* Context manipulations */ /******************************************************************************/ -static bool -context_terminator(pm_context_t context, pm_token_t *token) { - switch (context) { - case PM_CONTEXT_MAIN: - case PM_CONTEXT_DEF_PARAMS: - case PM_CONTEXT_DEFINED: - case PM_CONTEXT_MULTI_TARGET: - case PM_CONTEXT_TERNARY: - case PM_CONTEXT_RESCUE_MODIFIER: - return token->type == PM_TOKEN_EOF; - case PM_CONTEXT_DEFAULT_PARAMS: - return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT; - case PM_CONTEXT_PREEXE: - case PM_CONTEXT_POSTEXE: - return token->type == PM_TOKEN_BRACE_RIGHT; - case PM_CONTEXT_MODULE: - case PM_CONTEXT_CLASS: - case PM_CONTEXT_SCLASS: - case PM_CONTEXT_LAMBDA_DO_END: - case PM_CONTEXT_DEF: - case PM_CONTEXT_BLOCK_KEYWORDS: - 
return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE; - case PM_CONTEXT_WHILE: - case PM_CONTEXT_UNTIL: - case PM_CONTEXT_ELSE: - case PM_CONTEXT_FOR: - case PM_CONTEXT_BEGIN_ENSURE: - case PM_CONTEXT_BLOCK_ENSURE: - case PM_CONTEXT_CLASS_ENSURE: - case PM_CONTEXT_DEF_ENSURE: - case PM_CONTEXT_LAMBDA_ENSURE: - case PM_CONTEXT_MODULE_ENSURE: - case PM_CONTEXT_SCLASS_ENSURE: - return token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_LOOP_PREDICATE: - return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN; - case PM_CONTEXT_FOR_INDEX: - return token->type == PM_TOKEN_KEYWORD_IN; - case PM_CONTEXT_CASE_WHEN: - return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE; - case PM_CONTEXT_CASE_IN: - return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE; - case PM_CONTEXT_IF: - case PM_CONTEXT_ELSIF: - return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_UNLESS: - return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_EMBEXPR: - return token->type == PM_TOKEN_EMBEXPR_END; - case PM_CONTEXT_BLOCK_BRACES: - return token->type == PM_TOKEN_BRACE_RIGHT; - case PM_CONTEXT_PARENS: - return token->type == PM_TOKEN_PARENTHESIS_RIGHT; - case PM_CONTEXT_BEGIN: - case PM_CONTEXT_BEGIN_RESCUE: - case PM_CONTEXT_BLOCK_RESCUE: - case PM_CONTEXT_CLASS_RESCUE: - case PM_CONTEXT_DEF_RESCUE: - case PM_CONTEXT_LAMBDA_RESCUE: - case PM_CONTEXT_MODULE_RESCUE: - case PM_CONTEXT_SCLASS_RESCUE: - return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_BEGIN_ELSE: - case PM_CONTEXT_BLOCK_ELSE: - case 
PM_CONTEXT_CLASS_ELSE: - case PM_CONTEXT_DEF_ELSE: - case PM_CONTEXT_LAMBDA_ELSE: - case PM_CONTEXT_MODULE_ELSE: - case PM_CONTEXT_SCLASS_ELSE: - return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_LAMBDA_BRACES: - return token->type == PM_TOKEN_BRACE_RIGHT; - case PM_CONTEXT_PREDICATE: - return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON; - case PM_CONTEXT_NONE: - return false; - } +static const uint32_t context_terminators[] = { + [PM_CONTEXT_NONE] = 0, + [PM_CONTEXT_BEGIN] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BEGIN_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BEGIN_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BEGIN_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BLOCK_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_BLOCK_KEYWORDS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_BLOCK_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BLOCK_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BLOCK_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_CASE_WHEN] = (1 << PM_TOKEN_KEYWORD_WHEN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE), + [PM_CONTEXT_CASE_IN] = (1 << PM_TOKEN_KEYWORD_IN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE), + [PM_CONTEXT_CLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_CLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_CLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << 
PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_CLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_DEF_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF_PARAMS] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_DEFINED] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_DEFAULT_PARAMS] = (1 << PM_TOKEN_COMMA) | (1 << PM_TOKEN_PARENTHESIS_RIGHT), + [PM_CONTEXT_ELSE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_ELSIF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_EMBEXPR] = (1 << PM_TOKEN_EMBEXPR_END), + [PM_CONTEXT_FOR] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_FOR_INDEX] = (1 << PM_TOKEN_KEYWORD_IN), + [PM_CONTEXT_IF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LAMBDA_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_LAMBDA_DO_END] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_LAMBDA_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LAMBDA_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LAMBDA_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LOOP_PREDICATE] = (1 << PM_TOKEN_KEYWORD_DO) | (1 << PM_TOKEN_KEYWORD_THEN), + [PM_CONTEXT_MAIN] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_MODULE] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_MODULE_ENSURE] = (1 
<< PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_MODULE_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_MODULE_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_MULTI_TARGET] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_PARENS] = (1 << PM_TOKEN_PARENTHESIS_RIGHT), + [PM_CONTEXT_POSTEXE] = (1 << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_PREDICATE] = (1 << PM_TOKEN_KEYWORD_THEN) | (1 << PM_TOKEN_NEWLINE) | (1 << PM_TOKEN_SEMICOLON), + [PM_CONTEXT_PREEXE] = (1 << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_RESCUE_MODIFIER] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_SCLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_SCLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_SCLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_SCLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_TERNARY] = (1 << PM_TOKEN_EOF), + [PM_CONTEXT_UNLESS] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_UNTIL] = (1 << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_WHILE] = (1 << PM_TOKEN_KEYWORD_END), +}; - return false; +static inline bool +context_terminator(pm_context_t context, pm_token_t *token) { + return token->type < 32 && (context_terminators[context] & (1 << token->type)); } /** @@ -21202,6 +21183,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t } PRISM_FALLTHROUGH case PM_CASE_WRITABLE: { + // When we have `it = value`, we need to add `it` as a local + // variable before parsing the value, in case the value + // references the variable. 
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) { + pm_parser_local_add_location(parser, node->location.start, node->location.end, 0); + } + parser_lex(parser); pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1)); @@ -22195,6 +22183,12 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc ) { node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1)); + if (context_terminator(parser->current_context->context, &parser->current)) { + // If this token terminates the current context, then we need to + // stop parsing the expression, as it has become a statement. + return node; + } + switch (PM_NODE_TYPE(node)) { case PM_MULTI_WRITE_NODE: // Multi-write nodes are statements, and cannot be followed by diff --git a/string.c b/string.c index 68c4f5f1d7948c..fe848d6a4a821c 100644 --- a/string.c +++ b/string.c @@ -6576,15 +6576,12 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) * gsub!(pattern) {|match| ... } -> self or nil * gsub!(pattern) -> an_enumerator * - * Performs the specified substring replacement(s) on +self+; - * returns +self+ if any replacement occurred, +nil+ otherwise. + * Like String#gsub, except that: * - * See {Substitution Methods}[rdoc-ref:String@Substitution+Methods]. - * - * Returns an Enumerator if no +replacement+ and no block given. - * - * Related: String#sub, String#gsub, String#sub!. + * - Performs substitutions in +self+ (not in a copy of +self+). + * - Returns +self+ if any substitutions are made, +nil+ otherwise. * + * Related: see {Modifying}[rdoc-ref:String@Modifying]. */ static VALUE @@ -6601,14 +6598,41 @@ rb_str_gsub_bang(int argc, VALUE *argv, VALUE str) * gsub(pattern) {|match| ... 
} -> new_string * gsub(pattern) -> enumerator * - * Returns a copy of +self+ with all occurrences of the given +pattern+ replaced. + * Returns a copy of +self+ with zero or more substrings replaced. * - * See {Substitution Methods}[rdoc-ref:String@Substitution+Methods]. + * Argument +pattern+ may be a string or a Regexp; + * argument +replacement+ may be a string or a Hash. + * Varying types for the argument values makes this method very versatile. + * + * Below are some simple examples; + * for many more examples, see {Substitution Methods}[rdoc-ref:String@Substitution+Methods]. * - * Returns an Enumerator if no +replacement+ and no block given. + * With arguments +pattern+ and string +replacement+ given, + * replaces each matching substring with the given +replacement+ string: * - * Related: String#sub, String#sub!, String#gsub!. + * s = 'abracadabra' + * s.gsub('ab', 'AB') # => "ABracadABra" + * s.gsub(/[a-c]/, 'X') # => "XXrXXXdXXrX" * + * With arguments +pattern+ and hash +replacement+ given, + * replaces each matching substring with a value from the given +replacement+ hash, + * or removes it: + * + * h = {'a' => 'A', 'b' => 'B', 'c' => 'C'} + * s.gsub(/[a-c]/, h) # => "ABrACAdABrA" # 'a', 'b', 'c' replaced. + * s.gsub(/[a-d]/, h) # => "ABrACAABrA" # 'd' removed. + * + * With argument +pattern+ and a block given, + * calls the block with each matching substring; + * replaces that substring with the block's return value: + * + * s.gsub(/[a-d]/) {|substring| substring.upcase } + * # => "ABrACADABrA" + * + * With argument +pattern+ and no block given, + * returns a new Enumerator. + * + * Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
*/ static VALUE diff --git a/symbol.c b/symbol.c index 43ab0ffa3271c5..abb2c76dc2f758 100644 --- a/symbol.c +++ b/symbol.c @@ -386,6 +386,20 @@ rb_sym_global_symbols_update_references(void) symbols->ids = rb_gc_location(symbols->ids); } +static int +rb_free_global_symbol_table_i(VALUE *sym_ptr, void *data) +{ + sym_set_free(*sym_ptr); + + return ST_DELETE; +} + +void +rb_free_global_symbol_table(void) +{ + rb_concurrent_set_foreach_with_replace(ruby_global_symbols.sym_set, rb_free_global_symbol_table_i, NULL); +} + WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str)); WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id)); diff --git a/test/mkmf/test_pkg_config.rb b/test/mkmf/test_pkg_config.rb index adf5fa6e92b9f5..abeaf548f8cbe4 100644 --- a/test/mkmf/test_pkg_config.rb +++ b/test/mkmf/test_pkg_config.rb @@ -3,7 +3,9 @@ require 'shellwords' class TestMkmfPkgConfig < TestMkmf - PKG_CONFIG = config_string("PKG_CONFIG") {|path| find_executable0(path)} + PKG_CONFIG = config_string("PKG_CONFIG") do |path| + find_executable0(path, "--version") {$?.success?} + end def setup super @@ -26,7 +28,7 @@ def setup Cflags: -I${includedir}/cflags-I --cflags-other EOF - @pkg_config_path, ENV["PKG_CONFIG_PATH"] = ENV["PKG_CONFIG_PATH"], @fixtures_dir + @pkg_config_path, ENV["PKG_CONFIG_PATH"] = ENV["PKG_CONFIG_PATH"], mkintpath(@fixtures_dir) end end diff --git a/test/prism/fixtures/case_in_hash_key.txt b/test/prism/fixtures/case_in_hash_key.txt new file mode 100644 index 00000000000000..75ac8a846f7380 --- /dev/null +++ b/test/prism/fixtures/case_in_hash_key.txt @@ -0,0 +1,6 @@ +case 1 +in 2 + A.print message: +in 3 + A.print message: +end diff --git a/test/prism/fixtures/it_assignment.txt b/test/prism/fixtures/it_assignment.txt new file mode 100644 index 00000000000000..523b0ffe1e1250 --- /dev/null +++ b/test/prism/fixtures/it_assignment.txt @@ -0,0 +1 @@ +42.tap { it = it; p it } diff --git a/test/prism/fixtures/it_read_and_assignment.txt 
b/test/prism/fixtures/it_read_and_assignment.txt new file mode 100644 index 00000000000000..2cceeb2a548710 --- /dev/null +++ b/test/prism/fixtures/it_read_and_assignment.txt @@ -0,0 +1 @@ +42.tap { p it; it = it; p it } diff --git a/test/prism/ractor_test.rb b/test/prism/ractor_test.rb index fba10dbfe257a5..6169940bebc558 100644 --- a/test/prism/ractor_test.rb +++ b/test/prism/ractor_test.rb @@ -62,7 +62,11 @@ def with_ractor(*arguments, &block) if reader reader.gets.chomp else - puts(ignore_warnings { Ractor.new(*arguments, &block) }.value) + ractor = ignore_warnings { Ractor.new(*arguments, &block) } + + # Somewhere in the Ruby 3.5.* series, Ractor#take was removed and + # Ractor#value was added. + puts(ractor.respond_to?(:value) ? ractor.value : ractor.take) end end end diff --git a/test/prism/ruby/parameters_signature_test.rb b/test/prism/ruby/parameters_signature_test.rb index af5b54ed91eac9..ea1eea106ba81c 100644 --- a/test/prism/ruby/parameters_signature_test.rb +++ b/test/prism/ruby/parameters_signature_test.rb @@ -54,7 +54,7 @@ def test_keyrest_anonymous assert_parameters([[:keyrest, :**]], "**") end - if RUBY_ENGINE != "truffleruby" + if RUBY_ENGINE == "ruby" def test_key_ordering assert_parameters([[:keyreq, :a], [:keyreq, :b], [:key, :c], [:key, :d]], "a:, c: 1, b:, d: 2") end diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb index 2396f4186cec0e..156e8f9e9fad4d 100644 --- a/test/prism/ruby/parser_test.rb +++ b/test/prism/ruby/parser_test.rb @@ -99,16 +99,6 @@ class ParserTest < TestCase "seattlerb/regexp_esc_C_slash.txt", ] - # These files are either failing to parse or failing to translate, so we'll - # skip them for now. - skip_all = skip_incorrect | [ - ] - - # Not sure why these files are failing on JRuby, but skipping them for now. 
- if RUBY_ENGINE == "jruby" - skip_all.push("emoji_method_calls.txt", "symbols.txt") - end - # These files are failing to translate their lexer output into the lexer # output expected by the parser gem, so we'll skip them for now. skip_tokens = [ @@ -147,7 +137,7 @@ class ParserTest < TestCase define_method(fixture.test_name) do assert_equal_parses( fixture, - compare_asts: !skip_all.include?(fixture.path), + compare_asts: !skip_incorrect.include?(fixture.path), compare_tokens: !skip_tokens.include?(fixture.path), compare_comments: fixture.path != "embdoc_no_newline_at_end.txt" ) diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index d4b278c28e1366..5c37178889ecf5 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -return if RUBY_VERSION < "3.3" || RUBY_ENGINE == "truffleruby" +return if RUBY_VERSION < "3.3" || RUBY_ENGINE != "ruby" require_relative "../test_helper" diff --git a/vm.c b/vm.c index 9284a2ce69ffe3..4223c2d2ac6f31 100644 --- a/vm.c +++ b/vm.c @@ -3146,6 +3146,7 @@ ruby_vm_destruct(rb_vm_t *vm) rb_free_encoded_insn_data(); rb_free_global_enc_table(); rb_free_loaded_builtin_table(); + rb_free_global_symbol_table(); rb_free_shared_fiber_pool(); rb_free_transcoder_table(); diff --git a/yjit.c b/yjit.c index f83a330bd6f927..44788eaf2c02fd 100644 --- a/yjit.c +++ b/yjit.c @@ -622,8 +622,9 @@ rb_yjit_iseq_inspect(const rb_iseq_t *iseq) const char *path = RSTRING_PTR(rb_iseq_path(iseq)); int lineno = iseq->body->location.code_location.beg_pos.lineno; - char *buf = ZALLOC_N(char, strlen(label) + strlen(path) + num_digits(lineno) + 3); - sprintf(buf, "%s@%s:%d", label, path, lineno); + const size_t size = strlen(label) + strlen(path) + num_digits(lineno) + 3; + char *buf = ZALLOC_N(char, size); + snprintf(buf, size, "%s@%s:%d", label, path, lineno); return buf; }