From 8c9681bc323e47ec2f4c3cc7516c593ce360dbdc Mon Sep 17 00:00:00 2001 From: tompng Date: Fri, 11 Apr 2025 19:25:43 +0900 Subject: [PATCH 1/3] Nesting calculation by Prism --- lib/irb.rb | 19 +- lib/irb/nesting_parser.rb | 563 ++++++++++++++++++++------------ lib/irb/ruby-lex.rb | 17 +- test/irb/test_irb.rb | 17 +- test/irb/test_nesting_parser.rb | 48 ++- test/irb/test_ruby_lex.rb | 3 +- 6 files changed, 399 insertions(+), 268 deletions(-) diff --git a/lib/irb.rb b/lib/irb.rb index c76c3f569..1c1b8418b 100644 --- a/lib/irb.rb +++ b/lib/irb.rb @@ -5,6 +5,7 @@ # by Keiju ISHITSUKA(keiju@ruby-lang.org) # +require "prism" require "ripper" require "reline" @@ -323,15 +324,13 @@ def configure_io end if @context.io.respond_to?(:dynamic_prompt) @context.io.dynamic_prompt do |lines| - tokens = RubyLex.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, local_variables: @context.local_variables) - line_results = IRB::NestingParser.parse_by_line(tokens) + code = lines.map{ |l| l + "\n" }.join + tokens = RubyLex.ripper_lex_without_warning(code, local_variables: @context.local_variables) + parse_lex_result = Prism.parse_lex(code, scopes: [@context.local_variables]) + line_results = IRB::NestingParser.parse_by_line(parse_lex_result) tokens_until_line = [] - line_results.map.with_index do |(line_tokens, _prev_opens, next_opens, _min_depth), line_num_offset| - line_tokens.each do |token, _s| - # Avoid appending duplicated token. Tokens that include "n" like multiline - # tstring_content can exist in multiple lines. - tokens_until_line << token if token != tokens_until_line.last - end + line_results.map.with_index do |(_prev_opens, next_opens, _min_depth), line_num_offset| + tokens_until_line << tokens.shift while !tokens.empty? && tokens.first.pos[0] <= line_num_offset + 1 continue = @scanner.should_continue?(tokens_until_line) generate_prompt(next_opens, continue, line_num_offset) end @@ -344,8 +343,8 @@ def configure_io next nil if !is_newline && lines[line_index]&.byteslice(0, byte_pointer)&.match?(/\A\s*\z/) code = lines[0..line_index].map { |l| "#{l}\n" }.join - tokens = RubyLex.ripper_lex_without_warning(code, local_variables: @context.local_variables) - @scanner.process_indent_level(tokens, lines, line_index, is_newline) + parse_lex_result = Prism.parse_lex(code, scopes: [@context.local_variables]) + @scanner.process_indent_level(parse_lex_result, lines, line_index, is_newline) end end end diff --git a/lib/irb/nesting_parser.rb b/lib/irb/nesting_parser.rb index c1c9a5cc7..86d7215da 100644 --- a/lib/irb/nesting_parser.rb +++ b/lib/irb/nesting_parser.rb @@ -1,238 +1,375 @@ # frozen_string_literal: true + +require 'prism' + module IRB module NestingParser - IGNORE_TOKENS = %i[on_sp on_ignored_nl on_comment on_embdoc_beg on_embdoc on_embdoc_end] + NestingElem = Struct.new(:pos, :event, :tok) - class << self - # Scan each token and call the given block with array of token and other information for parsing - def scan_opens(tokens) - opens = [] - pending_heredocs = [] - first_token_on_line = true - tokens.each do |t| - skip = false - last_tok, state, args = opens.last - case state - when :in_alias_undef - skip = t.event == :on_kw - when :in_unquoted_symbol - unless IGNORE_TOKENS.include?(t.event) - opens.pop - skip = true - end - when :in_lambda_head - opens.pop if t.event == :on_tlambeg || (t.event == :on_kw && t.tok == 'do') - when :in_method_head - unless IGNORE_TOKENS.include?(t.event) - next_args = [] - body = nil - if args.include?(:receiver) - case t.event - when :on_lparen, :on_ivar, 
:on_gvar, :on_cvar - # def (receiver). | def @ivar. | def $gvar. | def @@cvar. - next_args << :dot - when :on_kw - case t.tok - when 'self', 'true', 'false', 'nil' - # def self(arg) | def self. - next_args.push(:arg, :dot) - else - # def if(arg) - skip = true - next_args << :arg - end - when :on_op, :on_backtick - # def +(arg) - skip = true - next_args << :arg - when :on_ident, :on_const - # def a(arg) | def a. - next_args.push(:arg, :dot) - end - end - if args.include?(:dot) - # def receiver.name - next_args << :name if t.event == :on_period || (t.event == :on_op && t.tok == '::') - end - if args.include?(:name) - if %i[on_ident on_const on_op on_kw on_backtick].include?(t.event) - # def name(arg) | def receiver.name(arg) - next_args << :arg - skip = true - end - end - if args.include?(:arg) - case t.event - when :on_nl, :on_semicolon - # def receiver.f; - body = :normal - when :on_lparen - # def receiver.f() - next_args << :eq - else - if t.event == :on_op && t.tok == '=' - # def receiver.f = - body = :oneliner - else - # def receiver.f arg - next_args << :arg_without_paren - end - end - end - if args.include?(:eq) - if t.event == :on_op && t.tok == '=' - body = :oneliner - else - body = :normal - end - end - if args.include?(:arg_without_paren) - if %i[on_semicolon on_nl].include?(t.event) - # def f a; - body = :normal - else - # def f a, b - next_args << :arg_without_paren - end - end - if body == :oneliner - opens.pop - elsif body - opens[-1] = [last_tok, nil] - else - opens[-1] = [last_tok, :in_method_head, next_args] - end - end - when :in_for_while_until_condition - if t.event == :on_semicolon || t.event == :on_nl || (t.event == :on_kw && t.tok == 'do') - skip = true if t.event == :on_kw && t.tok == 'do' - opens[-1] = [last_tok, nil] - end - end + class NestingVisitor < Prism::Visitor + def initialize + @lines = [] + @heredocs = [] + end - unless skip - case t.event - when :on_kw - case t.tok - when 'begin', 'class', 'module', 'do', 'case' - opens << [t, nil] - when 'end' - opens.pop - when 'def' - opens << [t, :in_method_head, [:receiver, :name]] - when 'if', 'unless' - unless t.state.allbits?(Ripper::EXPR_LABEL) - opens << [t, nil] - end - when 'while', 'until' - unless t.state.allbits?(Ripper::EXPR_LABEL) - opens << [t, :in_for_while_until_condition] - end - when 'ensure', 'rescue' - unless t.state.allbits?(Ripper::EXPR_LABEL) - opens.pop - opens << [t, nil] - end - when 'alias' - opens << [t, :in_alias_undef, 2] - when 'undef' - opens << [t, :in_alias_undef, 1] - when 'elsif', 'else', 'when' - opens.pop - opens << [t, nil] - when 'for' - opens << [t, :in_for_while_until_condition] - when 'in' - if last_tok&.event == :on_kw && %w[case in].include?(last_tok.tok) && first_token_on_line - opens.pop - opens << [t, nil] - end - end - when :on_tlambda - opens << [t, :in_lambda_head] - when :on_lparen, :on_lbracket, :on_lbrace, :on_tlambeg, :on_embexpr_beg, :on_embdoc_beg - opens << [t, nil] - when :on_rparen, :on_rbracket, :on_rbrace, :on_embexpr_end, :on_embdoc_end - opens.pop - when :on_heredoc_beg - pending_heredocs << t - when :on_heredoc_end - opens.pop - when :on_backtick - opens << [t, nil] unless t.state == Ripper::EXPR_ARG - when :on_tstring_beg, :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg, :on_regexp_beg - opens << [t, nil] - when :on_tstring_end, :on_regexp_end, :on_label_end - opens.pop - when :on_symbeg - if t.tok == ':' - opens << [t, :in_unquoted_symbol] - else - opens << [t, nil] - end + def nestings + size = [@lines.size, @heredocs.size].max + 
nesting = [] + size.times.map do |line_index| + @lines[line_index]&.sort_by { |col, pri| [col, pri] }&.each do |col, pri, elem| + if elem + nesting << elem + else + nesting.pop end end - if t.event == :on_nl || t.event == :on_semicolon - first_token_on_line = true - elsif t.event != :on_sp - first_token_on_line = false + @heredocs[line_index]&.sort_by { |_node, (_line, col)| col }&.reverse_each do |elem| + nesting << elem end - if pending_heredocs.any? && t.tok.include?("\n") - pending_heredocs.reverse_each { |t| opens << [t, nil] } - pending_heredocs = [] + nesting.dup + end + end + + def heredoc_open(node) + elem = NestingElem.new([node.location.start_line, node.location.start_column], :on_heredoc_beg, node.opening) + (@heredocs[node.location.start_line - 1] ||= []) << elem + end + + def open(line, column, elem) + (@lines[line - 1] ||= []) << [column, +1, elem] + end + + def close(line, column) + (@lines[line - 1] ||= []) << [column, -1] + end + + def modifier_node?(node, keyword_loc) + !(keyword_loc && node.location.start_line == keyword_loc.start_line && node.location.start_column == keyword_loc.start_column) + end + + def open_location(location, type, tok) + open(location.start_line, location.start_column, NestingElem.new([location.start_line, location.start_column], type, tok)) + end + + def close_location(location) + close(location.end_line, location.end_column) + end + + def close_location_start(location) + close(location.start_line, location.start_column) + end + + def close_end_keyword_loc(node) + close_location(node.end_keyword_loc) if node.end_keyword == 'end' + end + + def close_closing_loc(node) + close_location(node.closing_loc) unless node.closing.nil? || node.closing.empty? + end + + def visit_for_node(node) + super + open_location(node.location, :on_kw, 'for') + close_end_keyword_loc(node) + end + + def visit_while_node(node) + super + return if modifier_node?(node, node.keyword_loc) + + open_location(node.location, :on_kw, 'while') + close_closing_loc(node) + end + + def visit_until_node(node) + super + return if modifier_node?(node, node.keyword_loc) + + open_location(node.location, :on_kw, 'until') + close_closing_loc(node) + end + + def visit_if_node(node) + super + return if !node.if_keyword || modifier_node?(node, node.if_keyword_loc) + + open_location(node.location, :on_kw, node.if_keyword) + if node.subsequent + close_location_start(node.subsequent.location) + else + close_end_keyword_loc(node) + end + end + + def visit_unless_node(node) + super + return if modifier_node?(node, node.keyword_loc) + + open_location(node.location, :on_kw, 'unless') + if node.else_clause + close_location_start(node.else_clause.location) + else + close_end_keyword_loc(node) + end + end + + def visit_case_node(node) + super + open_location(node.location, :on_kw, 'case') + if node.else_clause + close_location_start(node.else_clause.location) + else + close_end_keyword_loc(node) + end + end + alias visit_case_match_node visit_case_node + + def visit_when_node(node) + super + close_location_start(node.location) + open_location(node.location, :on_kw, 'when') + end + + def visit_in_node(node) + super + close_location_start(node.location) + open_location(node.location, :on_kw, 'in') + end + + def visit_else_node(node) + super + if node.else_keyword == 'else' + open_location(node.location, :on_kw, 'else') + close_end_keyword_loc(node) + end + end + + def visit_ensure_node(node) + super + return if modifier_node?(node, node.ensure_keyword_loc) + + close_location_start(node.location) + 
open_location(node.location, :on_kw, 'ensure') + end + + def visit_rescue_node(node) + super + return if modifier_node?(node, node.keyword_loc) + + close_location_start(node.location) + open_location(node.location, :on_kw, 'rescue') + end + + def visit_begin_node(node) + super + if node.begin_keyword + open_location(node.location, :on_kw, 'begin') + close_end_keyword_loc(node) + end + end + + def visit_block_node(node) + super + open_location(node.location, node.opening == '{' ? :on_lbrace : :on_kw, node.opening) + close_closing_loc(node) + end + + def visit_array_node(node) + super + type = + case node.opening + when nil + # `x = 1, 2` doesn't have opening + nil + when '[' + :bracket + when /\A%W/ + :on_words_beg + when /\A%w/ + :on_qwords_beg + when /\A%I/ + :on_symbols_beg + when /\A%i/ + :on_qsymbols_beg + end + + if type + open_location(node.location, type, node.opening) + close_closing_loc(node) + end + end + + def visit_hash_node(node) + super + open_location(node.location, :on_lbrace, '{') + close_closing_loc(node) + end + + def heredoc_string_like(node, type) + if node.opening&.start_with?('<<') + heredoc_open(node) + # Heredoc closing contains trailing newline. We need to exclude it + close_location_start(node.closing_loc) unless node.closing.empty? + elsif node.opening + open_location(node.location, type, node.opening) + if node.closing && node.closing != '' + # Closing of `"#{\n` is "\n". We need to treat it as not-closed. + close_location_start(node.closing_loc) if node.opening.match?(/\n\z/) || node.closing != "\n" end - if opens.last && opens.last[1] == :in_alias_undef && !IGNORE_TOKENS.include?(t.event) && t.event != :on_heredoc_end - tok, state, arg = opens.pop - opens << [tok, state, arg - 1] if arg >= 1 + end + end + + def visit_embedded_statements_node(node) + super + open_location(node.location, :on_embexpr_beg, '#{') + close_closing_loc(node) + end + + def visit_interpolated_string_node(node) + super + heredoc_string_like(node, :on_tstring_beg) + end + alias visit_string_node visit_interpolated_string_node + + def visit_interpolated_x_string_node(node) + super + heredoc_string_like(node, :on_backtick) + end + alias visit_x_string_node visit_interpolated_x_string_node + + def visit_symbol_node(node) + super + unless node.opening.nil? || node.opening.empty? || node.opening == ':' + # :"sym" or %s[sym] + open_location(node.location, :on_symbeg, node.opening) + close_closing_loc(node) + end + end + alias visit_interpolated_symbol_node visit_symbol_node + + def visit_regular_expression_node(node) + super + open_location(node.location, :on_regexp_beg, node.opening) + close_closing_loc(node) + end + alias visit_interpolated_regular_expression_node visit_regular_expression_node + + def visit_parentheses_node(node) + super + open_location(node.location, :on_lparen, '(') + close_closing_loc(node) + end + + def visit_call_node(node) + super + type = + case node.opening + when '(' + :on_lparen + when '[' + :on_lbracket end - yield t, opens if block_given? + + if type + open_location(node.opening_loc, type, node.opening) + close_location(node.closing_loc) unless node.closing.empty? + end + end + + def visit_block_parameters_node(node) + super + if node.opening == '(' + open_location(node.location, :on_lparen, '(') + close_closing_loc(node) + end + end + + def visit_lambda_node(node) + super + open_location(node.opening_loc, :on_tlambeg, node.opening) + close_location(node.closing_loc) unless node.closing.empty? 
+ end + + def visit_super_node(node) + super + if node.lparen + open_location(node.lparen_loc, :on_lparen, '(') + close_location(node.rparen_loc) if node.rparen == ')' + end + end + alias visit_yield_node visit_super_node + alias visit_defined_node visit_super_node + + def visit_def_node(node) + super + open_location(node.location, :on_kw, 'def') + if node.lparen == '(' + open_location(node.lparen_loc, :on_lparen, '(') + close_location(node.rparen_loc) if node.rparen == ')' end - opens.map(&:first) + pending_heredocs.reverse + if node.equal + close_location(node.equal_loc) + else + close_end_keyword_loc(node) + end + end + + def visit_class_node(node) + super + open_location(node.location, :on_kw, 'class') + close_end_keyword_loc(node) + end + alias visit_singleton_class_node visit_class_node + + def visit_module_node(node) + super + open_location(node.location, :on_kw, 'module') + close_end_keyword_loc(node) end + end + + class << self - def open_tokens(tokens) - # scan_opens without block will return a list of open tokens at last token position - scan_opens(tokens) + # Return a list of open nestings at last token position + def open_nestings(parse_lex_result) + parse_by_line(parse_lex_result).last[1] end - # Calculates token information [line_tokens, prev_opens, next_opens, min_depth] for each line. + # Calculates nesting information [prev_opens, next_opens, min_depth] for each line. # Example code # ["hello # world"+( # First line - # line_tokens: [[lbracket, '['], [tstring_beg, '"'], [tstring_content("hello\nworld"), "hello\n"]] - # prev_opens: [] - # next_tokens: [lbracket, tstring_beg] - # min_depth: 0 (minimum at beginning of line) + # prev_opens: [] + # next_opens: [lbracket, tstring_beg] + # min_depth: 0 (minimum at beginning of line) # Second line - # line_tokens: [[tstring_content("hello\nworld"), "world"], [tstring_end, '"'], [op, '+'], [lparen, '(']] - # prev_opens: [lbracket, tstring_beg] - # next_tokens: [lbracket, lparen] - # min_depth: 1 (minimum just after tstring_end) - def parse_by_line(tokens) - line_tokens = [] - prev_opens = [] - min_depth = 0 - output = [] - last_opens = scan_opens(tokens) do |t, opens| - depth = t == opens.last&.first ? opens.size - 1 : opens.size - min_depth = depth if depth < min_depth - if t.tok.include?("\n") - t.tok.each_line do |line| - line_tokens << [t, line] - next if line[-1] != "\n" - next_opens = opens.map(&:first) - output << [line_tokens, prev_opens, next_opens, min_depth] - prev_opens = next_opens - min_depth = prev_opens.size - line_tokens = [] - end - else - line_tokens << [t, t.tok] + # prev_opens: [lbracket, tstring_beg] + # next_opens: [lbracket, lparen] + # min_depth: 1 (minimum just after tstring_end) + + def parse_by_line(parse_lex_result) + visitor = NestingVisitor.new + node, tokens = parse_lex_result.value + node.accept(visitor) + tokens.each do |token,| + case token.type + when :EMBDOC_BEGIN + visitor.open_location(token.location, :on_embdoc_beg, '=begin') + when :EMBDOC_END + visitor.close_location_start(token.location) end end - output << [line_tokens, prev_opens, last_opens, min_depth] if line_tokens.any? - output + nestings = visitor.nestings + last_nesting = nestings.last || [] + + num_lines = parse_lex_result.source.source.lines.size + num_lines.times.map do |i| + prev_opens = i == 0 ? 
[] : nestings[i - 1] || last_nesting + opens = nestings[i] || last_nesting + min_depth = prev_opens.zip(opens).take_while { |s, e| s == e }.size + [prev_opens, opens, min_depth] + end end end end diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb index dd4a8d060..e577d4ea7 100644 --- a/lib/irb/ruby-lex.rb +++ b/lib/irb/ruby-lex.rb @@ -4,6 +4,7 @@ # by Keiju ISHITSUKA(keiju@ruby-lang.org) # +require "prism" require "ripper" require "jruby" if RUBY_ENGINE == "jruby" require_relative "nesting_parser" @@ -170,7 +171,7 @@ def ripper_lex_without_warning(code, local_variables: []) def check_code_state(code, local_variables:) tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables) - opens = NestingParser.open_tokens(tokens) + opens = NestingParser.open_nestings(Prism.parse_lex(code, scopes: [local_variables])) [tokens, opens, code_terminated?(code, tokens, opens, local_variables: local_variables)] end @@ -339,7 +340,7 @@ def free_indent_token?(token) # Calculates the difference of pasted code's indent and indent calculated from tokens def indent_difference(lines, line_results, line_index) loop do - _tokens, prev_opens, _next_opens, min_depth = line_results[line_index] + prev_opens, _next_opens, min_depth = line_results[line_index] open_token = prev_opens.last if !open_token || (open_token.event != :on_heredoc_beg && !free_indent_token?(open_token)) # If the leading whitespace is an indent, return the difference @@ -356,14 +357,14 @@ def indent_difference(lines, line_results, line_index) end end - def process_indent_level(tokens, lines, line_index, is_newline) - line_results = NestingParser.parse_by_line(tokens) + def process_indent_level(parse_lex_result, lines, line_index, is_newline) + line_results = NestingParser.parse_by_line(parse_lex_result) result = line_results[line_index] if result - _tokens, prev_opens, next_opens, min_depth = result + prev_opens, next_opens, min_depth = result else # When last line is empty - prev_opens = next_opens = line_results.last[2] + prev_opens = next_opens = line_results.last[1] min_depth = next_opens.size end @@ -405,7 +406,7 @@ def process_indent_level(tokens, lines, line_index, is_newline) elsif prev_open_token&.event == :on_heredoc_beg tok = prev_open_token.tok if prev_opens.size <= next_opens.size - if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token + if is_newline && lines[line_index].empty? && line_results[line_index - 1][0].last != next_open_token # First line in heredoc tok.match?(/^<<[-~]/) ? 
base_indent + indent : indent elsif tok.match?(/^<<~/) @@ -485,7 +486,7 @@ def check_termination_in_prev_line(code, local_variables:) if first_token && first_token.state != Ripper::EXPR_DOT tokens_without_last_line = tokens[0..index] code_without_last_line = tokens_without_last_line.map(&:tok).join - opens_without_last_line = NestingParser.open_tokens(tokens_without_last_line) + opens_without_last_line = NestingParser.open_nestings(Prism.parse_lex(code_without_last_line, scopes: [local_variables])) if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line, local_variables: local_variables) return last_line_tokens.map(&:tok).join end diff --git a/test/irb/test_irb.rb b/test/irb/test_irb.rb index 1b0290d0c..b53003e60 100644 --- a/test/irb/test_irb.rb +++ b/test/irb/test_irb.rb @@ -604,18 +604,17 @@ def test_pasted_code_keep_base_indent_spaces_with_heredoc [%q( [1), 10, 12, 3], [%q( ]+[["a), 10, 14, 4], [%q(b" + <<~A + <<-B + < Date: Sat, 17 Jan 2026 01:39:25 +0900 Subject: [PATCH 2/3] Avoid prism-1.8.0 used in test, use latest version on github --- Gemfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Gemfile b/Gemfile index b3d15d84c..20be5b5e0 100644 --- a/Gemfile +++ b/Gemfile @@ -28,6 +28,8 @@ if ENV['PRISM_VERSION'] == 'latest' gem "prism", github: "ruby/prism" elsif ENV['PRISM_VERSION'] gem "prism", ENV['PRISM_VERSION'] +else + gem "prism", "!= 1.8.0" end if RUBY_VERSION >= "3.0.0" && !is_truffleruby From 9dd75f9b5f1e6aeaa3baa3ef96b5a9c873691a2a Mon Sep 17 00:00:00 2001 From: tompng Date: Thu, 15 Jan 2026 04:18:02 +0900 Subject: [PATCH 3/3] RubyLex Ripper to Prism Remove remaining ripper dependency in: syntax check, code continue/termination check, show-source, regexp-completion and string-like command arg parse. 
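For illustration only (not part of this patch): the string-literal check that
Command::RubyArgsExtractor now performs with Prism instead of Ripper.sexp can be
sketched as below; single_string_literal? is a hypothetical helper name used
only for this sketch.

    require "prism"

    # Sketch: is `str` exactly one plain string literal?
    def single_string_literal?(str)
      return false if str.empty?
      result = Prism.parse(str)
      body = result.value.statements.body
      result.success? && body.size == 1 && body.first.is_a?(Prism::StringNode)
    end

    single_string_literal?('"foo.rb"') # => true
    single_string_literal?('Foo#bar')  # => false

Interpolated strings parse as Prism::InterpolatedStringNode, so they do not
match and unwrap_string_literal returns the raw argument unchanged.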
--- lib/irb.rb | 16 +- lib/irb/command/internal_helpers.rb | 9 +- lib/irb/completion.rb | 15 +- lib/irb/ruby-lex.rb | 354 +++++++++-------------- lib/irb/source_finder.rb | 16 +- test/irb/test_irb.rb | 2 +- test/irb/test_ruby_lex.rb | 95 +++--- test/irb/yamatanooroti/test_rendering.rb | 24 +- 8 files changed, 215 insertions(+), 316 deletions(-) diff --git a/lib/irb.rb b/lib/irb.rb index 1c1b8418b..57ea0e425 100644 --- a/lib/irb.rb +++ b/lib/irb.rb @@ -6,7 +6,6 @@ # require "prism" -require "ripper" require "reline" require_relative "irb/init" @@ -252,11 +251,10 @@ def read_input_nomultiline(prompt) code << line return code if command?(code) - tokens, opens, terminated = @scanner.check_code_state(code, local_variables: @context.local_variables) + continue, opens, terminated = @scanner.check_code_state(code, local_variables: @context.local_variables) return code if terminated line_offset += 1 - continue = @scanner.should_continue?(tokens) prompt = generate_prompt(opens, continue, line_offset) end end @@ -317,7 +315,7 @@ def configure_io else next true if command?(code) - _tokens, _opens, terminated = @scanner.check_code_state(code, local_variables: @context.local_variables) + _continue, _opens, terminated = @scanner.check_code_state(code, local_variables: @context.local_variables) terminated end end @@ -325,13 +323,17 @@ def configure_io if @context.io.respond_to?(:dynamic_prompt) @context.io.dynamic_prompt do |lines| code = lines.map{ |l| l + "\n" }.join - tokens = RubyLex.ripper_lex_without_warning(code, local_variables: @context.local_variables) parse_lex_result = Prism.parse_lex(code, scopes: [@context.local_variables]) line_results = IRB::NestingParser.parse_by_line(parse_lex_result) + + tokens = parse_lex_result.value[1].map(&:first) + tokens_by_line = tokens.group_by {|t| t.location.start_line - 1 } + tokens_until_line = [] line_results.map.with_index do |(_prev_opens, next_opens, _min_depth), line_num_offset| - tokens_until_line << tokens.shift while !tokens.empty? && tokens.first.pos[0] <= line_num_offset + 1 - continue = @scanner.should_continue?(tokens_until_line) + line = lines[line_num_offset] + tokens_until_line.concat(tokens_by_line[line_num_offset] || []) + continue = @scanner.should_continue?(tokens_until_line, line, line_num_offset + 1) generate_prompt(next_opens, continue, line_num_offset) end end diff --git a/lib/irb/command/internal_helpers.rb b/lib/irb/command/internal_helpers.rb index a01ddb1d4..60914ebe1 100644 --- a/lib/irb/command/internal_helpers.rb +++ b/lib/irb/command/internal_helpers.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'prism' + module IRB module Command # Internal use only, for default command's backward compatibility. @@ -7,9 +9,10 @@ module RubyArgsExtractor # :nodoc: def unwrap_string_literal(str) return if str.empty? - sexp = Ripper.sexp(str) - if sexp && sexp.size == 2 && sexp.last&.first&.first == :string_literal - @irb_context.workspace.binding.eval(str).to_s + result = Prism.parse(str) + body = result.value.statements.body + if result.success? 
&& body.size == 1 && body.first.is_a?(Prism::StringNode) + body.first.unescaped else str end diff --git a/lib/irb/completion.rb b/lib/irb/completion.rb index 40a7d3b53..6c2706f16 100644 --- a/lib/irb/completion.rb +++ b/lib/irb/completion.rb @@ -166,22 +166,13 @@ def complete_require_path(target, preposing, postposing) else return nil # It's not String literal end - tokens = RubyLex.ripper_lex_without_warning(preposing.rstrip) - tok = nil - tokens.reverse_each do |t| - unless [:on_lparen, :on_sp, :on_ignored_sp, :on_nl, :on_ignored_nl, :on_comment].include?(t.event) - tok = t - break - end - end - return unless tok&.event == :on_ident && tok.state == Ripper::EXPR_CMDARG - case tok.tok - when 'require' + case preposing + when /(^|[^\w])require\(? *\z/ retrieve_files_to_require_from_load_path.filter_map { |path| quote + path if path.start_with?(actual_target) } - when 'require_relative' + when /(^|[^\w])require_relative\(? *\z/ retrieve_files_to_require_relative_from_current_dir.filter_map { |path| quote + path if path.start_with?(actual_target) } diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb index e577d4ea7..32e2120ab 100644 --- a/lib/irb/ruby-lex.rb +++ b/lib/irb/ruby-lex.rb @@ -5,7 +5,6 @@ # require "prism" -require "ripper" require "jruby" if RUBY_ENGINE == "jruby" require_relative "nesting_parser" @@ -97,213 +96,149 @@ def compile_with_errors_suppressed(code, line_no: 1) end result end - - def generate_local_variables_assign_code(local_variables) - # Some reserved words could be a local variable - # Example: def f(if: 1); binding.irb; end - # These reserved words should be removed from assignment code - local_variables -= RESERVED_WORDS - "#{local_variables.join('=')}=nil;" unless local_variables.empty? - end - - # Some part of the code is not included in Ripper's token. - # Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr. - # With interpolated tokens, tokens.map(&:tok).join will be equal to code. - def interpolate_ripper_ignored_tokens(code, tokens) - line_positions = [0] - code.lines.each do |line| - line_positions << line_positions.last + line.bytesize - end - prev_byte_pos = 0 - interpolated = [] - prev_line = 1 - tokens.each do |t| - line, col = t.pos - byte_pos = line_positions[line - 1] + col - if prev_byte_pos < byte_pos - tok = code.byteslice(prev_byte_pos...byte_pos) - pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] - interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0) - prev_line += tok.count("\n") - end - interpolated << t - prev_byte_pos = byte_pos + t.tok.bytesize - prev_line += t.tok.count("\n") - end - if prev_byte_pos < code.bytesize - tok = code.byteslice(prev_byte_pos..) 
- pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] - interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0) - end - interpolated - end - - def ripper_lex_without_warning(code, local_variables: []) - verbose, $VERBOSE = $VERBOSE, nil - lvars_code = generate_local_variables_assign_code(local_variables) - original_code = code - if lvars_code - code = "#{lvars_code}\n#{code}" - line_no = 0 - else - line_no = 1 - end - - compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no| - lexer = Ripper::Lexer.new(inner_code, '-', line_no) - tokens = [] - lexer.scan.each do |t| - next if t.pos.first == 0 - prev_tk = tokens.last - position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize - if position_overlapped - tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event) - else - tokens << t - end - end - interpolate_ripper_ignored_tokens(original_code, tokens) - end - ensure - $VERBOSE = verbose - end end def check_code_state(code, local_variables:) - tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables) - opens = NestingParser.open_nestings(Prism.parse_lex(code, scopes: [local_variables])) - [tokens, opens, code_terminated?(code, tokens, opens, local_variables: local_variables)] + parse_lex_result = Prism.parse_lex(code, scopes: [local_variables]) + + opens = NestingParser.open_nestings(parse_lex_result) + lines = code.lines + tokens = parse_lex_result.value[1].map(&:first).sort_by {|t| t.location.start_offset } + continue = should_continue?(tokens, lines.last, lines.size) + [continue, opens, code_terminated?(code, continue, opens, local_variables: local_variables)] end - def code_terminated?(code, tokens, opens, local_variables:) + def code_terminated?(code, continue, opens, local_variables:) case check_code_syntax(code, local_variables: local_variables) when :unrecoverable_error - true + return true when :recoverable_error - false + return false when :other_error - opens.empty? && !should_continue?(tokens) + opens.empty? && !continue when :valid - !should_continue?(tokens) + !continue end end def assignment_expression?(code, local_variables:) - # Try to parse the code and check if the last of possibly multiple - # expressions is an assignment type. - - # If the expression is invalid, Ripper.sexp should return nil which will - # result in false being returned. Any valid expression should return an - # s-expression where the second element of the top level array is an - # array of parsed expressions. The first element of each expression is the - # expression's type. - verbose, $VERBOSE = $VERBOSE, nil - code = "#{RubyLex.generate_local_variables_assign_code(local_variables) || 'nil;'}\n#{code}" - # Get the last node_type of the line. drop(1) is to ignore the local_variables_assign_code part. - node_type = Ripper.sexp(code)&.dig(1)&.drop(1)&.dig(-1, 0) - ASSIGNMENT_NODE_TYPES.include?(node_type) - ensure - $VERBOSE = verbose + # Parse the code and check if the last of possibly multiple + # expressions is an assignment node. 
+ program_node = Prism.parse(code, scopes: [local_variables]).value + node = program_node.statements.body.last + case node + when nil + # Empty code, comment-only code or invalid code + false + when Prism::CallNode + # a.b = 1, a[b] = 1 + # Prism::CallNode#equal_loc is only available in prism >= 1.7.0 + if node.name == :[]= + # Distinguish between `a[k] = v` from `a.[]= k, v`, `a.[]=(k, v)` + node.opening == '[' + else + node.name.end_with?('=') + end + when Prism::MatchWriteNode + # /(?)/ =~ a, Class name is *WriteNode but not an assignment. + false + else + # a = 1, @a = 1, $a = 1, @@a = 1, A = 1, a += 1, a &&= 1, a.b += 1, and so on + node.class.name.match?(/WriteNode/) + end end - def should_continue?(tokens) - # Look at the last token and check if IRB need to continue reading next line. - # Example code that should continue: `a\` `a +` `a.` - # Trailing spaces, newline, comments are skipped - return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n" - - tokens.reverse_each do |token| - case token.event - when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end - # Skip - when :on_regexp_end, :on_heredoc_end, :on_semicolon - # State is EXPR_BEG but should not continue - return false + def should_continue?(tokens, line, line_num) + # Check if the line ends with \\. Then IRB should continue reading next line. + # Space and backslash are not included in Prism token, so find trailing text after last non-newline token position. + trailing = line + tokens.reverse_each do |t| + break if t.location.start_line < line_num + if t.location.start_line == line_num && t.type != :IGNORED_NEWLINE && t.type != :NEWLINE && t.type != :EOF + trailing = line.byteslice(t.location.end_column..) + break + end + end + return true if trailing.match?(/\A\s*\\\n?\z/) + + # "1 + \n" and "foo.\n" should continue. + pos = tokens.size - 1 + ignored_newline_found = false + while pos >= 0 + case tokens[pos].type + when :EMBDOC_BEGIN, :EMBDOC_LINE, :EMBDOC_END, :COMMENT, :EOF + pos -= 1 + when :IGNORED_NEWLINE + pos -= 1 + ignored_newline_found = true else - # Endless range should not continue - return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/) - - # EXPR_DOT and most of the EXPR_BEG should continue - return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT) + break end end - false + + # If IGNORED_NEWLINE token is following non-newline non-semicolon token, it should continue. + # Special case: treat `1..` and `1...` as not continuing. + ignored_newline_found && pos >= 0 && !%i[DOT_DOT DOT_DOT_DOT NEWLINE SEMICOLON].include?(tokens[pos].type) end def check_code_syntax(code, local_variables:) - lvars_code = RubyLex.generate_local_variables_assign_code(local_variables) - code = "#{lvars_code}\n#{code}" - - begin # check if parser error are available - verbose, $VERBOSE = $VERBOSE, nil - case RUBY_ENGINE - when 'ruby' - self.class.compile_with_errors_suppressed(code) do |inner_code, line_no| - RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no) - end - when 'jruby' - JRuby.compile_ir(code) + result = Prism.lex(code, scopes: [local_variables]) + return :valid if result.success? + + # Get the token excluding trailing comments and newlines + # to compare error location with the last or second-last meaningful token location + tokens = result.value.map(&:first) + until tokens.empty? 
+ case tokens.last.type + when :COMMENT, :NEWLINE, :IGNORED_NEWLINE, :EMBDOC_BEGIN, :EMBDOC_LINE, :EMBDOC_END, :EOF + tokens.pop else - catch(:valid) do - eval("BEGIN { throw :valid, true }\n#{code}") - false - end + break end - rescue EncodingError - # This is for a hash with invalid encoding symbol, {"\xAE": 1} - :unrecoverable_error - rescue SyntaxError => e - case e.message - when /unexpected keyword_end/ - # "syntax error, unexpected keyword_end" - # - # example: - # if ( - # end - # - # example: - # end - return :unrecoverable_error - when /unexpected '\.'/ - # "syntax error, unexpected '.'" - # - # example: - # . - return :unrecoverable_error - when /unexpected tREGEXP_BEG/ - # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" - # - # example: - # method / f / + end + + unknown = false + result.errors.each do |error| + case error.message + when /unexpected character literal|incomplete expression at|unexpected .%.|too short escape sequence/i + # Ignore these errors. Likely to appear only at the end of code. + # `[a, b ?` unexpected character literal, incomplete expression at + # `p a, %` unexpected '%' + # `/\u` too short escape sequence + when /unexpected write target/i + # `a,b` recoverable by `=v` + # `a,b,` recoverable by `c=v` + tok = tokens.last + tok = tokens[-2] if tok&.type == :COMMA + return :unrecoverable_error if tok && error.location.end_offset < tok.location.end_offset + when /(invalid|unexpected) (?:break|next|redo)/i + # Hard to check correctly, so treat it as always recoverable. + # `(break;1)` recoverable by `.f while true` + when / meets end of file|end-of-input|unterminated |cannot parse|could not parse/i + # These are recoverable errors if there is no other unrecoverable error + # `/aaa` unterminated regexp meets end of file + # `def f` unexpected end-of-input + # `"#{` unterminated string + # `:"aa` cannot parse the string part + # `def f =` could not parse the endless method body + when /is not allowed/i + # `@@` `$--` return :unrecoverable_error - when /unterminated (?:string|regexp) meets end of file/ - # "unterminated regexp meets end of file" - # - # example: - # / - # - # "unterminated string meets end of file" - # - # example: - # ' - return :recoverable_error - when /unexpected end-of-input/ - # "syntax error, unexpected end-of-input, expecting keyword_end" - # - # example: - # if true - # hoge - # if false - # fuga - # end - return :recoverable_error + when /unexpected |invalid |dynamic constant assignment|can't set variable|can't change the value|is not valid to get|variable capture in alternative pattern/i + # Likely to be unrecoverable except when the error is at the last token location. + # Unexpected: `class a`, `tap(&`, `def f(a,` + # Invalid: `a ? b :`, `/\u{`, `"\M-` + # `a,B` recoverable by `.c=v` dynamic constant assignment + # `a,$1` recoverable by `.f=v` Can't set variable + # `a,self` recoverable by `.f=v` Can't change the value of self + # `p foo?:` recoverable by `v` is not valid to get + # `x in 1|{x:` recoverable by `1}` variable capture in alternative pattern + return :unrecoverable_error if tokens.last && error.location.end_offset <= tokens.last.location.start_offset else - return :other_error + unknown = true end - ensure - $VERBOSE = verbose end - :valid + unknown ? :other_error : :recoverable_error end def calc_indent_level(opens) @@ -456,43 +391,36 @@ def ltype_from_open_tokens(opens) end end + # Check if the node on the last line is connected to previous line. 
+ # Connected example: + # foo + # .bar; baz + # Not connected example: + # foo + # bar + # If it's connected, return the last line string. Otherwise, return false. def check_termination_in_prev_line(code, local_variables:) - tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables) - past_first_newline = false - index = tokens.rindex do |t| - # traverse first token before last line - if past_first_newline - if t.tok.include?("\n") - true - end - elsif t.tok.include?("\n") - past_first_newline = true - false - else - false - end - end + lines = code.lines + return false if lines.size < 2 - if index - first_token = nil - last_line_tokens = tokens[(index + 1)..(tokens.size - 1)] - last_line_tokens.each do |t| - unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event) - first_token = t - break - end - end + prev_line_result = Prism.parse(lines[...-1].join, scopes: [local_variables]) + return false unless prev_line_result.success? - if first_token && first_token.state != Ripper::EXPR_DOT - tokens_without_last_line = tokens[0..index] - code_without_last_line = tokens_without_last_line.map(&:tok).join - opens_without_last_line = NestingParser.open_nestings(Prism.parse_lex(code_without_last_line, scopes: [local_variables])) - if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line, local_variables: local_variables) - return last_line_tokens.map(&:tok).join - end - end + prev_nodes = prev_line_result.value.statements.body + whole_nodes = Prism.parse(code, scopes: [local_variables]).value.statements.body + + return false if whole_nodes.size < prev_nodes.size + return false unless prev_nodes.zip(whole_nodes).all? do |a, b| + a.location == b.location end - false + + # If the last line only contain comments, treat it as not connected to handle this case: + # receiver + # # comment + # .method + return false if lines.last.match?(/\A\s*#/) + + lines.last end end # :startdoc: diff --git a/lib/irb/source_finder.rb b/lib/irb/source_finder.rb index 1a6382089..0321b1bd9 100644 --- a/lib/irb/source_finder.rb +++ b/lib/irb/source_finder.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ruby-lex" +require 'prism' module IRB class SourceFinder @@ -44,21 +44,13 @@ def colorized_content private def find_end - lex = RubyLex.new code = file_content lines = code.lines[(@line - 1)..-1] - tokens = RubyLex.ripper_lex_without_warning(lines.join) - prev_tokens = [] # chunk with line number - tokens.chunk { |tok| tok.pos[0] }.each do |lnum, chunk| - code = lines[0..lnum].join - prev_tokens.concat chunk - continue = lex.should_continue?(prev_tokens) - syntax = lex.check_code_syntax(code, local_variables: []) - if !continue && syntax == :valid - return @line + lnum - end + lines.each_with_index do |line, index| + sub_code = lines.take(index + 1).join + return @line + index if Prism.parse_success?(sub_code) end @line end diff --git a/test/irb/test_irb.rb b/test/irb/test_irb.rb index b53003e60..cd658e239 100644 --- a/test/irb/test_irb.rb +++ b/test/irb/test_irb.rb @@ -686,7 +686,7 @@ def assert_rows_with_correct_indents(rows_with_spaces, assert_indent_level: fals def assert_indent_level(lines, expected) code = lines.map { |l| "#{l}\n" }.join # code should end with "\n" - _tokens, opens, _ = @irb.scanner.check_code_state(code, local_variables: []) + _continue, opens, _ = @irb.scanner.check_code_state(code, local_variables: []) indent_level = @irb.scanner.calc_indent_level(opens) error_message = "Calculated the wrong number of indent level 
for:\n #{lines.join("\n")}" assert_equal(expected, indent_level, error_message) diff --git a/test/irb/test_ruby_lex.rb b/test/irb/test_ruby_lex.rb index 533e27443..90ec37184 100644 --- a/test/irb/test_ruby_lex.rb +++ b/test/irb/test_ruby_lex.rb @@ -13,27 +13,6 @@ def teardown restore_encodings end - def test_interpolate_token_with_heredoc_and_unclosed_embexpr - code = <<~'EOC' - ①+< a = A.new => # irb(main):008> - irb(main):009> a - irb(main):010> .a - irb(main):011> .b + irb(main):009* a + irb(main):010* .a + irb(main):011* .b irb(main):012> .itself => true irb(main):013> @@ -219,26 +219,26 @@ class A def b; self; end; def c; true; end; end; irb(main):007> a = A.new => # irb(main):008> - irb(main):009> a - irb(main):010> .b - irb(main):011> # aaa + irb(main):009* a + irb(main):010* .b + irb(main):011* # aaa irb(main):012> .c => true irb(main):013> - irb(main):014> (a) + irb(main):014* (a) irb(main):015> &.b() => # irb(main):016> irb(main):017> class A def b; self; end; def c; true; end; end; irb(main):018> a = A.new => # - irb(main):019> a - irb(main):020> .b - irb(main):021> # aaa + irb(main):019* a + irb(main):020* .b + irb(main):021* # aaa irb(main):022> .c => true - irb(main):023> (a) - irb(main):024> &.b() + irb(main):023* (a) + irb(main):024* &.b() irb(main):025> .itself => # irb(main):026>
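For reference only (not part of the patches above): a minimal sketch of how the
Prism-based NestingParser is driven, assuming irb's lib directory is on the load
path so that require "irb/nesting_parser" resolves to the file changed in
PATCH 1/3.

    require "prism"
    require "irb/nesting_parser"

    code = "if true\n  [1,\n"
    # `scopes` would carry the session's local variables; empty here.
    result = Prism.parse_lex(code, scopes: [[]])

    # parse_by_line returns [prev_opens, next_opens, min_depth] per source line;
    # each open is a NestingElem struct with pos, event and tok.
    IRB::NestingParser.parse_by_line(result).each_with_index do |(_prev, opens, depth), i|
      puts "line #{i + 1}: open=#{opens.map(&:tok).inspect} min_depth=#{depth}"
    end
    # Prints roughly:
    #   line 1: open=["if"] min_depth=0
    #   line 2: open=["if", "["] min_depth=1

open_nestings(result) returns just the trailing opens, which is what
check_code_state uses to decide whether the prompt should keep reading more
lines and how deeply to indent them.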