diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 61689a4..08ccbc6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,7 @@ jobs: uses: ruby/actions/.github/workflows/ruby_versions.yml@master with: engine: cruby + min_version: 3.3 test: needs: ruby-versions @@ -33,7 +34,9 @@ jobs: matrix: ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }} prism_version: - - 1.2.0 # Shipped with Ruby 3.4 as default parser https://www.ruby-lang.org/en/news/2024/12/25/ruby-3-4-0-released/ + # See https://stdgems.org/prism for which ruby version shipped with which prism version + - 0.19.0 + - 1.2.0 - 1.8.0 - head env: @@ -52,22 +55,3 @@ jobs: - name: test run: bin/rake test continue-on-error: ${{ matrix.ruby == 'head' }} - - test-disable-prism: - needs: ruby-versions - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }} - steps: - - name: Checkout code - uses: actions/checkout@v6 - - name: Set up Ruby - uses: ruby/setup-ruby@v1 - with: - ruby-version: ${{ matrix.ruby }} - bundler-cache: true - - name: test - run: SYNTAX_SUGGEST_DISABLE_PRISM=1 bin/rake test - continue-on-error: ${{ matrix.ruby == 'head' }} diff --git a/.standard.yml b/.standard.yml index 2547695..3a33af2 100644 --- a/.standard.yml +++ b/.standard.yml @@ -1 +1 @@ -ruby_version: 3.0.0 +ruby_version: 3.3.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index dc1a0a9..1d2b1de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,11 @@ ## HEAD (unreleased) +- Changed: Minimum supported Ruby version is now 3.3. (https://github.com/ruby/syntax_suggest/pull/246) + ## 2.0.3 - Fix: Correctly identify trailing slashes when using Prism > 1.8.0. (https://github.com/ruby/syntax_suggest/pull/243) +- Fix: Correctly handle `%I` delimiters.
(https://github.com/ruby/syntax_suggest/pull/249) - Internal: Add tests to multiple versions of prism ## 2.0.2 diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb index 0f82d83..5054efa 100644 --- a/lib/syntax_suggest/api.rb +++ b/lib/syntax_suggest/api.rb @@ -7,25 +7,8 @@ require "pathname" require "timeout" -# We need Ripper loaded for `Prism.lex_compat` even if we're using Prism -# for lexing and parsing -require "ripper" - # Prism is the new parser, replacing Ripper -# -# We need to "dual boot" both for now because syntax_suggest -# supports older rubies that do not ship with syntax suggest. -# -# We also need the ability to control loading of this library -# so we can test that both modes work correctly in CI. -if (value = ENV["SYNTAX_SUGGEST_DISABLE_PRISM"]) - warn "Skipping loading prism due to SYNTAX_SUGGEST_DISABLE_PRISM=#{value}" -else - begin - require "prism" - rescue LoadError - end -end +require "prism" module SyntaxSuggest # Used to indicate a default value that cannot @@ -35,14 +18,6 @@ module SyntaxSuggest class Error < StandardError; end TIMEOUT_DEFAULT = ENV.fetch("SYNTAX_SUGGEST_TIMEOUT", 1).to_i - # SyntaxSuggest.use_prism_parser? [Private] - # - # Tells us if the prism parser is available for use - # or if we should fallback to `Ripper` - def self.use_prism_parser? - defined?(Prism) - end - # SyntaxSuggest.handle_error [Public] # # Takes a `SyntaxError` exception, uses the @@ -152,20 +127,11 @@ def self.valid_without?(without_lines:, code_lines:) # SyntaxSuggest.invalid? [Private] # # Opposite of `SyntaxSuggest.valid?` - if defined?(Prism) - def self.invalid?(source) - source = source.join if source.is_a?(Array) - source = source.to_s - - Prism.parse(source).failure? - end - else - def self.invalid?(source) - source = source.join if source.is_a?(Array) - source = source.to_s + def self.invalid?(source) + source = source.join if source.is_a?(Array) + source = source.to_s - Ripper.new(source).tap(&:parse).error? 
- end + Prism.parse(source).failure? end # SyntaxSuggest.valid? [Private] @@ -219,7 +185,6 @@ def self.valid?(source) require_relative "clean_document" # Helpers -require_relative "lex_all" require_relative "code_line" require_relative "code_block" require_relative "block_expand" @@ -231,3 +196,5 @@ def self.valid?(source) require_relative "pathname_from_message" require_relative "display_invalid_blocks" require_relative "parse_blocks_from_indent_line" +require_relative "visitor" +require_relative "token" diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb index ba307af..ca230d8 100644 --- a/lib/syntax_suggest/clean_document.rb +++ b/lib/syntax_suggest/clean_document.rb @@ -67,26 +67,9 @@ module SyntaxSuggest # All of these problems are fixed by joining the whole heredoc into a single # line. # - # ## Comments and whitespace - # - # Comments can throw off the way the lexer tells us that the line - # logically belongs with the next line. This is valid ruby but - # results in a different lex output than before: - # - # 1 User. - # 2 where(name: "schneems"). - # 3 # Comment here - # 4 first - # - # To handle this we can replace comment lines with empty lines - # and then re-lex the source. This removal and re-lexing preserves - # line index and document size, but generates an easier to work with - # document. 
- # class CleanDocument def initialize(source:) - lines = clean_sweep(source: source) - @document = CodeLine.from_source(lines.join, lines: lines) + @document = CodeLine.from_source(source) end # Call all of the document "cleaners" @@ -110,62 +93,6 @@ def to_s @document.join end - # Remove comments - # - # replace with empty newlines - # - # source = <<~'EOM' - # # Comment 1 - # puts "hello" - # # Comment 2 - # puts "world" - # EOM - # - # lines = CleanDocument.new(source: source).lines - # expect(lines[0].to_s).to eq("\n") - # expect(lines[1].to_s).to eq("puts "hello") - # expect(lines[2].to_s).to eq("\n") - # expect(lines[3].to_s).to eq("puts "world") - # - # Important: This must be done before lexing. - # - # After this change is made, we lex the document because - # removing comments can change how the doc is parsed. - # - # For example: - # - # values = LexAll.new(source: <<~EOM)) - # User. - # # comment - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(1) - # - # After the comment is removed: - # - # values = LexAll.new(source: <<~EOM)) - # User. - # - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(2) - # - def clean_sweep(source:) - # Match comments, but not HEREDOC strings with #{variable} interpolation - # https://rubular.com/r/HPwtW9OYxKUHXQ - source.lines.map do |line| - if line.match?(/^\s*#([^{].*|)$/) - $/ - else - line - end - end - end - # Smushes all heredoc lines into one line # # source = <<~'EOM' @@ -182,11 +109,11 @@ def join_heredoc! 
start_index_stack = [] heredoc_beg_end_index = [] lines.each do |line| - line.lex.each do |lex_value| - case lex_value.type - when :on_heredoc_beg + line.tokens.each do |token| + case token.type + when :HEREDOC_START start_index_stack << line.index - when :on_heredoc_end + when :HEREDOC_END start_index = start_index_stack.pop end_index = line.index heredoc_beg_end_index << [start_index, end_index] @@ -212,20 +139,10 @@ def join_heredoc! # expect(lines[0].to_s).to eq(source) # expect(lines[1].to_s).to eq("") # - # The one known case this doesn't handle is: - # - # Ripper.lex <<~EOM - # a && - # b || - # c - # EOM - # - # For some reason this introduces `on_ignore_newline` but with BEG type - # def join_consecutive! - consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line| + consecutive_groups = @document.select(&:consecutive?).map do |code_line| take_while_including(code_line.index..) do |line| - line.ignore_newline_not_beg? + line.consecutive? end end @@ -273,16 +190,17 @@ def join_groups(groups) # Join group into the first line @document[line.index] = CodeLine.new( - lex: lines.map(&:lex).flatten, + tokens: lines.map(&:tokens).flatten, line: lines.join, - index: line.index + index: line.index, + consecutive: false ) # Hide the rest of the lines lines[1..].each do |line| # The above lines already have newlines in them, if add more # then there will be double newline, use an empty line instead - @document[line.index] = CodeLine.new(line: "", index: line.index, lex: []) + @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false) end end self diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb index 76ca892..d872fe9 100644 --- a/lib/syntax_suggest/code_line.rb +++ b/lib/syntax_suggest/code_line.rb @@ -26,23 +26,34 @@ class CodeLine # Returns an array of CodeLine objects # from the source string - def self.from_source(source, lines: nil) - lines ||= source.lines - 
lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } - lines.map.with_index do |line, index| + def self.from_source(source) + ast, tokens = Prism.parse_lex(source).value + visitor = Visitor.new + visitor.visit(ast) + tokens.sort_by!.with_index { |(token, _state), position| [token.location.start_line, position] } # position tiebreak keeps same-line tokens in lexer order; sort_by! alone is not stable + + prev_token = nil + tokens.map! do |token, _state| + prev_token = Token.new(token, prev_token, visitor) + end + consecutive_lines = visitor.consecutive_lines.to_set # computed once; Visitor#consecutive_lines re-sorts its keys on every call + tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token } + source.lines.map.with_index do |line, index| CodeLine.new( line: line, index: index, - lex: lex_array_for_line[index + 1] + tokens: tokens_for_line[index + 1], + consecutive: consecutive_lines.include?(index + 1) ) end end - attr_reader :line, :index, :lex, :line_number, :indent - def initialize(line:, index:, lex:) - @lex = lex + attr_reader :line, :index, :tokens, :line_number, :indent + def initialize(line:, index:, tokens:, consecutive:) + @tokens = tokens @line = line @index = index + @consecutive = consecutive @original = line @line_number = @index + 1 strip_line = line.dup @@ -151,29 +162,16 @@ def <=>(other) index <=> other.index end - # [Not stable API] - # - # Lines that have a `on_ignored_nl` type token and NOT - # a `BEG` type seem to be a good proxy for the ability - # to join multiple lines into one. - # - # This predicate method is used to determine when those - # two criteria have been met. - # - # The one known case this doesn't handle is: - # - # Ripper.lex <<~EOM - # a && - # b || - # c - # EOM - # - # For some reason this introduces `on_ignore_newline` but with BEG type - def ignore_newline_not_beg? - @ignore_newline_not_beg + # Can this line be logically joined together + # with the following line? Determined by walking + # the AST + def consecutive?
+ @consecutive end - # Determines if the given line has a trailing slash + # Determines if the given line has a trailing slash. + # Simply check if the line contains a backslash after + # the content of the last token. # # lines = CodeLine.from_source(<<~EOM) # it "foo" \ @@ -181,61 +179,19 @@ def ignore_newline_not_beg? # expect(lines.first.trailing_slash?).to eq(true) # def trailing_slash? - last = @lex.last - - # Older versions of prism diverged slightly from Ripper in compatibility mode - case last&.type - when :on_sp - last.token == TRAILING_SLASH - when :on_tstring_end - true - else - false - end + return unless (last = @tokens.last) + @line.byteindex(TRAILING_SLASH, last.location.end_column) != nil end - # Endless method detection - # - # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab - # Detecting a "oneliner" seems to need a state machine. - # This can be done by looking mostly at the "state" (last value): - # - # ENDFN -> BEG (token = '=' ) -> END - # private def set_kw_end - oneliner_count = 0 - in_oneliner_def = nil - kw_count = 0 end_count = 0 - @ignore_newline_not_beg = false - @lex.each do |lex| - kw_count += 1 if lex.is_kw? - end_count += 1 if lex.is_end? - - if lex.type == :on_ignored_nl - @ignore_newline_not_beg = !lex.expr_beg? - end - - if in_oneliner_def.nil? - in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN) - elsif lex.state.allbits?(Ripper::EXPR_ENDFN) - # Continue - elsif lex.state.allbits?(Ripper::EXPR_BEG) - in_oneliner_def = :BODY if lex.token == "=" - elsif lex.state.allbits?(Ripper::EXPR_END) - # We found an endless method, count it - oneliner_count += 1 if in_oneliner_def == :BODY - - in_oneliner_def = nil - else - in_oneliner_def = nil - end + @tokens.each do |token| + kw_count += 1 if token.is_kw? + end_count += 1 if token.is_end? 
end - kw_count -= oneliner_count - @is_kw = (kw_count - end_count) > 0 @is_end = (end_count - kw_count) > 0 end diff --git a/lib/syntax_suggest/core_ext.rb b/lib/syntax_suggest/core_ext.rb index 94f57ba..ffbc922 100644 --- a/lib/syntax_suggest/core_ext.rb +++ b/lib/syntax_suggest/core_ext.rb @@ -1,96 +1,47 @@ # frozen_string_literal: true -# Ruby 3.2+ has a cleaner way to hook into Ruby that doesn't use `require` -if SyntaxError.method_defined?(:detailed_message) - module SyntaxSuggest - # SyntaxSuggest.module_for_detailed_message [Private] - # - # Used to monkeypatch SyntaxError via Module.prepend - def self.module_for_detailed_message - Module.new { - def detailed_message(highlight: true, syntax_suggest: true, **kwargs) - return super unless syntax_suggest - - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - message = super - - if path - file = Pathname.new(path) - io = SyntaxSuggest::MiniStringIO.new - - SyntaxSuggest.call( - io: io, - source: file.read, - filename: file, - terminal: highlight - ) - annotation = io.string - - annotation += "\n" unless annotation.end_with?("\n") - - annotation + message - else - message - end - rescue => e - if ENV["SYNTAX_SUGGEST_DEBUG"] - $stderr.warn(e.message) - $stderr.warn(e.backtrace) - end - - # Ignore internal errors +module SyntaxSuggest + # SyntaxSuggest.module_for_detailed_message [Private] + # + # Used to monkeypatch SyntaxError via Module.prepend + def self.module_for_detailed_message + Module.new { + def detailed_message(highlight: true, syntax_suggest: true, **kwargs) + return super unless syntax_suggest + + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + message = super + + if path + file = Pathname.new(path) + io = SyntaxSuggest::MiniStringIO.new + + SyntaxSuggest.call( + io: io, + source: file.read, + filename: file, + terminal: highlight + ) + annotation = io.string + + annotation += "\n" unless annotation.end_with?("\n") + + annotation + message + 
else message end - } - end - end - - SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message) -else - autoload :Pathname, "pathname" - - #-- - # Monkey patch kernel to ensure that all `require` calls call the same - # method - #++ - module Kernel - # :stopdoc: - - module_function - - alias_method :syntax_suggest_original_require, :require - alias_method :syntax_suggest_original_require_relative, :require_relative - alias_method :syntax_suggest_original_load, :load - - def load(file, wrap = false) - syntax_suggest_original_load(file) - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end - - def require(file) - syntax_suggest_original_require(file) - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end + rescue => e + if ENV["SYNTAX_SUGGEST_DEBUG"] + $stderr.warn(e.message) + $stderr.warn(e.backtrace) + end - def require_relative(file) - if Pathname.new(file).absolute? - syntax_suggest_original_require file - else - relative_from = caller_locations(1..1).first - relative_from_path = relative_from.absolute_path || relative_from.path - syntax_suggest_original_require File.expand_path("../#{file}", relative_from_path) + # Ignore internal errors + message end - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end + } end end + +SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message) diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb index 0d80c4d..d7f5262 100644 --- a/lib/syntax_suggest/explain_syntax.rb +++ b/lib/syntax_suggest/explain_syntax.rb @@ -1,19 +1,11 @@ # frozen_string_literal: true -require_relative "left_right_lex_count" - -if !SyntaxSuggest.use_prism_parser? 
- require_relative "ripper_errors" -end +require_relative "left_right_token_count" module SyntaxSuggest class GetParseErrors def self.errors(source) - if SyntaxSuggest.use_prism_parser? - Prism.parse(source).errors.map(&:message) - else - RipperErrors.new(source).call.errors - end + Prism.parse(source).errors.map(&:message) end end @@ -53,14 +45,14 @@ class ExplainSyntax def initialize(code_lines:) @code_lines = code_lines - @left_right = LeftRightLexCount.new + @left_right = LeftRightTokenCount.new @missing = nil end def call @code_lines.each do |line| - line.lex.each do |lex| - @left_right.count_lex(lex) + line.tokens.each do |token| + @left_right.count_token(token) end end diff --git a/lib/syntax_suggest/left_right_lex_count.rb b/lib/syntax_suggest/left_right_token_count.rb similarity index 73% rename from lib/syntax_suggest/left_right_lex_count.rb rename to lib/syntax_suggest/left_right_token_count.rb index 6fcae74..e0562ba 100644 --- a/lib/syntax_suggest/left_right_lex_count.rb +++ b/lib/syntax_suggest/left_right_token_count.rb @@ -9,19 +9,19 @@ module SyntaxSuggest # # Example: # - # left_right = LeftRightLexCount.new + # left_right = LeftRightTokenCount.new # left_right.count_kw # left_right.missing.first # # => "end" # - # left_right = LeftRightLexCount.new + # left_right = LeftRightTokenCount.new # source = "{ a: b, c: d" # Note missing '}' - # LexAll.new(source: source).each do |lex| - # left_right.count_lex(lex) + # CodeLine.from_source(source).flat_map(&:tokens).each do |token| + # left_right.count_token(token) # end # left_right.missing.first # # => "}" - class LeftRightLexCount + class LeftRightTokenCount def initialize @kw_count = 0 @end_count = 0 @@ -49,52 +49,46 @@ def count_end # # Example: # - # left_right = LeftRightLexCount.new - # left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG)) + # token = CodeLine.from_source("{").first.tokens.first + # left_right = LeftRightTokenCount.new + # left_right.count_token(token) #
left_right.count_for_char("{") # # => 1 # left_right.count_for_char("}") # # => 0 - def count_lex(lex) - case lex.type - when :on_tstring_content + def count_token(token) + case token.type + when :STRING_CONTENT # ^^^ # Means it's a string or a symbol `"{"` rather than being # part of a data structure (like a hash) `{ a: b }` # ignore it. - when :on_words_beg, :on_symbos_beg, :on_qwords_beg, - :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg + when :PERCENT_UPPER_W, :PERCENT_UPPER_I, :PERCENT_LOWER_W, + :PERCENT_LOWER_I, :REGEXP_BEGIN, :STRING_BEGIN # ^^^ # Handle shorthand syntaxes like `%Q{ i am a string }` # # The start token will be the full thing `%Q{` but we # need to count it as if it's a `{`. Any token # can be used - char = lex.token[-1] + char = token.value[-1] @count_for_char[char] += 1 if @count_for_char.key?(char) - when :on_embexpr_beg + when :EMBEXPR_BEGIN # ^^^ # Embedded string expressions like `"#{foo} <-embed"` # are parsed with chars: # - # `#{` as :on_embexpr_beg - # `}` as :on_embexpr_end - # - # We cannot ignore both :on_emb_expr_beg and :on_embexpr_end - # because sometimes the lexer thinks something is an embed - # string end, when it is not like `lol = }` (no clue why). + # `#{` as :EMBEXPR_BEGIN + # `}` as :EMBEXPR_END # # When we see `#{` count it as a `{` or we will # have a mis-match count. # - case lex.token - when "\#{" - @count_for_char["{"] += 1 - end + @count_for_char["{"] += 1 else - @end_count += 1 if lex.is_end? - @kw_count += 1 if lex.is_kw? - @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token) + @end_count += 1 if token.is_end? + @kw_count += 1 if token.is_kw? 
+ @count_for_char[token.value] += 1 if @count_for_char.key?(token.value) end end diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb deleted file mode 100644 index c16fbb5..0000000 --- a/lib/syntax_suggest/lex_all.rb +++ /dev/null @@ -1,74 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Ripper.lex is not guaranteed to lex the entire source document - # - # This class guarantees the whole document is lex-ed by iteratively - # lexing the document where ripper stopped. - # - # Prism likely doesn't have the same problem. Once ripper support is removed - # we can likely reduce the complexity here if not remove the whole concept. - # - # Example usage: - # - # lex = LexAll.new(source: source) - # lex.each do |value| - # puts value.line - # end - class LexAll - include Enumerable - - def initialize(source:, source_lines: nil) - @lex = self.class.lex(source, 1) - lineno = @lex.last[0][0] + 1 - source_lines ||= source.lines - last_lineno = source_lines.length - - until lineno >= last_lineno - lines = source_lines[lineno..] - - @lex.concat( - self.class.lex(lines.join, lineno + 1) - ) - - lineno = @lex.last[0].first + 1 - end - - last_lex = nil - @lex.map! { |elem| - last_lex = LexValue.new(elem[0].first, elem[1], elem[2], elem[3], last_lex) - } - end - - if SyntaxSuggest.use_prism_parser? - def self.lex(source, line_number) - Prism.lex_compat(source, line: line_number).value.sort_by { |values| values[0] } - end - else - def self.lex(source, line_number) - Ripper::Lexer.new(source, "-", line_number).parse.sort_by(&:pos) - end - end - - def to_a - @lex - end - - def each - return @lex.each unless block_given? 
- @lex.each do |x| - yield x - end - end - - def [](index) - @lex[index] - end - - def last - @lex.last - end - end -end - -require_relative "lex_value" diff --git a/lib/syntax_suggest/lex_value.rb b/lib/syntax_suggest/lex_value.rb deleted file mode 100644 index b46a332..0000000 --- a/lib/syntax_suggest/lex_value.rb +++ /dev/null @@ -1,70 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Value object for accessing lex values - # - # This lex: - # - # [1, 0], :on_ident, "describe", CMDARG - # - # Would translate into: - # - # lex.line # => 1 - # lex.type # => :on_indent - # lex.token # => "describe" - class LexValue - attr_reader :line, :type, :token, :state - - def initialize(line, type, token, state, last_lex = nil) - @line = line - @type = type - @token = token - @state = state - - set_kw_end(last_lex) - end - - private def set_kw_end(last_lex) - @is_end = false - @is_kw = false - return if type != :on_kw - - return if last_lex && last_lex.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953 - - case token - when "if", "unless", "while", "until" - # Only count if/unless when it's not a "trailing" if/unless - # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375 - @is_kw = true unless expr_label? - when "def", "case", "for", "begin", "class", "module", "do" - @is_kw = true - when "end" - @is_end = true - end - end - - def fname? - state.allbits?(Ripper::EXPR_FNAME) - end - - def ignore_newline? - type == :on_ignored_nl - end - - def is_end? - @is_end - end - - def is_kw? - @is_kw - end - - def expr_beg? - state.anybits?(Ripper::EXPR_BEG) - end - - def expr_label? 
- state.allbits?(Ripper::EXPR_LABEL) - end - end -end diff --git a/lib/syntax_suggest/ripper_errors.rb b/lib/syntax_suggest/ripper_errors.rb deleted file mode 100644 index 4e2bc90..0000000 --- a/lib/syntax_suggest/ripper_errors.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Capture parse errors from Ripper - # - # Prism returns the errors with their messages, but Ripper - # does not. To get them we must make a custom subclass. - # - # Example: - # - # puts RipperErrors.new(" def foo").call.errors - # # => ["syntax error, unexpected end-of-input, expecting ';' or '\\n'"] - class RipperErrors < Ripper - attr_reader :errors - - # Comes from ripper, called - # on every parse error, msg - # is a string - def on_parse_error(msg) - @errors ||= [] - @errors << msg - end - - alias_method :on_alias_error, :on_parse_error - alias_method :on_assign_error, :on_parse_error - alias_method :on_class_name_error, :on_parse_error - alias_method :on_param_error, :on_parse_error - alias_method :compile_error, :on_parse_error - - def call - @run_once ||= begin - @errors = [] - parse - true - end - self - end - end -end diff --git a/lib/syntax_suggest/token.rb b/lib/syntax_suggest/token.rb new file mode 100644 index 0000000..f5967d9 --- /dev/null +++ b/lib/syntax_suggest/token.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Value object for accessing lex values + # + # This lex: + # + # [IDENTIFIER(1,0)-(1,8)("describe"), 32] + # + # Would translate into: + # + # token.location # => (1,0)-(1,8) + # token.type # => :IDENTIFIER + # token.value # => "describe" + class Token + attr_reader :location, :type, :value + + KW_TYPES = %i[ + KEYWORD_IF KEYWORD_UNLESS KEYWORD_WHILE KEYWORD_UNTIL + KEYWORD_DEF KEYWORD_CASE KEYWORD_FOR KEYWORD_BEGIN KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DO KEYWORD_DO_LOOP + ].to_set.freeze + private_constant :KW_TYPES + + def initialize(prism_token, previous_prism_token, visitor) + @location =
prism_token.location + @type = prism_token.type + @value = prism_token.value + + # Prism lexes `:module` as SYMBOL_BEGIN, KEYWORD_MODULE (presumably `:end` likewise yields KEYWORD_END, so both flags skip symbol content) + # https://github.com/ruby/prism/issues/3940 + symbol_content = previous_prism_token&.type == :SYMBOL_BEGIN + @is_kw = KW_TYPES.include?(@type) && !symbol_content && !visitor.endless_def_keyword_locs.include?(@location) + @is_end = @type == :KEYWORD_END && !symbol_content + end + + def line + @location.start_line + end + + def is_end? + @is_end + end + + def is_kw? + @is_kw + end + end +end diff --git a/lib/syntax_suggest/visitor.rb b/lib/syntax_suggest/visitor.rb new file mode 100644 index 0000000..ff1a36e --- /dev/null +++ b/lib/syntax_suggest/visitor.rb @@ -0,0 +1,52 @@ +module SyntaxSuggest + # A visitor that walks the AST and pulls out information + # that is too difficult to discern by just looking at tokens + class Visitor < Prism::Visitor + attr_reader :endless_def_keyword_locs + + def initialize + @endless_def_keyword_locs = [] + @consecutive_lines = {} + end + + def consecutive_lines + @consecutive_lines.keys.sort + end + + # Record lines where a method call is logically connected + # to subsequent lines. This is the case when a method call + # is broken up by a newline + def visit_call_node(node) + receiver_loc = node.receiver&.location + call_operator_loc = node.call_operator_loc + message_loc = node.message_loc + if receiver_loc && call_operator_loc && message_loc + # foo + # .bar + if receiver_loc.end_line != call_operator_loc.start_line && call_operator_loc.start_line == message_loc.start_line + (receiver_loc.end_line..call_operator_loc.start_line - 1).each do |line| + @consecutive_lines[line] = true + end + end + + # foo.
+ # bar + if receiver_loc.end_line == call_operator_loc.start_line && call_operator_loc.start_line != message_loc.start_line + (call_operator_loc.start_line..message_loc.start_line - 1).each do |line| + @consecutive_lines[line] = true + end + end + end + super + end + + # Endless method definitions like `def foo = 123` are valid without + # an `end` keyword. We record their keyword here so that we can later + # skip considering them for keywords since they have no corresponding + # end + def visit_def_node(node) + @endless_def_keyword_locs << node.def_keyword_loc if node.equal_loc + super + end + end +end diff --git a/spec/integration/ruby_command_line_spec.rb b/spec/integration/ruby_command_line_spec.rb index c1ec4be..02354ce 100644 --- a/spec/integration/ruby_command_line_spec.rb +++ b/spec/integration/ruby_command_line_spec.rb @@ -94,8 +94,6 @@ module SyntaxSuggest end it "gem can be tested when executing on Ruby with default gem included" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - out = `#{ruby} -I#{lib_dir} -rsyntax_suggest -e "puts SyntaxError.instance_method(:detailed_message).source_location" 2>&1` expect($?.success?).to be_truthy @@ -103,8 +101,6 @@ module SyntaxSuggest end it "annotates a syntax error in Ruby 3.2+ when require is not used" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - Dir.mktmpdir do |dir| tmpdir = Pathname(dir) script = tmpdir.join("script.rb") diff --git a/spec/unit/api_spec.rb b/spec/unit/api_spec.rb index e900b9e..9299a17 100644 --- a/spec/unit/api_spec.rb +++ b/spec/unit/api_spec.rb @@ -8,12 +8,6 @@ module SyntaxSuggest RSpec.describe "Top level SyntaxSuggest api" do - it "doesn't load prism if env var is set" do - skip("SYNTAX_SUGGEST_DISABLE_PRISM not set") unless ENV["SYNTAX_SUGGEST_DISABLE_PRISM"] - - expect(SyntaxSuggest.use_prism_parser?).to be_falsey - end - it "has a `handle_error` interface" do fake_error = Object.new def fake_error.message @@ -69,8 +63,6 @@ def
fake_error.message end it "respects highlight API" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - core_ext_file = lib_dir.join("syntax_suggest").join("core_ext.rb") require_relative core_ext_file @@ -91,8 +83,6 @@ def detailed_message(**kwargs) end it "can be disabled via falsey kwarg" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - core_ext_file = lib_dir.join("syntax_suggest").join("core_ext.rb") require_relative core_ext_file diff --git a/spec/unit/clean_document_spec.rb b/spec/unit/clean_document_spec.rb index 5b5ca04..47d9639 100644 --- a/spec/unit/clean_document_spec.rb +++ b/spec/unit/clean_document_spec.rb @@ -103,19 +103,6 @@ module SyntaxSuggest expect(lines.count).to eq(2) end - it "comments: removes comments" do - source = <<~EOM - # lol - puts "what" - # yolo - EOM - - lines = CleanDocument.new(source: source).lines - expect(lines[0].to_s).to eq($/) - expect(lines[1].to_s).to eq('puts "what"' + $/) - expect(lines[2].to_s).to eq($/) - end - it "trailing slash: does not join trailing do" do # Some keywords and syntaxes trigger the "ignored line" # lex output, we ignore them by filtering by BEG diff --git a/spec/unit/code_block_spec.rb b/spec/unit/code_block_spec.rb index 3ab2751..dfea307 100644 --- a/spec/unit/code_block_spec.rb +++ b/spec/unit/code_block_spec.rb @@ -33,7 +33,7 @@ def foo array = [block_2, block_1, block_0].sort expect(array.last).to eq(block_2) - block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4, lex: [])) + block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4, tokens: [], consecutive: false)) array.prepend(block) expect(array.max).to eq(block) end diff --git a/spec/unit/code_line_spec.rb b/spec/unit/code_line_spec.rb index 5b62cc2..8b5fa1a 100644 --- a/spec/unit/code_line_spec.rb +++ b/spec/unit/code_line_spec.rb @@ -17,8 +17,6 @@ def to_json(*opts) end it "supports endless method definitions" do - skip("Unsupported ruby version") unless 
Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3") - line = CodeLine.from_source(<<~EOM).first def square(x) = x * x EOM @@ -46,7 +44,7 @@ def square(x) = x * x EOM # Indicates line 1 can join 2, 2 can join 3, but 3 won't join it's next line - expect(code_lines.map(&:ignore_newline_not_beg?)).to eq([true, true, false, false]) + expect(code_lines.map(&:consecutive?)).to eq([true, true, false, false]) end it "trailing if" do diff --git a/spec/unit/core_ext_spec.rb b/spec/unit/core_ext_spec.rb index 499c38a..d579cc8 100644 --- a/spec/unit/core_ext_spec.rb +++ b/spec/unit/core_ext_spec.rb @@ -3,8 +3,6 @@ module SyntaxSuggest RSpec.describe "Core extension" do it "SyntaxError monkepatch ensures there is a newline to the end of the file" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - Dir.mktmpdir do |dir| tmpdir = Pathname(dir) file = tmpdir.join("file.rb") diff --git a/spec/unit/explain_syntax_spec.rb b/spec/unit/explain_syntax_spec.rb index c62a42b..7ddb32b 100644 --- a/spec/unit/explain_syntax_spec.rb +++ b/spec/unit/explain_syntax_spec.rb @@ -17,9 +17,23 @@ module SyntaxSuggest expect(explain.errors.join.strip).to_not be_empty end - it "handles %w[]" do + %w[w W i I].each do |type| + it "handles %#{type}-style array" do + source = <<~EOM + node.is_a?(Op) && %#{type}[| ||].include?(node.value) && + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + end + end + + it "handles %r-style regexp" do source = <<~EOM - node.is_a?(Op) && %w[| ||].include?(node.value) && + node.is_a?(Op) && %r{| ||}.include?(node.value) && EOM explain = ExplainSyntax.new( @@ -29,6 +43,20 @@ module SyntaxSuggest expect(explain.missing).to eq([]) end + ["", "q", "Q"].each do |type| + it "handles %#{type}-style string" do + source = <<~EOM + node.is_a?(Op) && %#{type}(| ||).include?(node.value) && + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + 
expect(explain.missing).to eq([]) + end + end + it "doesn't falsely identify strings or symbols as critical chars" do source = <<~EOM a = ['(', '{', '[', '|'] diff --git a/spec/unit/lex_all_spec.rb b/spec/unit/lex_all_spec.rb deleted file mode 100644 index 9621c9e..0000000 --- a/spec/unit/lex_all_spec.rb +++ /dev/null @@ -1,26 +0,0 @@ -# frozen_string_literal: true - -require_relative "../spec_helper" - -module SyntaxSuggest - RSpec.describe "EndBlockParse" do - it "finds blocks based on `end` keyword" do - source = <<~EOM - describe "cat" # 1 - Cat.call do # 2 - end # 3 - end # 4 - # 5 - it "dog" do # 6 - Dog.call do # 7 - end # 8 - end # 9 - EOM - - lex = LexAll.new(source: source) - expect(lex.map(&:token).to_s).to include("dog") - expect(lex.first.line).to eq(1) - expect(lex.last.line).to eq(9) - end - end -end diff --git a/syntax_suggest.gemspec b/syntax_suggest.gemspec index 756a85b..44e458a 100644 --- a/syntax_suggest.gemspec +++ b/syntax_suggest.gemspec @@ -16,7 +16,7 @@ Gem::Specification.new do |spec| spec.description = 'When you get an "unexpected end" in your syntax this gem helps you find it' spec.homepage = "https://github.com/ruby/syntax_suggest.git" spec.license = "MIT" - spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0") + spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0") spec.metadata["homepage_uri"] = spec.homepage spec.metadata["source_code_uri"] = "https://github.com/ruby/syntax_suggest.git"