Skip to content

Commit 328a73b

Browse files
committed
Fully migrate to Prism
It mostly continues to rely on tokens, but for a few things — like endless method definitions and multiline method continuations — it uses the AST. These are either very difficult or impossible to find just by checking tokens. Because multiline method continuations are now detected via the AST, comments no longer need to be trimmed.
1 parent 051f520 commit 328a73b

11 files changed

Lines changed: 140 additions & 317 deletions

File tree

lib/syntax_suggest/api.rb

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99

1010
# Prism is the new parser, replacing Ripper
1111
require "prism"
12-
# We need Ripper loaded for `Prism.lex_compat` even if we're using Prism
13-
# for lexing and parsing
14-
require "ripper"
1512

1613
module SyntaxSuggest
1714
# Used to indicate a default value that cannot
@@ -188,7 +185,6 @@ def self.valid?(source)
188185
require_relative "clean_document"
189186

190187
# Helpers
191-
require_relative "lex_all"
192188
require_relative "code_line"
193189
require_relative "code_block"
194190
require_relative "block_expand"
@@ -200,3 +196,5 @@ def self.valid?(source)
200196
require_relative "pathname_from_message"
201197
require_relative "display_invalid_blocks"
202198
require_relative "parse_blocks_from_indent_line"
199+
require_relative "visitor"
200+
require_relative "token"

lib/syntax_suggest/clean_document.rb

Lines changed: 9 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -67,26 +67,9 @@ module SyntaxSuggest
6767
# All of these problems are fixed by joining the whole heredoc into a single
6868
# line.
6969
#
70-
# ## Comments and whitespace
71-
#
72-
# Comments can throw off the way the lexer tells us that the line
73-
# logically belongs with the next line. This is valid ruby but
74-
# results in a different lex output than before:
75-
#
76-
# 1 User.
77-
# 2 where(name: "schneems").
78-
# 3 # Comment here
79-
# 4 first
80-
#
81-
# To handle this we can replace comment lines with empty lines
82-
# and then re-lex the source. This removal and re-lexing preserves
83-
# line index and document size, but generates an easier to work with
84-
# document.
85-
#
8670
class CleanDocument
8771
def initialize(source:)
88-
lines = clean_sweep(source: source)
89-
@document = CodeLine.from_source(lines.join)
72+
@document = CodeLine.from_source(source)
9073
end
9174

9275
# Call all of the document "cleaners"
@@ -110,62 +93,6 @@ def to_s
11093
@document.join
11194
end
11295

113-
# Remove comments
114-
#
115-
# replace with empty newlines
116-
#
117-
# source = <<~'EOM'
118-
# # Comment 1
119-
# puts "hello"
120-
# # Comment 2
121-
# puts "world"
122-
# EOM
123-
#
124-
# lines = CleanDocument.new(source: source).lines
125-
# expect(lines[0].to_s).to eq("\n")
126-
# expect(lines[1].to_s).to eq("puts "hello")
127-
# expect(lines[2].to_s).to eq("\n")
128-
# expect(lines[3].to_s).to eq("puts "world")
129-
#
130-
# Important: This must be done before lexing.
131-
#
132-
# After this change is made, we lex the document because
133-
# removing comments can change how the doc is parsed.
134-
#
135-
# For example:
136-
#
137-
# values = LexAll.new(source: <<~EOM))
138-
# User.
139-
# # comment
140-
# where(name: 'schneems')
141-
# EOM
142-
# expect(
143-
# values.count {|v| v.type == :on_ignored_nl}
144-
# ).to eq(1)
145-
#
146-
# After the comment is removed:
147-
#
148-
# values = LexAll.new(source: <<~EOM))
149-
# User.
150-
#
151-
# where(name: 'schneems')
152-
# EOM
153-
# expect(
154-
# values.count {|v| v.type == :on_ignored_nl}
155-
# ).to eq(2)
156-
#
157-
def clean_sweep(source:)
158-
# Match comments, but not HEREDOC strings with #{variable} interpolation
159-
# https://rubular.com/r/HPwtW9OYxKUHXQ
160-
source.lines.map do |line|
161-
if line.match?(/^\s*#([^{].*|)$/)
162-
$/
163-
else
164-
line
165-
end
166-
end
167-
end
168-
16996
# Smushes all heredoc lines into one line
17097
#
17198
# source = <<~'EOM'
@@ -184,9 +111,9 @@ def join_heredoc!
184111
lines.each do |line|
185112
line.tokens.each do |token|
186113
case token.type
187-
when :on_heredoc_beg
114+
when :HEREDOC_START
188115
start_index_stack << line.index
189-
when :on_heredoc_end
116+
when :HEREDOC_END
190117
start_index = start_index_stack.pop
191118
end_index = line.index
192119
heredoc_beg_end_index << [start_index, end_index]
@@ -212,20 +139,10 @@ def join_heredoc!
212139
# expect(lines[0].to_s).to eq(source)
213140
# expect(lines[1].to_s).to eq("")
214141
#
215-
# The one known case this doesn't handle is:
216-
#
217-
# Ripper.lex <<~EOM
218-
# a &&
219-
# b ||
220-
# c
221-
# EOM
222-
#
223-
# For some reason this introduces `on_ignore_newline` but with BEG type
224-
#
225142
def join_consecutive!
226-
consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
143+
consecutive_groups = @document.select(&:consecutive?).map do |code_line|
227144
take_while_including(code_line.index..) do |line|
228-
line.ignore_newline_not_beg?
145+
line.consecutive?
229146
end
230147
end
231148

@@ -264,25 +181,19 @@ def join_trailing_slash!
264181
# To preserve document size, empty lines are placed
265182
# in the place of the lines that were "joined"
266183
def join_groups(groups)
267-
groups.each do |lines|
268-
line = lines.first
269-
184+
groups.each do |line, *other_lines|
270185
# Handle the case of multiple groups in a row
271186
# if one is already replaced, move on
272187
next if @document[line.index].empty?
273188

274189
# Join group into the first line
275-
@document[line.index] = CodeLine.new(
276-
tokens: lines.map(&:tokens).flatten,
277-
line: lines.join,
278-
index: line.index
279-
)
190+
@document[line.index] = line.merge(other_lines)
280191

281192
# Hide the rest of the lines
282-
lines[1..].each do |line|
193+
other_lines.each do |other|
283194
# The above lines already have newlines in them, if add more
284195
# then there will be double newline, use an empty line instead
285-
@document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [])
196+
@document[other.index] = CodeLine.empty(other.index)
286197
end
287198
end
288199
self

lib/syntax_suggest/code_line.rb

Lines changed: 46 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,41 @@ class CodeLine
2727
# Returns an array of CodeLine objects
2828
# from the source string
2929
def self.from_source(source)
30-
tokens = LexAll.new(source: source)
30+
ast, tokens = Prism.parse_lex(source).value
31+
tokens.sort_by! { |token, _state| token.location.start_line }
32+
prev_token = nil
33+
tokens.map! do |token, _state|
34+
prev_token = Token.new(token, prev_token)
35+
end
36+
37+
visitor = Visitor.new
38+
visitor.visit(ast)
39+
3140
tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token }
3241
source.lines.map.with_index do |line, index|
3342
CodeLine.new(
3443
line: line,
3544
index: index,
36-
tokens: tokens_for_line[index + 1]
45+
tokens: tokens_for_line[index + 1],
46+
endless_def_count: visitor.endless_def_count[index + 1],
47+
consecutive: visitor.consecutive_lines.include?(index + 1)
3748
)
3849
end
3950
end
4051

41-
attr_reader :line, :index, :tokens, :line_number, :indent
42-
def initialize(line:, index:, tokens:)
52+
# Returns a new CodeLine for the empty string,
53+
# used as a placeholder when joining lines
54+
def self.empty(index)
55+
CodeLine.new(line: "", index: index, tokens: [], endless_def_count: 0, consecutive: false)
56+
end
57+
58+
attr_reader :line, :index, :tokens, :line_number, :indent, :endless_def_count
59+
def initialize(line:, index:, tokens:, endless_def_count:, consecutive:)
4360
@tokens = tokens
4461
@line = line
4562
@index = index
63+
@endless_def_count = endless_def_count
64+
@consecutive = consecutive
4665
@original = line
4766
@line_number = @index + 1
4867
strip_line = line.dup
@@ -57,6 +76,18 @@ def initialize(line:, index:, tokens:)
5776
set_kw_end
5877
end
5978

79+
# Return a new codeline, with the given lines
80+
# merged into the new instance
81+
def merge(other)
82+
CodeLine.new(
83+
tokens: tokens + other.map(&:tokens).flatten,
84+
line: line + other.join,
85+
index: index,
86+
endless_def_count: endless_def_count + other.sum { |line| line.endless_def_count },
87+
consecutive: false
88+
)
89+
end
90+
6091
# Used for stable sort via indentation level
6192
#
6293
# Ruby's sort is not "stable" meaning that when
@@ -151,90 +182,37 @@ def <=>(other)
151182
index <=> other.index
152183
end
153184

154-
# [Not stable API]
155-
#
156-
# Lines that have a `on_ignored_nl` type token and NOT
157-
# a `BEG` type seem to be a good proxy for the ability
158-
# to join multiple lines into one.
159-
#
160-
# This predicate method is used to determine when those
161-
# two criteria have been met.
162-
#
163-
# The one known case this doesn't handle is:
164-
#
165-
# Ripper.lex <<~EOM
166-
# a &&
167-
# b ||
168-
# c
169-
# EOM
170-
#
171-
# For some reason this introduces `on_ignore_newline` but with BEG type
172-
def ignore_newline_not_beg?
173-
@ignore_newline_not_beg
185+
# Can this line be logically joined together
186+
# with the following line? Determined by walking
187+
# the AST
188+
def consecutive?
189+
@consecutive
174190
end
175191

176-
# Determines if the given line has a trailing slash
192+
# Determines if the given line has a trailing slash.
193+
# Simply check if the line contains a backslash after
194+
# the content of the last token.
177195
#
178196
# lines = CodeLine.from_source(<<~EOM)
179197
# it "foo" \
180198
# EOM
181199
# expect(lines.first.trailing_slash?).to eq(true)
182200
#
183201
def trailing_slash?
184-
last = @tokens.last
185-
186-
# Older versions of prism diverged slightly from Ripper in compatibility mode
187-
case last&.type
188-
when :on_sp
189-
last.value == TRAILING_SLASH
190-
when :on_tstring_end
191-
true
192-
else
193-
false
194-
end
202+
return unless (last = @tokens.last)
203+
@line.byteindex(TRAILING_SLASH, last.location.end_column) != nil
195204
end
196205

197-
# Endless method detection
198-
#
199-
# From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab
200-
# Detecting a "oneliner" seems to need a state machine.
201-
# This can be done by looking mostly at the "state" (last value):
202-
#
203-
# ENDFN -> BEG (token = '=' ) -> END
204-
#
205206
private def set_kw_end
206-
oneliner_count = 0
207-
in_oneliner_def = nil
208-
209207
kw_count = 0
210208
end_count = 0
211209

212-
@ignore_newline_not_beg = false
213210
@tokens.each do |token|
214211
kw_count += 1 if token.is_kw?
215212
end_count += 1 if token.is_end?
216-
217-
if token.type == :on_ignored_nl
218-
@ignore_newline_not_beg = !token.expr_beg?
219-
end
220-
221-
if in_oneliner_def.nil?
222-
in_oneliner_def = :ENDFN if token.state.allbits?(Ripper::EXPR_ENDFN)
223-
elsif token.state.allbits?(Ripper::EXPR_ENDFN)
224-
# Continue
225-
elsif token.state.allbits?(Ripper::EXPR_BEG)
226-
in_oneliner_def = :BODY if token.value == "="
227-
elsif token.state.allbits?(Ripper::EXPR_END)
228-
# We found an endless method, count it
229-
oneliner_count += 1 if in_oneliner_def == :BODY
230-
231-
in_oneliner_def = nil
232-
else
233-
in_oneliner_def = nil
234-
end
235213
end
236214

237-
kw_count -= oneliner_count
215+
kw_count -= @endless_def_count
238216

239217
@is_kw = (kw_count - end_count) > 0
240218
@is_end = (end_count - kw_count) > 0

0 commit comments

Comments
 (0)