diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb
index ed02e96574..787181b5a7 100644
--- a/lib/prism/translation/ripper/lexer.rb
+++ b/lib/prism/translation/ripper/lexer.rb
@@ -6,7 +6,7 @@
 module Prism
   module Translation
     class Ripper
-      class Lexer # :nodoc:
+      class Lexer < Ripper # :nodoc:
         # :stopdoc:
         class State
 
@@ -39,6 +39,92 @@ def allbits?(i) to_int.allbits?(i) end
           def anybits?(i) to_int.anybits?(i) end
           def nobits?(i) to_int.nobits?(i) end
         end
+
+        class Elem
+          attr_accessor :pos, :event, :tok, :state, :message
+
+          def initialize(pos, event, tok, state, message = nil)
+            @pos = pos
+            @event = event
+            @tok = tok
+            @state = State.new(state)
+            @message = message
+          end
+
+          def [](index)
+            case index
+            when 0, :pos
+              @pos
+            when 1, :event
+              @event
+            when 2, :tok
+              @tok
+            when 3, :state
+              @state
+            when 4, :message
+              @message
+            else
+              nil
+            end
+          end
+
+          def inspect
+            "#<#{self.class}: #{event}@#{pos[0]}:#{pos[1]}:#{state}: #{tok.inspect}#{": " if message}#{message}>"
+          end
+
+          alias to_s inspect
+
+          def pretty_print(q)
+            q.group(2, "#<#{self.class}:", ">") {
+              q.breakable
+              q.text("#{event}@#{pos[0]}:#{pos[1]}")
+              q.breakable
+              state.pretty_print(q)
+              q.breakable
+              q.text("token: ")
+              tok.pretty_print(q)
+              if message
+                q.breakable
+                q.text("message: ")
+                q.text(message)
+              end
+            }
+          end
+
+          def to_a
+            if @message
+              [@pos, @event, @tok, @state, @message]
+            else
+              [@pos, @event, @tok, @state]
+            end
+          end
+        end
+
+        def initialize(...)
+          super
+          @lex_compat = Prism.lex_compat(@source, filepath: filename, line: lineno)
+        end
+
+        # Returns the lex_compat result wrapped in `Elem`. Errors are omitted.
+        # Since ripper is a streaming parser, tokens are expected to be emitted in the order
+        # that the parser encounters them. This is not implemented.
+        def parse(raise_errors: false)
+          if @lex_compat.failure? && raise_errors
+            raise SyntaxError, @lex_compat.errors.first.message
+          else
+            @lex_compat.value.map do |position, event, token, state|
+              Elem.new(position, event, token, state.to_int)
+            end
+          end
+        end
+
+        # Similar to parse but ripper sorts the elements by position in the source. Also
+        # includes errors. Since prism does error recovery, in cases of syntax errors
+        # the result may differ greatly compared to ripper.
+        def scan(...)
+          parse(...)
+        end
+
         # :startdoc:
       end
     end
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index bbd85585a9..2bd9c2fe4a 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -38,7 +38,7 @@ class RipperTest < TestCase
     end
 
     # Skip these tests that we haven't implemented yet.
-    omitted = [
+    omitted_sexp_raw = [
       "dos_endings.txt",
       "heredocs_with_fake_newlines.txt",
       "heredocs_with_ignored_newlines.txt",
@@ -59,8 +59,29 @@ class RipperTest < TestCase
       "whitequark/slash_newline_in_heredocs.txt"
     ]
 
-    Fixture.each_for_current_ruby(except: incorrect | omitted) do |fixture|
-      define_method(fixture.test_name) { assert_ripper(fixture.read) }
+    omitted_lexer_parse = [
+      "comments.txt",
+      "heredoc_percent_q_newline_delimiter.txt",
+      "heredoc_with_escaped_newline_at_start.txt",
+      "heredocs_with_fake_newlines.txt",
+      "indented_file_end.txt",
+      "seattlerb/TestRubyParserShared.txt",
+      "seattlerb/class_comments.txt",
+      "seattlerb/module_comments.txt",
+      "seattlerb/parse_line_block_inline_comment_leading_newlines.txt",
+      "seattlerb/parse_line_block_inline_multiline_comment.txt",
+      "spanning_heredoc_newlines.txt",
+      "strings.txt",
+      "whitequark/dedenting_heredoc.txt",
+      "whitequark/procarg0.txt",
+    ]
+
+    Fixture.each_for_current_ruby(except: incorrect | omitted_sexp_raw) do |fixture|
+      define_method("#{fixture.test_name}_sexp_raw") { assert_ripper_sexp_raw(fixture.read) }
+    end
+
+    Fixture.each_for_current_ruby(except: incorrect | omitted_lexer_parse) do |fixture|
+      define_method("#{fixture.test_name}_lexer_parse") { assert_ripper_lexer_parse(fixture.read) }
     end
 
     # Check that the hardcoded values don't change without us noticing.
@@ -76,8 +97,27 @@ def test_internals
 
     private
 
-    def assert_ripper(source)
+    def assert_ripper_sexp_raw(source)
       assert_equal Ripper.sexp_raw(source), Prism::Translation::Ripper.sexp_raw(source)
     end
+
+    def assert_ripper_lexer_parse(source)
+      prism = Translation::Ripper::Lexer.new(source).parse
+      ripper = Ripper::Lexer.new(source).parse
+      ripper.reject! { |elem| elem.event == :on_sp } # Prism doesn't emit on_sp
+      ripper.sort_by!(&:pos) # Prism emits tokens by their order in the code, not in parse order
+
+      [prism.size, ripper.size].max.times do |i|
+        expected = ripper[i].to_a
+        actual = prism[i].to_a
+        # Since tokens related to heredocs are not emitted in the same order,
+        # the state also doesn't line up.
+        if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
+          expected[3] = actual[3] = nil
+        end
+
+        assert_equal(expected, actual)
+      end
+    end
   end
 end