From e324f8a1f32d1d49e32029a4c0d274386d27f095 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Tue, 27 Jan 2026 11:15:17 +0100 Subject: [PATCH 1/7] Raise minimum supported ruby version to 3.3 I want to start porting this to prism without the translation layer. --- .github/workflows/ci.yml | 1 + .standard.yml | 2 +- CHANGELOG.md | 2 ++ spec/integration/ruby_command_line_spec.rb | 4 ---- spec/unit/api_spec.rb | 4 ---- spec/unit/code_line_spec.rb | 2 -- spec/unit/core_ext_spec.rb | 2 -- syntax_suggest.gemspec | 2 +- 8 files changed, 5 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 61689a4..3e1eaed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,7 @@ jobs: uses: ruby/actions/.github/workflows/ruby_versions.yml@master with: engine: cruby + min_version: 3.3 test: needs: ruby-versions diff --git a/.standard.yml b/.standard.yml index 2547695..3a33af2 100644 --- a/.standard.yml +++ b/.standard.yml @@ -1 +1 @@ -ruby_version: 3.0.0 +ruby_version: 3.3.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index dc1a0a9..6600c5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ ## HEAD (unreleased) +- Changed: Minimum supported Ruby version is now 3.3. (https://github.com/ruby/syntax_suggest/pull/246) + ## 2.0.3 - Fix: Correctly identify trailing slashes when using Prism > 1.8.0. 
(https://github.com/ruby/syntax_suggest/pull/243) diff --git a/spec/integration/ruby_command_line_spec.rb b/spec/integration/ruby_command_line_spec.rb index c1ec4be..02354ce 100644 --- a/spec/integration/ruby_command_line_spec.rb +++ b/spec/integration/ruby_command_line_spec.rb @@ -94,8 +94,6 @@ module SyntaxSuggest end it "gem can be tested when executing on Ruby with default gem included" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - out = `#{ruby} -I#{lib_dir} -rsyntax_suggest -e "puts SyntaxError.instance_method(:detailed_message).source_location" 2>&1` expect($?.success?).to be_truthy @@ -103,8 +101,6 @@ module SyntaxSuggest end it "annotates a syntax error in Ruby 3.2+ when require is not used" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - Dir.mktmpdir do |dir| tmpdir = Pathname(dir) script = tmpdir.join("script.rb") diff --git a/spec/unit/api_spec.rb b/spec/unit/api_spec.rb index e900b9e..b41fd9c 100644 --- a/spec/unit/api_spec.rb +++ b/spec/unit/api_spec.rb @@ -69,8 +69,6 @@ def fake_error.message end it "respects highlight API" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - core_ext_file = lib_dir.join("syntax_suggest").join("core_ext.rb") require_relative core_ext_file @@ -91,8 +89,6 @@ def detailed_message(**kwargs) end it "can be disabled via falsey kwarg" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - core_ext_file = lib_dir.join("syntax_suggest").join("core_ext.rb") require_relative core_ext_file diff --git a/spec/unit/code_line_spec.rb b/spec/unit/code_line_spec.rb index 5b62cc2..761c460 100644 --- a/spec/unit/code_line_spec.rb +++ b/spec/unit/code_line_spec.rb @@ -17,8 +17,6 @@ def to_json(*opts) end it "supports endless method definitions" do - skip("Unsupported ruby version") unless Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3") - line = CodeLine.from_source(<<~EOM).first def square(x) = x * x EOM diff --git a/spec/unit/core_ext_spec.rb 
b/spec/unit/core_ext_spec.rb index 499c38a..d579cc8 100644 --- a/spec/unit/core_ext_spec.rb +++ b/spec/unit/core_ext_spec.rb @@ -3,8 +3,6 @@ module SyntaxSuggest RSpec.describe "Core extension" do it "SyntaxError monkepatch ensures there is a newline to the end of the file" do - skip if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("3.2") - Dir.mktmpdir do |dir| tmpdir = Pathname(dir) file = tmpdir.join("file.rb") diff --git a/syntax_suggest.gemspec b/syntax_suggest.gemspec index 756a85b..44e458a 100644 --- a/syntax_suggest.gemspec +++ b/syntax_suggest.gemspec @@ -16,7 +16,7 @@ Gem::Specification.new do |spec| spec.description = 'When you get an "unexpected end" in your syntax this gem helps you find it' spec.homepage = "https://github.com/ruby/syntax_suggest.git" spec.license = "MIT" - spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0") + spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0") spec.metadata["homepage_uri"] = spec.homepage spec.metadata["source_code_uri"] = "https://github.com/ruby/syntax_suggest.git" From eae84737da059f1d613f85dbdb9ba4f58ad6b56a Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Tue, 27 Jan 2026 11:23:05 +0100 Subject: [PATCH 2/7] Prism is always present on Ruby 3.3 This allows removing some compatibility code for Ripper --- .github/workflows/ci.yml | 23 ++------------- lib/syntax_suggest/api.rb | 43 ++++------------------------ lib/syntax_suggest/explain_syntax.rb | 10 +------ lib/syntax_suggest/lex_all.rb | 10 ++----- lib/syntax_suggest/ripper_errors.rb | 39 ------------------------- spec/unit/api_spec.rb | 6 ---- 6 files changed, 12 insertions(+), 119 deletions(-) delete mode 100644 lib/syntax_suggest/ripper_errors.rb diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3e1eaed..08ccbc6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,7 +34,9 @@ jobs: matrix: ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }} 
prism_version: - - 1.2.0 # Shipped with Ruby 3.4 as default parser https://www.ruby-lang.org/en/news/2024/12/25/ruby-3-4-0-released/ + # See https://stdgems.org/prism for which ruby version shipped with which prism version + - 0.19.0 + - 1.2.0 - 1.8.0 - head env: @@ -53,22 +55,3 @@ jobs: - name: test run: bin/rake test continue-on-error: ${{ matrix.ruby == 'head' }} - - test-disable-prism: - needs: ruby-versions - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }} - steps: - - name: Checkout code - uses: actions/checkout@v6 - - name: Set up Ruby - uses: ruby/setup-ruby@v1 - with: - ruby-version: ${{ matrix.ruby }} - bundler-cache: true - - name: test - run: SYNTAX_SUGGEST_DISABLE_PRISM=1 bin/rake test - continue-on-error: ${{ matrix.ruby == 'head' }} diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb index 0f82d83..a86237f 100644 --- a/lib/syntax_suggest/api.rb +++ b/lib/syntax_suggest/api.rb @@ -7,26 +7,12 @@ require "pathname" require "timeout" +# Prism is the new parser, replacing Ripper +require "prism" # We need Ripper loaded for `Prism.lex_compat` even if we're using Prism # for lexing and parsing require "ripper" -# Prism is the new parser, replacing Ripper -# -# We need to "dual boot" both for now because syntax_suggest -# supports older rubies that do not ship with syntax suggest. -# -# We also need the ability to control loading of this library -# so we can test that both modes work correctly in CI. -if (value = ENV["SYNTAX_SUGGEST_DISABLE_PRISM"]) - warn "Skipping loading prism due to SYNTAX_SUGGEST_DISABLE_PRISM=#{value}" -else - begin - require "prism" - rescue LoadError - end -end - module SyntaxSuggest # Used to indicate a default value that cannot # be confused with another input. @@ -35,14 +21,6 @@ module SyntaxSuggest class Error < StandardError; end TIMEOUT_DEFAULT = ENV.fetch("SYNTAX_SUGGEST_TIMEOUT", 1).to_i - # SyntaxSuggest.use_prism_parser? 
[Private] - # - # Tells us if the prism parser is available for use - # or if we should fallback to `Ripper` - def self.use_prism_parser? - defined?(Prism) - end - # SyntaxSuggest.handle_error [Public] # # Takes a `SyntaxError` exception, uses the @@ -152,20 +130,11 @@ def self.valid_without?(without_lines:, code_lines:) # SyntaxSuggest.invalid? [Private] # # Opposite of `SyntaxSuggest.valid?` - if defined?(Prism) - def self.invalid?(source) - source = source.join if source.is_a?(Array) - source = source.to_s + def self.invalid?(source) + source = source.join if source.is_a?(Array) + source = source.to_s - Prism.parse(source).failure? - end - else - def self.invalid?(source) - source = source.join if source.is_a?(Array) - source = source.to_s - - Ripper.new(source).tap(&:parse).error? - end + Prism.parse(source).failure? end # SyntaxSuggest.valid? [Private] diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb index 0d80c4d..5d5daa8 100644 --- a/lib/syntax_suggest/explain_syntax.rb +++ b/lib/syntax_suggest/explain_syntax.rb @@ -2,18 +2,10 @@ require_relative "left_right_lex_count" -if !SyntaxSuggest.use_prism_parser? - require_relative "ripper_errors" -end - module SyntaxSuggest class GetParseErrors def self.errors(source) - if SyntaxSuggest.use_prism_parser? - Prism.parse(source).errors.map(&:message) - else - RipperErrors.new(source).call.errors - end + Prism.parse(source).errors.map(&:message) end end diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb index c16fbb5..68b7089 100644 --- a/lib/syntax_suggest/lex_all.rb +++ b/lib/syntax_suggest/lex_all.rb @@ -40,14 +40,8 @@ def initialize(source:, source_lines: nil) } end - if SyntaxSuggest.use_prism_parser? 
- def self.lex(source, line_number) - Prism.lex_compat(source, line: line_number).value.sort_by { |values| values[0] } - end - else - def self.lex(source, line_number) - Ripper::Lexer.new(source, "-", line_number).parse.sort_by(&:pos) - end + def self.lex(source, line_number) + Prism.lex_compat(source, line: line_number).value.sort_by { |values| values[0] } end def to_a diff --git a/lib/syntax_suggest/ripper_errors.rb b/lib/syntax_suggest/ripper_errors.rb deleted file mode 100644 index 4e2bc90..0000000 --- a/lib/syntax_suggest/ripper_errors.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Capture parse errors from Ripper - # - # Prism returns the errors with their messages, but Ripper - # does not. To get them we must make a custom subclass. - # - # Example: - # - # puts RipperErrors.new(" def foo").call.errors - # # => ["syntax error, unexpected end-of-input, expecting ';' or '\\n'"] - class RipperErrors < Ripper - attr_reader :errors - - # Comes from ripper, called - # on every parse error, msg - # is a string - def on_parse_error(msg) - @errors ||= [] - @errors << msg - end - - alias_method :on_alias_error, :on_parse_error - alias_method :on_assign_error, :on_parse_error - alias_method :on_class_name_error, :on_parse_error - alias_method :on_param_error, :on_parse_error - alias_method :compile_error, :on_parse_error - - def call - @run_once ||= begin - @errors = [] - parse - true - end - self - end - end -end diff --git a/spec/unit/api_spec.rb b/spec/unit/api_spec.rb index b41fd9c..9299a17 100644 --- a/spec/unit/api_spec.rb +++ b/spec/unit/api_spec.rb @@ -8,12 +8,6 @@ module SyntaxSuggest RSpec.describe "Top level SyntaxSuggest api" do - it "doesn't load prism if env var is set" do - skip("SYNTAX_SUGGEST_DISABLE_PRISM not set") unless ENV["SYNTAX_SUGGEST_DISABLE_PRISM"] - - expect(SyntaxSuggest.use_prism_parser?).to be_falsey - end - it "has a `handle_error` interface" do fake_error = Object.new def fake_error.message 
From 9bc02cd8389f6815e9d3e0256c1c1481902718e0 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Fri, 20 Feb 2026 17:17:37 +0100 Subject: [PATCH 3/7] Remove workaround for ripper not lexing the entire source Maybe ripper fixed it since then. But prism also doesn't have this problem. --- lib/syntax_suggest/clean_document.rb | 2 +- lib/syntax_suggest/code_line.rb | 7 +++---- lib/syntax_suggest/lex_all.rb | 24 ++---------------------- 3 files changed, 6 insertions(+), 27 deletions(-) diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb index ba307af..4fc8e1b 100644 --- a/lib/syntax_suggest/clean_document.rb +++ b/lib/syntax_suggest/clean_document.rb @@ -86,7 +86,7 @@ module SyntaxSuggest class CleanDocument def initialize(source:) lines = clean_sweep(source: source) - @document = CodeLine.from_source(lines.join, lines: lines) + @document = CodeLine.from_source(lines.join) end # Call all of the document "cleaners" diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb index 76ca892..485b5a1 100644 --- a/lib/syntax_suggest/code_line.rb +++ b/lib/syntax_suggest/code_line.rb @@ -26,10 +26,9 @@ class CodeLine # Returns an array of CodeLine objects # from the source string - def self.from_source(source, lines: nil) - lines ||= source.lines - lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } - lines.map.with_index do |line, index| + def self.from_source(source) + lex_array_for_line = LexAll.new(source: source).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } + source.lines.map.with_index do |line, index| CodeLine.new( line: line, index: index, diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb index 68b7089..6bbea89 100644 --- a/lib/syntax_suggest/lex_all.rb +++ b/lib/syntax_suggest/lex_all.rb @@ -1,13 +1,7 @@ # 
frozen_string_literal: true module SyntaxSuggest - # Ripper.lex is not guaranteed to lex the entire source document - # - # This class guarantees the whole document is lex-ed by iteratively - # lexing the document where ripper stopped. - # - # Prism likely doesn't have the same problem. Once ripper support is removed - # we can likely reduce the complexity here if not remove the whole concept. + # Lexes the whole source and wraps the tokens in `LexValue`. # # Example usage: # @@ -18,22 +12,8 @@ module SyntaxSuggest class LexAll include Enumerable - def initialize(source:, source_lines: nil) + def initialize(source:) @lex = self.class.lex(source, 1) - lineno = @lex.last[0][0] + 1 - source_lines ||= source.lines - last_lineno = source_lines.length - - until lineno >= last_lineno - lines = source_lines[lineno..] - - @lex.concat( - self.class.lex(lines.join, lineno + 1) - ) - - lineno = @lex.last[0].first + 1 - end - last_lex = nil @lex.map! { |elem| last_lex = LexValue.new(elem[0].first, elem[1], elem[2], elem[3], last_lex) From 766b7f5adfdd92b2dfa11181b82c8f4e34ebb747 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Sat, 21 Feb 2026 11:42:01 +0100 Subject: [PATCH 4/7] Remove now unnecessary `core_ext` fallback --- lib/syntax_suggest/core_ext.rb | 127 ++++++++++----------------------- 1 file changed, 39 insertions(+), 88 deletions(-) diff --git a/lib/syntax_suggest/core_ext.rb b/lib/syntax_suggest/core_ext.rb index 94f57ba..ffbc922 100644 --- a/lib/syntax_suggest/core_ext.rb +++ b/lib/syntax_suggest/core_ext.rb @@ -1,96 +1,47 @@ # frozen_string_literal: true -# Ruby 3.2+ has a cleaner way to hook into Ruby that doesn't use `require` -if SyntaxError.method_defined?(:detailed_message) - module SyntaxSuggest - # SyntaxSuggest.module_for_detailed_message [Private] - # - # Used to monkeypatch SyntaxError via Module.prepend - def self.module_for_detailed_message - Module.new { - def detailed_message(highlight: true, 
syntax_suggest: true, **kwargs) - return super unless syntax_suggest - - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - message = super - - if path - file = Pathname.new(path) - io = SyntaxSuggest::MiniStringIO.new - - SyntaxSuggest.call( - io: io, - source: file.read, - filename: file, - terminal: highlight - ) - annotation = io.string - - annotation += "\n" unless annotation.end_with?("\n") - - annotation + message - else - message - end - rescue => e - if ENV["SYNTAX_SUGGEST_DEBUG"] - $stderr.warn(e.message) - $stderr.warn(e.backtrace) - end - - # Ignore internal errors +module SyntaxSuggest + # SyntaxSuggest.module_for_detailed_message [Private] + # + # Used to monkeypatch SyntaxError via Module.prepend + def self.module_for_detailed_message + Module.new { + def detailed_message(highlight: true, syntax_suggest: true, **kwargs) + return super unless syntax_suggest + + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + message = super + + if path + file = Pathname.new(path) + io = SyntaxSuggest::MiniStringIO.new + + SyntaxSuggest.call( + io: io, + source: file.read, + filename: file, + terminal: highlight + ) + annotation = io.string + + annotation += "\n" unless annotation.end_with?("\n") + + annotation + message + else message end - } - end - end - - SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message) -else - autoload :Pathname, "pathname" - - #-- - # Monkey patch kernel to ensure that all `require` calls call the same - # method - #++ - module Kernel - # :stopdoc: - - module_function - - alias_method :syntax_suggest_original_require, :require - alias_method :syntax_suggest_original_require_relative, :require_relative - alias_method :syntax_suggest_original_load, :load - - def load(file, wrap = false) - syntax_suggest_original_load(file) - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end - - def 
require(file) - syntax_suggest_original_require(file) - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end + rescue => e + if ENV["SYNTAX_SUGGEST_DEBUG"] + $stderr.warn(e.message) + $stderr.warn(e.backtrace) + end - def require_relative(file) - if Pathname.new(file).absolute? - syntax_suggest_original_require file - else - relative_from = caller_locations(1..1).first - relative_from_path = relative_from.absolute_path || relative_from.path - syntax_suggest_original_require File.expand_path("../#{file}", relative_from_path) + # Ignore internal errors + message end - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end + } end end + +SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message) From 8e6671a20f0f3b765a6c1764d22ca0ddb9dcae24 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Sat, 21 Feb 2026 10:29:37 +0100 Subject: [PATCH 5/7] Rename lex to token where appropriate I had a bit of trouble following what is going on. Lexing produces tokens, this renames to reflect that. 
--- lib/syntax_suggest/clean_document.rb | 8 ++--- lib/syntax_suggest/code_line.rb | 35 ++++++++++--------- lib/syntax_suggest/explain_syntax.rb | 8 ++--- ...lex_count.rb => left_right_token_count.rb} | 28 +++++++-------- lib/syntax_suggest/lex_all.rb | 28 +++++++-------- lib/syntax_suggest/{lex_value.rb => token.rb} | 22 ++++++------ spec/unit/code_block_spec.rb | 2 +- spec/unit/lex_all_spec.rb | 8 ++--- 8 files changed, 70 insertions(+), 69 deletions(-) rename lib/syntax_suggest/{left_right_lex_count.rb => left_right_token_count.rb} (85%) rename lib/syntax_suggest/{lex_value.rb => token.rb} (71%) diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb index 4fc8e1b..b9576a5 100644 --- a/lib/syntax_suggest/clean_document.rb +++ b/lib/syntax_suggest/clean_document.rb @@ -182,8 +182,8 @@ def join_heredoc! start_index_stack = [] heredoc_beg_end_index = [] lines.each do |line| - line.lex.each do |lex_value| - case lex_value.type + line.tokens.each do |token| + case token.type when :on_heredoc_beg start_index_stack << line.index when :on_heredoc_end @@ -273,7 +273,7 @@ def join_groups(groups) # Join group into the first line @document[line.index] = CodeLine.new( - lex: lines.map(&:lex).flatten, + tokens: lines.map(&:tokens).flatten, line: lines.join, index: line.index ) @@ -282,7 +282,7 @@ def join_groups(groups) lines[1..].each do |line| # The above lines already have newlines in them, if add more # then there will be double newline, use an empty line instead - @document[line.index] = CodeLine.new(line: "", index: line.index, lex: []) + @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: []) end end self diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb index 485b5a1..c1b3728 100644 --- a/lib/syntax_suggest/code_line.rb +++ b/lib/syntax_suggest/code_line.rb @@ -27,19 +27,20 @@ class CodeLine # Returns an array of CodeLine objects # from the source string def self.from_source(source) - 
lex_array_for_line = LexAll.new(source: source).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } + tokens = LexAll.new(source: source) + tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token } source.lines.map.with_index do |line, index| CodeLine.new( line: line, index: index, - lex: lex_array_for_line[index + 1] + tokens: tokens_for_line[index + 1] ) end end - attr_reader :line, :index, :lex, :line_number, :indent - def initialize(line:, index:, lex:) - @lex = lex + attr_reader :line, :index, :tokens, :line_number, :indent + def initialize(line:, index:, tokens:) + @tokens = tokens @line = line @index = index @original = line @@ -180,12 +181,12 @@ def ignore_newline_not_beg? # expect(lines.first.trailing_slash?).to eq(true) # def trailing_slash? - last = @lex.last + last = @tokens.last # Older versions of prism diverged slightly from Ripper in compatibility mode case last&.type when :on_sp - last.token == TRAILING_SLASH + last.value == TRAILING_SLASH when :on_tstring_end true else @@ -209,21 +210,21 @@ def trailing_slash? end_count = 0 @ignore_newline_not_beg = false - @lex.each do |lex| - kw_count += 1 if lex.is_kw? - end_count += 1 if lex.is_end? + @tokens.each do |token| + kw_count += 1 if token.is_kw? + end_count += 1 if token.is_end? - if lex.type == :on_ignored_nl - @ignore_newline_not_beg = !lex.expr_beg? + if token.type == :on_ignored_nl + @ignore_newline_not_beg = !token.expr_beg? end if in_oneliner_def.nil? 
- in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN) - elsif lex.state.allbits?(Ripper::EXPR_ENDFN) + in_oneliner_def = :ENDFN if token.state.allbits?(Ripper::EXPR_ENDFN) + elsif token.state.allbits?(Ripper::EXPR_ENDFN) # Continue - elsif lex.state.allbits?(Ripper::EXPR_BEG) - in_oneliner_def = :BODY if lex.token == "=" - elsif lex.state.allbits?(Ripper::EXPR_END) + elsif token.state.allbits?(Ripper::EXPR_BEG) + in_oneliner_def = :BODY if token.value == "=" + elsif token.state.allbits?(Ripper::EXPR_END) # We found an endless method, count it oneliner_count += 1 if in_oneliner_def == :BODY diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb index 5d5daa8..d7f5262 100644 --- a/lib/syntax_suggest/explain_syntax.rb +++ b/lib/syntax_suggest/explain_syntax.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "left_right_lex_count" +require_relative "left_right_token_count" module SyntaxSuggest class GetParseErrors @@ -45,14 +45,14 @@ class ExplainSyntax def initialize(code_lines:) @code_lines = code_lines - @left_right = LeftRightLexCount.new + @left_right = LeftRightTokenCount.new @missing = nil end def call @code_lines.each do |line| - line.lex.each do |lex| - @left_right.count_lex(lex) + line.tokens.each do |token| + @left_right.count_token(token) end end diff --git a/lib/syntax_suggest/left_right_lex_count.rb b/lib/syntax_suggest/left_right_token_count.rb similarity index 85% rename from lib/syntax_suggest/left_right_lex_count.rb rename to lib/syntax_suggest/left_right_token_count.rb index 6fcae74..87a1c39 100644 --- a/lib/syntax_suggest/left_right_lex_count.rb +++ b/lib/syntax_suggest/left_right_token_count.rb @@ -9,19 +9,19 @@ module SyntaxSuggest # # Example: # - # left_right = LeftRightLexCount.new + # left_right = LeftRightTokenCount.new # left_right.count_kw # left_right.missing.first # # => "end" # - # left_right = LeftRightLexCount.new + # left_right = LeftRightTokenCount.new # source = "{ a: 
b, c: d" # Note missing '}' - # LexAll.new(source: source).each do |lex| - # left_right.count_lex(lex) + # LexAll.new(source: source).each do |token| + # left_right.count_token(token) # end # left_right.missing.first # # => "}" - class LeftRightLexCount + class LeftRightTokenCount def initialize @kw_count = 0 @end_count = 0 @@ -49,14 +49,14 @@ def count_end # # Example: # - # left_right = LeftRightLexCount.new - # left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG)) + # left_right = LeftRightTokenCount.new + # left_right.count_token(Token.new(1, :on_lbrace, "{", Ripper::EXPR_BEG)) # left_right.count_for_char("{") # # => 1 # left_right.count_for_char("}") # # => 0 - def count_lex(lex) - case lex.type + def count_token(token) + case token.type when :on_tstring_content # ^^^ # Means it's a string or a symbol `"{"` rather than being @@ -70,7 +70,7 @@ def count_lex(lex) # The start token will be the full thing `%Q{` but we # need to count it as if it's a `{`. Any token # can be used - char = lex.token[-1] + char = token.value[-1] @count_for_char[char] += 1 if @count_for_char.key?(char) when :on_embexpr_beg # ^^^ @@ -87,14 +87,14 @@ def count_lex(lex) # When we see `#{` count it as a `{` or we will # have a mis-match count. # - case lex.token + case token.value when "\#{" @count_for_char["{"] += 1 end else - @end_count += 1 if lex.is_end? - @kw_count += 1 if lex.is_kw? - @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token) + @end_count += 1 if token.is_end? + @kw_count += 1 if token.is_kw? 
+ @count_for_char[token.value] += 1 if @count_for_char.key?(token.value) end end diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb index 6bbea89..7c7a163 100644 --- a/lib/syntax_suggest/lex_all.rb +++ b/lib/syntax_suggest/lex_all.rb @@ -5,18 +5,18 @@ module SyntaxSuggest # # Example usage: # - # lex = LexAll.new(source: source) - # lex.each do |value| - # puts value.line + # tokens = LexAll.new(source: source) + # tokens.each do |token| + # puts token.line # end class LexAll include Enumerable def initialize(source:) - @lex = self.class.lex(source, 1) - last_lex = nil - @lex.map! { |elem| - last_lex = LexValue.new(elem[0].first, elem[1], elem[2], elem[3], last_lex) + @tokens = self.class.lex(source, 1) + last_token = nil + @tokens.map! { |elem| + last_token = Token.new(elem[0].first, elem[1], elem[2], elem[3], last_token) } end @@ -25,24 +25,24 @@ def self.lex(source, line_number) end def to_a - @lex + @tokens end def each - return @lex.each unless block_given? - @lex.each do |x| - yield x + return @tokens.each unless block_given? 
+ @tokens.each do |token| + yield token end end def [](index) - @lex[index] + @tokens[index] end def last - @lex.last + @tokens.last end end end -require_relative "lex_value" +require_relative "token" diff --git a/lib/syntax_suggest/lex_value.rb b/lib/syntax_suggest/token.rb similarity index 71% rename from lib/syntax_suggest/lex_value.rb rename to lib/syntax_suggest/token.rb index b46a332..d4577f5 100644 --- a/lib/syntax_suggest/lex_value.rb +++ b/lib/syntax_suggest/token.rb @@ -9,29 +9,29 @@ module SyntaxSuggest # # Would translate into: # - # lex.line # => 1 - # lex.type # => :on_indent - # lex.token # => "describe" - class LexValue - attr_reader :line, :type, :token, :state + # token.line # => 1 + # token.type # => :on_indent + # token.value # => "describe" + class Token + attr_reader :line, :type, :value, :state - def initialize(line, type, token, state, last_lex = nil) + def initialize(line, type, value, state, last_token = nil) @line = line @type = type - @token = token + @value = value @state = state - set_kw_end(last_lex) + set_kw_end(last_token) end - private def set_kw_end(last_lex) + private def set_kw_end(last_token) @is_end = false @is_kw = false return if type != :on_kw - return if last_lex && last_lex.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953 + return if last_token && last_token.fname? 
# https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953 - case token + case value when "if", "unless", "while", "until" # Only count if/unless when it's not a "trailing" if/unless # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375 diff --git a/spec/unit/code_block_spec.rb b/spec/unit/code_block_spec.rb index 3ab2751..baf1c63 100644 --- a/spec/unit/code_block_spec.rb +++ b/spec/unit/code_block_spec.rb @@ -33,7 +33,7 @@ def foo array = [block_2, block_1, block_0].sort expect(array.last).to eq(block_2) - block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4, lex: [])) + block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4, tokens: [])) array.prepend(block) expect(array.max).to eq(block) end diff --git a/spec/unit/lex_all_spec.rb b/spec/unit/lex_all_spec.rb index 9621c9e..b88ae65 100644 --- a/spec/unit/lex_all_spec.rb +++ b/spec/unit/lex_all_spec.rb @@ -17,10 +17,10 @@ module SyntaxSuggest end # 9 EOM - lex = LexAll.new(source: source) - expect(lex.map(&:token).to_s).to include("dog") - expect(lex.first.line).to eq(1) - expect(lex.last.line).to eq(9) + tokens = LexAll.new(source: source) + expect(tokens.map(&:value)).to include("dog") + expect(tokens.first.line).to eq(1) + expect(tokens.last.line).to eq(9) end end end From 051f52016ac46cc310200c3c25806c01aa3c6062 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Sat, 21 Feb 2026 15:52:34 +0100 Subject: [PATCH 6/7] Fix typo for %I delimiter check Also add tests for the other types which currently don't have one --- CHANGELOG.md | 1 + lib/syntax_suggest/left_right_token_count.rb | 2 +- spec/unit/explain_syntax_spec.rb | 32 ++++++++++++++++++-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6600c5b..1d2b1de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ## 2.0.3 - Fix: Correctly identify trailing 
slashes when using Prism > 1.8.0. (https://github.com/ruby/syntax_suggest/pull/243) +- Fix: Correctly handle `%I` delimiters. (https://github.com/ruby/syntax_suggest/pull/249) - Internal: Add tests to multiple versions of prism ## 2.0.2 diff --git a/lib/syntax_suggest/left_right_token_count.rb b/lib/syntax_suggest/left_right_token_count.rb index 87a1c39..c4a3415 100644 --- a/lib/syntax_suggest/left_right_token_count.rb +++ b/lib/syntax_suggest/left_right_token_count.rb @@ -62,7 +62,7 @@ def count_token(token) # Means it's a string or a symbol `"{"` rather than being # part of a data structure (like a hash) `{ a: b }` # ignore it. - when :on_words_beg, :on_symbos_beg, :on_qwords_beg, + when :on_words_beg, :on_symbols_beg, :on_qwords_beg, :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg # ^^^ # Handle shorthand syntaxes like `%Q{ i am a string }` diff --git a/spec/unit/explain_syntax_spec.rb b/spec/unit/explain_syntax_spec.rb index c62a42b..7ddb32b 100644 --- a/spec/unit/explain_syntax_spec.rb +++ b/spec/unit/explain_syntax_spec.rb @@ -17,9 +17,23 @@ module SyntaxSuggest expect(explain.errors.join.strip).to_not be_empty end - it "handles %w[]" do + %w[w W i I].each do |type| + it "handles %#{type}-style array" do + source = <<~EOM + node.is_a?(Op) && %#{type}[| ||].include?(node.value) && + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + end + end + + it "handles %r-style regexp" do source = <<~EOM - node.is_a?(Op) && %w[| ||].include?(node.value) && + node.is_a?(Op) && %r{| ||}.include?(node.value) && EOM explain = ExplainSyntax.new( @@ -29,6 +43,20 @@ module SyntaxSuggest expect(explain.missing).to eq([]) end + ["", "q", "Q"].each do |type| + it "handles %#{type}-style string" do + source = <<~EOM + node.is_a?(Op) && %#{type}(| ||).include?(node.value) && + EOM + + explain = ExplainSyntax.new( + code_lines: CodeLine.from_source(source) + ).call + + expect(explain.missing).to eq([]) + 
end + end + it "doesn't falsely identify strings or symbols as critical chars" do source = <<~EOM a = ['(', '{', '[', '|'] From 9f81d6831f623c7ca79e43813cd49b54474ff07d Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Wed, 25 Feb 2026 14:26:11 +0100 Subject: [PATCH 7/7] Fully migrate to prism It mostly continues to rely on tokens. But for a few things like endless method defs and multiline method continuations it uses AST. These are either very difficult or not possible to find just by checking tokens. Because of multiline method calls, comments now don't need to be trimmed anymore. --- lib/syntax_suggest/api.rb | 6 +- lib/syntax_suggest/clean_document.rb | 98 ++------------------ lib/syntax_suggest/code_line.rb | 92 +++++------------- lib/syntax_suggest/left_right_token_count.rb | 24 ++--- lib/syntax_suggest/lex_all.rb | 48 ---------- lib/syntax_suggest/token.rb | 64 +++++-------- lib/syntax_suggest/visitor.rb | 52 +++++++++++ spec/unit/clean_document_spec.rb | 13 --- spec/unit/code_block_spec.rb | 2 +- spec/unit/code_line_spec.rb | 2 +- spec/unit/lex_all_spec.rb | 26 ------ 11 files changed, 118 insertions(+), 309 deletions(-) delete mode 100644 lib/syntax_suggest/lex_all.rb create mode 100644 lib/syntax_suggest/visitor.rb delete mode 100644 spec/unit/lex_all_spec.rb diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb index a86237f..5054efa 100644 --- a/lib/syntax_suggest/api.rb +++ b/lib/syntax_suggest/api.rb @@ -9,9 +9,6 @@ # Prism is the new parser, replacing Ripper require "prism" -# We need Ripper loaded for `Prism.lex_compat` even if we're using Prism -# for lexing and parsing -require "ripper" module SyntaxSuggest # Used to indicate a default value that cannot @@ -188,7 +185,6 @@ def self.valid?(source) require_relative "clean_document" # Helpers -require_relative "lex_all" require_relative "code_line" require_relative "code_block" require_relative "block_expand" @@ -200,3 +196,5 @@ def 
self.valid?(source) require_relative "pathname_from_message" require_relative "display_invalid_blocks" require_relative "parse_blocks_from_indent_line" +require_relative "visitor" +require_relative "token" diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb index b9576a5..ca230d8 100644 --- a/lib/syntax_suggest/clean_document.rb +++ b/lib/syntax_suggest/clean_document.rb @@ -67,26 +67,9 @@ module SyntaxSuggest # All of these problems are fixed by joining the whole heredoc into a single # line. # - # ## Comments and whitespace - # - # Comments can throw off the way the lexer tells us that the line - # logically belongs with the next line. This is valid ruby but - # results in a different lex output than before: - # - # 1 User. - # 2 where(name: "schneems"). - # 3 # Comment here - # 4 first - # - # To handle this we can replace comment lines with empty lines - # and then re-lex the source. This removal and re-lexing preserves - # line index and document size, but generates an easier to work with - # document. - # class CleanDocument def initialize(source:) - lines = clean_sweep(source: source) - @document = CodeLine.from_source(lines.join) + @document = CodeLine.from_source(source) end # Call all of the document "cleaners" @@ -110,62 +93,6 @@ def to_s @document.join end - # Remove comments - # - # replace with empty newlines - # - # source = <<~'EOM' - # # Comment 1 - # puts "hello" - # # Comment 2 - # puts "world" - # EOM - # - # lines = CleanDocument.new(source: source).lines - # expect(lines[0].to_s).to eq("\n") - # expect(lines[1].to_s).to eq("puts "hello") - # expect(lines[2].to_s).to eq("\n") - # expect(lines[3].to_s).to eq("puts "world") - # - # Important: This must be done before lexing. - # - # After this change is made, we lex the document because - # removing comments can change how the doc is parsed. - # - # For example: - # - # values = LexAll.new(source: <<~EOM)) - # User. 
- # # comment - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(1) - # - # After the comment is removed: - # - # values = LexAll.new(source: <<~EOM)) - # User. - # - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(2) - # - def clean_sweep(source:) - # Match comments, but not HEREDOC strings with #{variable} interpolation - # https://rubular.com/r/HPwtW9OYxKUHXQ - source.lines.map do |line| - if line.match?(/^\s*#([^{].*|)$/) - $/ - else - line - end - end - end - # Smushes all heredoc lines into one line # # source = <<~'EOM' @@ -184,9 +111,9 @@ def join_heredoc! lines.each do |line| line.tokens.each do |token| case token.type - when :on_heredoc_beg + when :HEREDOC_START start_index_stack << line.index - when :on_heredoc_end + when :HEREDOC_END start_index = start_index_stack.pop end_index = line.index heredoc_beg_end_index << [start_index, end_index] @@ -212,20 +139,10 @@ def join_heredoc! # expect(lines[0].to_s).to eq(source) # expect(lines[1].to_s).to eq("") # - # The one known case this doesn't handle is: - # - # Ripper.lex <<~EOM - # a && - # b || - # c - # EOM - # - # For some reason this introduces `on_ignore_newline` but with BEG type - # def join_consecutive! - consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line| + consecutive_groups = @document.select(&:consecutive?).map do |code_line| take_while_including(code_line.index..) do |line| - line.ignore_newline_not_beg? + line.consecutive? 
end end @@ -275,14 +192,15 @@ def join_groups(groups) @document[line.index] = CodeLine.new( tokens: lines.map(&:tokens).flatten, line: lines.join, - index: line.index + index: line.index, + consecutive: false ) # Hide the rest of the lines lines[1..].each do |line| # The above lines already have newlines in them, if add more # then there will be double newline, use an empty line instead - @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: []) + @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false) end end self diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb index c1b3728..d872fe9 100644 --- a/lib/syntax_suggest/code_line.rb +++ b/lib/syntax_suggest/code_line.rb @@ -27,22 +27,33 @@ class CodeLine # Returns an array of CodeLine objects # from the source string def self.from_source(source) - tokens = LexAll.new(source: source) + ast, tokens = Prism.parse_lex(source).value + visitor = Visitor.new + visitor.visit(ast) + tokens.sort_by! { |token, _state| token.location.start_line } + + prev_token = nil + tokens.map! 
do |token, _state| + prev_token = Token.new(token, prev_token, visitor) + end + tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token } source.lines.map.with_index do |line, index| CodeLine.new( line: line, index: index, - tokens: tokens_for_line[index + 1] + tokens: tokens_for_line[index + 1], + consecutive: visitor.consecutive_lines.include?(index + 1) ) end end attr_reader :line, :index, :tokens, :line_number, :indent - def initialize(line:, index:, tokens:) + def initialize(line:, index:, tokens:, consecutive:) @tokens = tokens @line = line @index = index + @consecutive = consecutive @original = line @line_number = @index + 1 strip_line = line.dup @@ -151,29 +162,16 @@ def <=>(other) index <=> other.index end - # [Not stable API] - # - # Lines that have a `on_ignored_nl` type token and NOT - # a `BEG` type seem to be a good proxy for the ability - # to join multiple lines into one. - # - # This predicate method is used to determine when those - # two criteria have been met. - # - # The one known case this doesn't handle is: - # - # Ripper.lex <<~EOM - # a && - # b || - # c - # EOM - # - # For some reason this introduces `on_ignore_newline` but with BEG type - def ignore_newline_not_beg? - @ignore_newline_not_beg + # Can this line be logically joined together + # with the following line? Determined by walking + # the AST + def consecutive? + @consecutive end - # Determines if the given line has a trailing slash + # Determines if the given line has a trailing slash. + # Simply check if the line contains a backslash after + # the content of the last token. # # lines = CodeLine.from_source(<<~EOM) # it "foo" \ @@ -181,61 +179,19 @@ def ignore_newline_not_beg? # expect(lines.first.trailing_slash?).to eq(true) # def trailing_slash? 
- last = @tokens.last - - # Older versions of prism diverged slightly from Ripper in compatibility mode - case last&.type - when :on_sp - last.value == TRAILING_SLASH - when :on_tstring_end - true - else - false - end + return unless (last = @tokens.last) + @line.byteindex(TRAILING_SLASH, last.location.end_column) != nil end - # Endless method detection - # - # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab - # Detecting a "oneliner" seems to need a state machine. - # This can be done by looking mostly at the "state" (last value): - # - # ENDFN -> BEG (token = '=' ) -> END - # private def set_kw_end - oneliner_count = 0 - in_oneliner_def = nil - kw_count = 0 end_count = 0 - @ignore_newline_not_beg = false @tokens.each do |token| kw_count += 1 if token.is_kw? end_count += 1 if token.is_end? - - if token.type == :on_ignored_nl - @ignore_newline_not_beg = !token.expr_beg? - end - - if in_oneliner_def.nil? - in_oneliner_def = :ENDFN if token.state.allbits?(Ripper::EXPR_ENDFN) - elsif token.state.allbits?(Ripper::EXPR_ENDFN) - # Continue - elsif token.state.allbits?(Ripper::EXPR_BEG) - in_oneliner_def = :BODY if token.value == "=" - elsif token.state.allbits?(Ripper::EXPR_END) - # We found an endless method, count it - oneliner_count += 1 if in_oneliner_def == :BODY - - in_oneliner_def = nil - else - in_oneliner_def = nil - end end - kw_count -= oneliner_count - @is_kw = (kw_count - end_count) > 0 @is_end = (end_count - kw_count) > 0 end diff --git a/lib/syntax_suggest/left_right_token_count.rb b/lib/syntax_suggest/left_right_token_count.rb index c4a3415..e0562ba 100644 --- a/lib/syntax_suggest/left_right_token_count.rb +++ b/lib/syntax_suggest/left_right_token_count.rb @@ -49,21 +49,22 @@ def count_end # # Example: # + # token = CodeLine.from_source("{").first.tokens.first # left_right = LeftRightTokenCount.new - # left_right.count_token(Token.new(1, :on_lbrace, "{", Ripper::EXPR_BEG)) + # left_right.count_token(token) #
left_right.count_for_char("{") # # => 1 # left_right.count_for_char("}") # # => 0 def count_token(token) case token.type - when :on_tstring_content + when :STRING_CONTENT # ^^^ # Means it's a string or a symbol `"{"` rather than being # part of a data structure (like a hash) `{ a: b }` # ignore it. - when :on_words_beg, :on_symbols_beg, :on_qwords_beg, - :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg + when :PERCENT_UPPER_W, :PERCENT_UPPER_I, :PERCENT_LOWER_W, + :PERCENT_LOWER_I, :REGEXP_BEGIN, :STRING_BEGIN # ^^^ # Handle shorthand syntaxes like `%Q{ i am a string }` # @@ -72,25 +73,18 @@ def count_token(token) # can be used char = token.value[-1] @count_for_char[char] += 1 if @count_for_char.key?(char) - when :on_embexpr_beg + when :EMBEXPR_BEGIN # ^^^ # Embedded string expressions like `"#{foo} <-embed"` # are parsed with chars: # - # `#{` as :on_embexpr_beg - # `}` as :on_embexpr_end - # - # We cannot ignore both :on_emb_expr_beg and :on_embexpr_end - # because sometimes the lexer thinks something is an embed - # string end, when it is not like `lol = }` (no clue why). + # `#{` as :EMBEXPR_BEGIN + # `}` as :EMBEXPR_END # # When we see `#{` count it as a `{` or we will # have a mis-match count. # - case token.value - when "\#{" - @count_for_char["{"] += 1 - end + @count_for_char["{"] += 1 else @end_count += 1 if token.is_end? @kw_count += 1 if token.is_kw? diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb deleted file mode 100644 index 7c7a163..0000000 --- a/lib/syntax_suggest/lex_all.rb +++ /dev/null @@ -1,48 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Lexes the whole source and wraps the tokens in `LexValue`. - # - # Example usage: - # - # tokens = LexAll.new(source: source) - # tokens.each do |token| - # puts token.line - # end - class LexAll - include Enumerable - - def initialize(source:) - @tokens = self.class.lex(source, 1) - last_token = nil - @tokens.map! 
{ |elem| - last_token = Token.new(elem[0].first, elem[1], elem[2], elem[3], last_token) - } - end - - def self.lex(source, line_number) - Prism.lex_compat(source, line: line_number).value.sort_by { |values| values[0] } - end - - def to_a - @tokens - end - - def each - return @tokens.each unless block_given? - @tokens.each do |token| - yield token - end - end - - def [](index) - @tokens[index] - end - - def last - @tokens.last - end - end -end - -require_relative "token" diff --git a/lib/syntax_suggest/token.rb b/lib/syntax_suggest/token.rb index d4577f5..f5967d9 100644 --- a/lib/syntax_suggest/token.rb +++ b/lib/syntax_suggest/token.rb @@ -5,50 +5,36 @@ module SyntaxSuggest # # This lex: # - # [1, 0], :on_ident, "describe", CMDARG + # [IDENTIFIER(1,0)-(1,8)("describe"), 32] # # Would translate into: # - # token.line # => 1 - # token.type # => :on_indent - # token.value # => "describe" + # lex.location # => (1,0)-(1,8) + # lex.type # => :IDENTIFIER + # lex.value # => "describe" class Token - attr_reader :line, :type, :value, :state + attr_reader :location, :type, :value - def initialize(line, type, value, state, last_token = nil) - @line = line - @type = type - @value = value - @state = state + KW_TYPES = %i[ + KEYWORD_IF KEYWORD_UNLESS KEYWORD_WHILE KEYWORD_UNTIL + KEYWORD_DEF KEYWORD_CASE KEYWORD_FOR KEYWORD_BEGIN KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DO KEYWORD_DO_LOOP + ].to_set.freeze + private_constant :KW_TYPES - set_kw_end(last_token) - end - - private def set_kw_end(last_token) - @is_end = false - @is_kw = false - return if type != :on_kw - - return if last_token && last_token.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953 - - case value - when "if", "unless", "while", "until" - # Only count if/unless when it's not a "trailing" if/unless - # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375 - @is_kw = true unless expr_label?
- when "def", "case", "for", "begin", "class", "module", "do" - @is_kw = true - when "end" - @is_end = true - end - end + def initialize(prism_token, previous_prism_token, visitor) + @location = prism_token.location + @type = prism_token.type + @value = prism_token.value - def fname? - state.allbits?(Ripper::EXPR_FNAME) + # Prism lexes `:module` as SYMBOL_BEGIN, KEYWORD_MODULE + # https://github.com/ruby/prism/issues/3940 + symbol_content = previous_prism_token&.type == :SYMBOL_BEGIN + @is_kw = KW_TYPES.include?(@type) && !symbol_content && !visitor.endless_def_keyword_locs.include?(@location) + @is_end = @type == :KEYWORD_END end - def ignore_newline? - type == :on_ignored_nl + def line + @location.start_line end def is_end? @@ -58,13 +44,5 @@ def is_end? def is_kw? @is_kw end - - def expr_beg? - state.anybits?(Ripper::EXPR_BEG) - end - - def expr_label? - state.allbits?(Ripper::EXPR_LABEL) - end end end diff --git a/lib/syntax_suggest/visitor.rb b/lib/syntax_suggest/visitor.rb new file mode 100644 index 0000000..ff1a36e --- /dev/null +++ b/lib/syntax_suggest/visitor.rb @@ -0,0 +1,52 @@ +module SyntaxSuggest + # A visitor that walks the AST and pulls out information + # that is too difficult to discern by just looking at tokens + class Visitor < Prism::Visitor + attr_reader :endless_def_keyword_locs + + def initialize + @endless_def_keyword_locs = [] + @consecutive_lines = {} + end + + def consecutive_lines + @consecutive_lines.keys.sort + end + + # Record lines where a method call is logically connected + # to subsequent lines.
This is the case when a method call + # is broken up by a newline + def visit_call_node(node) + receiver_loc = node.receiver&.location + call_operator_loc = node.call_operator_loc + message_loc = node.message_loc + if receiver_loc && call_operator_loc && message_loc + # foo + # .bar + if receiver_loc.end_line != call_operator_loc.start_line && call_operator_loc.start_line == message_loc.start_line + (receiver_loc.end_line..call_operator_loc.start_line - 1).each do |line| + @consecutive_lines[line] = true + end + end + + # foo. + # bar + if receiver_loc.end_line == call_operator_loc.start_line && call_operator_loc.start_line != message_loc.start_line + (call_operator_loc.start_line..message_loc.start_line - 1).each do |line| + @consecutive_lines[line] = true + end + end + end + super + end + + # Endless method definitions like `def foo = 123` are valid without + # an `end` keyword. We record their keyword here so that we can later + # skip considering them for keywords since they have no corresponding + # end + def visit_def_node(node) + @endless_def_keyword_locs << node.def_keyword_loc if node.equal_loc + super + end + end +end diff --git a/spec/unit/clean_document_spec.rb b/spec/unit/clean_document_spec.rb index 5b5ca04..47d9639 100644 --- a/spec/unit/clean_document_spec.rb +++ b/spec/unit/clean_document_spec.rb @@ -103,19 +103,6 @@ module SyntaxSuggest expect(lines.count).to eq(2) end - it "comments: removes comments" do - source = <<~EOM - # lol - puts "what" - # yolo - EOM - - lines = CleanDocument.new(source: source).lines - expect(lines[0].to_s).to eq($/) - expect(lines[1].to_s).to eq('puts "what"' + $/) - expect(lines[2].to_s).to eq($/) - end - it "trailing slash: does not join trailing do" do # Some keywords and syntaxes trigger the "ignored line" # lex output, we ignore them by filtering by BEG diff --git a/spec/unit/code_block_spec.rb b/spec/unit/code_block_spec.rb index baf1c63..dfea307 100644 --- a/spec/unit/code_block_spec.rb +++
b/spec/unit/code_block_spec.rb @@ -33,7 +33,7 @@ def foo array = [block_2, block_1, block_0].sort expect(array.last).to eq(block_2) - block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4, tokens: [])) + block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4, tokens: [], consecutive: false)) array.prepend(block) expect(array.max).to eq(block) end diff --git a/spec/unit/code_line_spec.rb b/spec/unit/code_line_spec.rb index 761c460..8b5fa1a 100644 --- a/spec/unit/code_line_spec.rb +++ b/spec/unit/code_line_spec.rb @@ -44,7 +44,7 @@ def square(x) = x * x EOM # Indicates line 1 can join 2, 2 can join 3, but 3 won't join it's next line - expect(code_lines.map(&:ignore_newline_not_beg?)).to eq([true, true, false, false]) + expect(code_lines.map(&:consecutive?)).to eq([true, true, false, false]) end it "trailing if" do diff --git a/spec/unit/lex_all_spec.rb b/spec/unit/lex_all_spec.rb deleted file mode 100644 index b88ae65..0000000 --- a/spec/unit/lex_all_spec.rb +++ /dev/null @@ -1,26 +0,0 @@ -# frozen_string_literal: true - -require_relative "../spec_helper" - -module SyntaxSuggest - RSpec.describe "EndBlockParse" do - it "finds blocks based on `end` keyword" do - source = <<~EOM - describe "cat" # 1 - Cat.call do # 2 - end # 3 - end # 4 - # 5 - it "dog" do # 6 - Dog.call do # 7 - end # 8 - end # 9 - EOM - - tokens = LexAll.new(source: source) - expect(tokens.map(&:value)).to include("dog") - expect(tokens.first.line).to eq(1) - expect(tokens.last.line).to eq(9) - end - end -end