Skip to content

Commit 328a73b

Browse files
committed
Fully migrate to Prism
It mostly continues to rely on tokens, but for a few things — like endless method definitions and multiline method continuations — it uses the AST. These are either very difficult or impossible to find just by checking tokens. Because multiline method continuations are now detected via the AST, comments no longer need to be trimmed.
1 parent 051f520 commit 328a73b

11 files changed

Lines changed: 140 additions & 317 deletions

File tree

lib/syntax_suggest/api.rb

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99

1010
# Prism is the new parser, replacing Ripper
1111
require "prism"
12-
# We need Ripper loaded for `Prism.lex_compat` even if we're using Prism
13-
# for lexing and parsing
14-
require "ripper"
1512

1613
module SyntaxSuggest
1714
# Used to indicate a default value that cannot
@@ -188,7 +185,6 @@ def self.valid?(source)
188185
require_relative "clean_document"
189186

190187
# Helpers
191-
require_relative "lex_all"
192188
require_relative "code_line"
193189
require_relative "code_block"
194190
require_relative "block_expand"
@@ -200,3 +196,5 @@ def self.valid?(source)
200196
require_relative "pathname_from_message"
201197
require_relative "display_invalid_blocks"
202198
require_relative "parse_blocks_from_indent_line"
199+
require_relative "visitor"
200+
require_relative "token"

lib/syntax_suggest/clean_document.rb

Lines changed: 9 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -67,26 +67,9 @@ module SyntaxSuggest
6767
# All of these problems are fixed by joining the whole heredoc into a single
6868
# line.
6969
#
70-
# ## Comments and whitespace
71-
#
72-
# Comments can throw off the way the lexer tells us that the line
73-
# logically belongs with the next line. This is valid ruby but
74-
# results in a different lex output than before:
75-
#
76-
# 1 User.
77-
# 2 where(name: "schneems").
78-
# 3 # Comment here
79-
# 4 first
80-
#
81-
# To handle this we can replace comment lines with empty lines
82-
# and then re-lex the source. This removal and re-lexing preserves
83-
# line index and document size, but generates an easier to work with
84-
# document.
85-
#
8670
class CleanDocument
8771
def initialize(source:)
88-
lines = clean_sweep(source: source)
89-
@document = CodeLine.from_source(lines.join)
72+
@document = CodeLine.from_source(source)
9073
end
9174

9275
# Call all of the document "cleaners"
@@ -110,62 +93,6 @@ def to_s
11093
@document.join
11194
end
11295

113-
# Remove comments
114-
#
115-
# replace with empty newlines
116-
#
117-
# source = <<~'EOM'
118-
# # Comment 1
119-
# puts "hello"
120-
# # Comment 2
121-
# puts "world"
122-
# EOM
123-
#
124-
# lines = CleanDocument.new(source: source).lines
125-
# expect(lines[0].to_s).to eq("\n")
126-
# expect(lines[1].to_s).to eq("puts "hello")
127-
# expect(lines[2].to_s).to eq("\n")
128-
# expect(lines[3].to_s).to eq("puts "world")
129-
#
130-
# Important: This must be done before lexing.
131-
#
132-
# After this change is made, we lex the document because
133-
# removing comments can change how the doc is parsed.
134-
#
135-
# For example:
136-
#
137-
# values = LexAll.new(source: <<~EOM))
138-
# User.
139-
# # comment
140-
# where(name: 'schneems')
141-
# EOM
142-
# expect(
143-
# values.count {|v| v.type == :on_ignored_nl}
144-
# ).to eq(1)
145-
#
146-
# After the comment is removed:
147-
#
148-
# values = LexAll.new(source: <<~EOM))
149-
# User.
150-
#
151-
# where(name: 'schneems')
152-
# EOM
153-
# expect(
154-
# values.count {|v| v.type == :on_ignored_nl}
155-
# ).to eq(2)
156-
#
157-
def clean_sweep(source:)
158-
# Match comments, but not HEREDOC strings with #{variable} interpolation
159-
# https://rubular.com/r/HPwtW9OYxKUHXQ
160-
source.lines.map do |line|
161-
if line.match?(/^\s*#([^{].*|)$/)
162-
$/
163-
else
164-
line
165-
end
166-
end
167-
end
168-
16996
# Smushes all heredoc lines into one line
17097
#
17198
# source = <<~'EOM'
@@ -184,9 +111,9 @@ def join_heredoc!
184111
lines.each do |line|
185112
line.tokens.each do |token|
186113
case token.type
187-
when :on_heredoc_beg
114+
when :HEREDOC_START
188115
start_index_stack << line.index
189-
when :on_heredoc_end
116+
when :HEREDOC_END
190117
start_index = start_index_stack.pop
191118
end_index = line.index
192119
heredoc_beg_end_index << [start_index, end_index]
@@ -212,20 +139,10 @@ def join_heredoc!
212139
# expect(lines[0].to_s).to eq(source)
213140
# expect(lines[1].to_s).to eq("")
214141
#
215-
# The one known case this doesn't handle is:
216-
#
217-
# Ripper.lex <<~EOM
218-
# a &&
219-
# b ||
220-
# c
221-
# EOM
222-
#
223-
# For some reason this introduces `on_ignore_newline` but with BEG type
224-
#
225142
def join_consecutive!
226-
consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
143+
consecutive_groups = @document.select(&:consecutive?).map do |code_line|
227144
take_while_including(code_line.index..) do |line|
228-
line.ignore_newline_not_beg?
145+
line.consecutive?
229146
end
230147
end
231148

@@ -264,25 +181,19 @@ def join_trailing_slash!
264181
# To preserve document size, empty lines are placed
265182
# in the place of the lines that were "joined"
266183
def join_groups(groups)
267-
groups.each do |lines|
268-
line = lines.first
269-
184+
groups.each do |line, *other_lines|
270185
# Handle the case of multiple groups in a row
271186
# if one is already replaced, move on
272187
next if @document[line.index].empty?
273188

274189
# Join group into the first line
275-
@document[line.index] = CodeLine.new(
276-
tokens: lines.map(&:tokens).flatten,
277-
line: lines.join,
278-
index: line.index
279-
)
190+
@document[line.index] = line.merge(other_lines)
280191

281192
# Hide the rest of the lines
282-
lines[1..].each do |line|
193+
other_lines.each do |other|
283194
# The above lines already have newlines in them, if add more
284195
# then there will be double newline, use an empty line instead
285-
@document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [])
196+
@document[other.index] = CodeLine.empty(other.index)
286197
end
287198
end
288199
self

lib/syntax_suggest/code_line.rb

Lines changed: 46 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,41 @@ class CodeLine
2727
# Returns an array of CodeLine objects
2828
# from the source string
2929
def self.from_source(source)
30-
tokens = LexAll.new(source: source)
30+
ast, tokens = Prism.parse_lex(source).value
31+
tokens.sort_by! { |token, _state| token.location.start_line }
32+
prev_token = nil
33+
tokens.map! do |token, _state|
34+
prev_token = Token.new(token, prev_token)
35+
end
36+
37+
visitor = Visitor.new
38+
visitor.visit(ast)
39+
3140
tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token }
3241
source.lines.map.with_index do |line, index|
3342
CodeLine.new(
3443
line: line,
3544
index: index,
36-
tokens: tokens_for_line[index + 1]
45+
tokens: tokens_for_line[index + 1],
46+
endless_def_count: visitor.endless_def_count[index + 1],
47+
consecutive: visitor.consecutive_lines.include?(index + 1)
3748
)
3849
end
3950
end
4051

41-
attr_reader :line, :index, :tokens, :line_number, :indent
42-
def initialize(line:, index:, tokens:)
52+
# Returns a new CodeLine for the empty string,
53+
# used as a placeholder when joining lines
54+
def self.empty(index)
55+
CodeLine.new(line: "", index: index, tokens: [], endless_def_count: 0, consecutive: false)
56+
end
57+
58+
attr_reader :line, :index, :tokens, :line_number, :indent, :endless_def_count
59+
def initialize(line:, index:, tokens:, endless_def_count:, consecutive:)
4360
@tokens = tokens
4461
@line = line
4562
@index = index
63+
@endless_def_count = endless_def_count
64+
@consecutive = consecutive
4665
@original = line
4766
@line_number = @index + 1
4867
strip_line = line.dup
@@ -57,6 +76,18 @@ def initialize(line:, index:, tokens:)
5776
set_kw_end
5877
end
5978

79+
# Return a new codeline, with the given lines
80+
# merged into the new instance
81+
def merge(other)
82+
CodeLine.new(
83+
tokens: tokens + other.map(&:tokens).flatten,
84+
line: line + other.join,
85+
index: index,
86+
endless_def_count: endless_def_count + other.sum { |line| line.endless_def_count },
87+
consecutive: false
88+
)
89+
end
90+
6091
# Used for stable sort via indentation level
6192
#
6293
# Ruby's sort is not "stable" meaning that when
@@ -151,90 +182,37 @@ def <=>(other)
151182
index <=> other.index
152183
end
153184

154-
# [Not stable API]
155-
#
156-
# Lines that have a `on_ignored_nl` type token and NOT
157-
# a `BEG` type seem to be a good proxy for the ability
158-
# to join multiple lines into one.
159-
#
160-
# This predicate method is used to determine when those
161-
# two criteria have been met.
162-
#
163-
# The one known case this doesn't handle is:
164-
#
165-
# Ripper.lex <<~EOM
166-
# a &&
167-
# b ||
168-
# c
169-
# EOM
170-
#
171-
# For some reason this introduces `on_ignore_newline` but with BEG type
172-
def ignore_newline_not_beg?
173-
@ignore_newline_not_beg
185+
# Can this line be logically joined together
186+
# with the following line? Determined by walking
187+
# the AST
188+
def consecutive?
189+
@consecutive
174190
end
175191

176-
# Determines if the given line has a trailing slash
192+
# Determines if the given line has a trailing slash.
193+
# Simply check if the line contains a backslash after
194+
# the content of the last token.
177195
#
178196
# lines = CodeLine.from_source(<<~EOM)
179197
# it "foo" \
180198
# EOM
181199
# expect(lines.first.trailing_slash?).to eq(true)
182200
#
183201
def trailing_slash?
184-
last = @tokens.last
185-
186-
# Older versions of prism diverged slightly from Ripper in compatibility mode
187-
case last&.type
188-
when :on_sp
189-
last.value == TRAILING_SLASH
190-
when :on_tstring_end
191-
true
192-
else
193-
false
194-
end
202+
return unless (last = @tokens.last)
203+
@line.byteindex(TRAILING_SLASH, last.location.end_column) != nil
195204
end
196205

197-
# Endless method detection
198-
#
199-
# From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab
200-
# Detecting a "oneliner" seems to need a state machine.
201-
# This can be done by looking mostly at the "state" (last value):
202-
#
203-
# ENDFN -> BEG (token = '=' ) -> END
204-
#
205206
private def set_kw_end
206-
oneliner_count = 0
207-
in_oneliner_def = nil
208-
209207
kw_count = 0
210208
end_count = 0
211209

212-
@ignore_newline_not_beg = false
213210
@tokens.each do |token|
214211
kw_count += 1 if token.is_kw?
215212
end_count += 1 if token.is_end?
216-
217-
if token.type == :on_ignored_nl
218-
@ignore_newline_not_beg = !token.expr_beg?
219-
end
220-
221-
if in_oneliner_def.nil?
222-
in_oneliner_def = :ENDFN if token.state.allbits?(Ripper::EXPR_ENDFN)
223-
elsif token.state.allbits?(Ripper::EXPR_ENDFN)
224-
# Continue
225-
elsif token.state.allbits?(Ripper::EXPR_BEG)
226-
in_oneliner_def = :BODY if token.value == "="
227-
elsif token.state.allbits?(Ripper::EXPR_END)
228-
# We found an endless method, count it
229-
oneliner_count += 1 if in_oneliner_def == :BODY
230-
231-
in_oneliner_def = nil
232-
else
233-
in_oneliner_def = nil
234-
end
235213
end
236214

237-
kw_count -= oneliner_count
215+
kw_count -= @endless_def_count
238216

239217
@is_kw = (kw_count - end_count) > 0
240218
@is_end = (end_count - kw_count) > 0

0 commit comments

Comments
 (0)