From b113e6812a76b15c9ae441d2b3accf65867123ae Mon Sep 17 00:00:00 2001 From: doorgan Date: Sat, 17 Jan 2026 07:52:51 -0300 Subject: [PATCH 1/2] fix: improve recovery from unexpected semicolons --- lib/spitfire.ex | 273 +++++++++++++++++++++++------------------ test/spitfire_test.exs | 58 +++++++-- 2 files changed, 207 insertions(+), 124 deletions(-) diff --git a/lib/spitfire.ex b/lib/spitfire.ex index 9db95c1..b78034b 100644 --- a/lib/spitfire.ex +++ b/lib/spitfire.ex @@ -131,9 +131,8 @@ defmodule Spitfire do def parse(code, opts \\ []) do parser = code |> new(opts) |> next_token() |> next_token() - # eat all the beginning eol tokens in case the file starts with a comment parser = - while current_token(parser) == :eol <- parser do + while current_token(parser) in [:eol, :";"] <- parser do next_token(parser) end @@ -195,6 +194,16 @@ defmodule Spitfire do while2 current_token(parser) != :eof <- parser do {ast, parser} = parse_expression(parser, @lowest, false, false, true) + {ast, parser} = + case ast do + {:comma, meta, _} -> + parser = put_error(parser, {meta, "unexpected token: ,"}) + {{:__block__, [{:error, true} | meta], []}, parser} + + _ -> + {ast, parser} + end + parser = cond do match?({:__block__, [{:error, true} | _], _}, ast) -> @@ -275,7 +284,7 @@ defmodule Spitfire do :"[" -> parse_list_literal(parser) :"(" -> parse_grouped_expression(parser) :"{" -> parse_tuple_literal(parser) - :";" -> raise "semicolon" + :";" -> parse_unexpected_semicolon(parser) :%{} -> parse_map_literal(parser) :% -> parse_struct_literal(parser) :ellipsis_op -> parse_ellipsis_op(parser) @@ -443,120 +452,136 @@ defmodule Spitfire do parser = Map.put(parser, :nesting, 0) - {expression, parser} = parse_expression(parser, @lowest, false, false, true) + if current_token(parser) == :")" do + closing_paren_meta = current_meta(parser) + parser = Map.put(parser, :nesting, old_nesting) + {{:__block__, [parens: opening_paren_meta ++ [closing: closing_paren_meta]], []}, parser} + else + {expression, parser} = parse_expression(parser, @lowest, false, false, true) - expression = push_eoe(expression, peek_eoe(parser)) + expression = push_eoe(expression, peek_eoe(parser)) - cond do - # if the next token is the closing paren or if the next token is a newline and the next next token is the closing paren - peek_token(parser) == :")" || (peek_token(parser) == :eol && peek_token(next_token(parser)) == :")") -> - parser = - parser - |> Map.put(:nesting, old_nesting) - |> next_token() - |> eat_eol() + cond do + # if the next token is the closing paren or if the next token is a newline and the next next token is the closing paren + peek_token(parser) == :")" || (peek_token(parser) == :eol && peek_token(next_token(parser)) == :")") -> + parser = + parser + |> Map.put(:nesting, old_nesting) + |> next_token() + |> eat_eol() - closing_paren_meta = current_meta(parser) + closing_paren_meta = current_meta(parser) - ast = - case expression do - # unquote splicing is special cased, if it has one expression as an arg, its wrapped in a block - {:unquote_splicing, _, [_]} -> - {:__block__, [{:closing, current_meta(parser)} | orig_meta], [expression]} + ast = + case expression do + # unquote splicing is special cased, if it has one expression as an arg, its wrapped in a block + {:unquote_splicing, _, [_]} -> + {:__block__, [{:closing, current_meta(parser)} | orig_meta], [expression]} - # not and ! are special cased, if it has one expression as an arg, its wrapped in a block - {op, _, [_]} when op in [:not, :!] -> - {:__block__, [], [expression]} + # not and ! are special cased, if it has one expression as an arg, its wrapped in a block + {op, _, [_]} when op in [:not, :!] -> + {:__block__, [], [expression]} - {:->, _, _} -> - [expression] + {:->, _, _} -> + [expression] - {f, meta, a} -> - {f, [parens: opening_paren_meta ++ [closing: closing_paren_meta]] ++ meta, a} + {f, meta, a} -> + {f, [parens: opening_paren_meta ++ [closing: closing_paren_meta]] ++ meta, a} - expression -> - expression - end + expression -> + expression + end - {ast, parser} + {ast, parser} - # if the next token is a new line, but the next next token is not the closing paren (implied from previous clause) - peek_token(parser) == :eol or current_token(parser) == :-> -> - # second conditon checks of the next next token is a closing paren or another expression - {exprs, parser} = - while2 current_token(parser) == :-> || - (peek_token(parser) == :eol && parser |> next_token() |> peek_token() != :")") <- parser do - {ast, parser} = - case Map.get(parser, :stab_state) do - %{ast: lhs} -> - {ast, parser} = parse_stab_expression(Map.delete(parser, :stab_state), lhs) - - {ast, parser} = - if current_token(parser) == :-> do - {ast, parser} - else - if peek_token(parser) == :")" do + peek_token(parser) in [:eol, :";"] or current_token(parser) == :-> -> + # second conditon checks of the next next token is a closing paren or another expression + {exprs, parser} = + while2 current_token(parser) == :-> || + (peek_token(parser) in [:eol, :";"] && parser |> next_token() |> peek_token() != :")") <- + parser do + {ast, parser} = + case Map.get(parser, :stab_state) do + %{ast: lhs} -> + {ast, parser} = parse_stab_expression(Map.delete(parser, :stab_state), lhs) + + {ast, parser} = + if current_token(parser) == :-> do {ast, parser} else - eoe = current_eoe(parser) - ast = push_eoe(ast, eoe) - {ast, next_token(parser)} + if peek_token(parser) == :")" do + {ast, parser} + else + eoe = current_eoe(parser) + ast = push_eoe(ast, eoe) + {ast, next_token(parser)} + end end - end - {ast, parser} + {ast, parser} - nil -> - parser = parser |> next_token() |> eat_eol() - {ast, parser} = parse_expression(parser, @lowest, false, false, true) + nil -> + parser = parser |> next_token() |> eat_eol() + {ast, parser} = parse_expression(parser, @lowest, false, false, true) - {ast, parser} = - cond do - current_token(parser) == :-> -> - {ast, parser} + {ast, parser} = + cond do + current_token(parser) == :-> -> + {ast, parser} - peek_token(parser) == :")" -> - {ast, parser} - - true -> - eoe = peek_eoe(parser) - ast = push_eoe(ast, eoe) - {ast, parser} - end + peek_token(parser) == :")" -> + {ast, parser} - {ast, parser} - end + true -> + eoe = peek_eoe(parser) + ast = push_eoe(ast, eoe) + {ast, parser} + end - {ast, parser} - end + {ast, parser} + end - # handles if the closing paren is on a new line or the same line - parser = - if peek_token(parser) == :eol do - next_token(parser) - else - parser - end + {ast, parser} + end - if peek_token(parser) == :")" do + # handles if the closing paren is on a new line or the same line parser = - parser - |> Map.put(:nesting, old_nesting) - |> next_token() + if peek_token(parser) == :eol do + next_token(parser) + else + parser + end - exprs = [expression | exprs] + if peek_token(parser) == :")" do + parser = + parser + |> Map.put(:nesting, old_nesting) + |> next_token() - ast = - case exprs do - [{:->, _, _} | _] -> - exprs + exprs = [expression | exprs] - _ -> - {:__block__, [{:closing, current_meta(parser)} | orig_meta], exprs} - end + ast = + case exprs do + [{:->, _, _} | _] -> + exprs - {ast, parser} - else + _ -> + {:__block__, [{:closing, current_meta(parser)} | orig_meta], exprs} + end + + {ast, parser} + else + meta = current_meta(parser) + + parser = + parser + |> put_error({meta, "missing closing parentheses"}) + |> Map.put(:nesting, old_nesting) + + {{:__block__, [{:error, true} | meta], []}, next_token(parser)} + end + + true -> meta = current_meta(parser) parser = @@ -565,19 +590,24 @@ defmodule Spitfire do |> Map.put(:nesting, old_nesting) {{:__block__, [{:error, true} | meta], []}, next_token(parser)} - end + end + end + end + end + end - true -> - meta = current_meta(parser) + defp parse_unexpected_semicolon(parser) do + meta = current_meta(parser) + parser = put_error(parser, {meta, "unexpected token: ;"}) + parser = parser |> next_token() |> eat_eol_only() - parser = - parser - |> put_error({meta, "missing closing parentheses"}) - |> Map.put(:nesting, old_nesting) + case current_token_type(parser) do + type when type in [:eof, :end, :block_identifier, :")", :"]", :"}", :">>"] -> + {{:__block__, [{:error, true} | meta], []}, parser} - {{:__block__, [{:error, true} | meta], []}, next_token(parser)} - end - end + _ -> + {expr, parser} = parse_expression(parser, @lowest, false, false, true) + {{:__block__, [{:error, true} | meta], [expr]}, parser} end end @@ -977,7 +1007,8 @@ defmodule Spitfire do defp parse_access_expression(parser, lhs) do trace "parse_access_expression", trace_meta(parser) do meta = current_meta(parser) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eol_only() + {rhs, parser} = parse_expression(parser, @lowest, false, false, false) extra_meta = [from_brackets: true] @@ -988,7 +1019,7 @@ defmodule Spitfire do nl -> [newlines: nl] end - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eol_only() closing = current_meta(parser) meta = extra_meta ++ newlines ++ [{:closing, closing} | meta] @@ -1564,7 +1595,7 @@ defmodule Spitfire do meta = current_meta(parser) orig_parser = parser newlines = get_newlines(parser) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eol_only() cond do current_token(parser) == :">>" -> @@ -1591,9 +1622,9 @@ defmodule Spitfire do case peek_token_eat_eol(parser) do :">>" -> - parser = eat_eol_at(parser, 1) + parser = eat_eol_only_at(parser, 1) parser = next_token(parser) - {{:<<>>, newlines ++ [{:closing, current_meta(parser)} | meta], pairs}, eat_eol(parser)} + {{:<<>>, newlines ++ [{:closing, current_meta(parser)} | meta], pairs}, eat_eol_only(parser)} _ -> all_pairs = pairs |> Enum.reverse() |> Enum.zip(Process.get(:comma_list_parsers)) @@ -1636,7 +1667,7 @@ defmodule Spitfire do parser = next_token(parser) newlines = peek_newlines(parser) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eol_only() old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -1661,7 +1692,7 @@ defmodule Spitfire do true -> {pairs, parser} = parse_comma_list(parser, @list_comma, false, true) - parser = eat_eol_at(parser, 1) + parser = eat_eol_only_at(parser, 1) parser = case peek_token(parser) do @@ -1764,7 +1795,7 @@ defmodule Spitfire do nl -> [newlines: nl] end - parser = eat_eol(parser) + parser = eat_eol_only(parser) old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -1777,7 +1808,7 @@ defmodule Spitfire do else {pairs, parser} = parse_comma_list(parser, @list_comma, false, true) - parser = eat_eol_at(parser, 1) + parser = eat_eol_only_at(parser, 1) parser = case peek_token(parser) do @@ -1802,7 +1833,7 @@ defmodule Spitfire do orig_parser = parser newlines = peek_newlines(parser) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eol_only() old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -1843,8 +1874,8 @@ defmodule Spitfire do {pairs, parser} = case peek_token_eat_eol(parser) do :"}" -> - parser = eat_eol_at(parser, 1) - {pairs, parser |> next_token() |> eat_eol()} + parser = eat_eol_only_at(parser, 1) + {pairs, parser |> next_token() |> eat_eol_only()} _ -> all_pairs = pairs |> Enum.reverse() |> Enum.zip(Process.get(:comma_list_parsers)) @@ -1903,7 +1934,7 @@ defmodule Spitfire do trace "parse_list_literal", trace_meta(parser) do meta = current_meta(parser) orig_parser = parser - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eol_only() old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -1936,7 +1967,7 @@ defmodule Spitfire do case peek_token_eat_eol(parser) do :"]" -> - parser = eat_eol_at(parser, 1) + parser = eat_eol_only_at(parser, 1) parser = Map.put(parser, :nesting, old_nesting) {encode_literal(parser, pairs, orig_meta), next_token(parser)} @@ -2013,12 +2044,12 @@ defmodule Spitfire do {pairs, parser} = parser |> next_token() - |> eat_eol() + |> eat_eol_only() |> parse_comma_list() parser = Map.put(parser, :nesting, old_nesting) - parser = eat_eol_at(parser, 1) + parser = eat_eol_only_at(parser, 1) case peek_token(parser) do :")" -> @@ -2390,10 +2421,18 @@ defmodule Spitfire do eat(%{:eol => true, :";" => true}, parser) end + defp eat_eol_only(parser) do + eat(%{:eol => true}, parser) + end + defp eat_eol_at(parser, idx) do eat_at(parser, [:eol, :";"], idx) end + defp eat_eol_only_at(parser, idx) do + eat_at(parser, [:eol], idx) + end + defp eat_at(parser, tokens, idx) when is_list(tokens) do eat_at(parser, Map.new(tokens, &{&1, true}), idx) end diff --git a/test/spitfire_test.exs b/test/spitfire_test.exs index a806b26..4daecc6 100644 --- a/test/spitfire_test.exs +++ b/test/spitfire_test.exs @@ -33,13 +33,6 @@ defmodule SpitfireTest do ''' assert Spitfire.parse(code) == s2q(code) - - # FIXME: spitfire currently parses this successfully, which is wrong, it should be an error - # code = ~S''' - # foo, do: IO.inspect("bob"); "bob" - # ''' - - # assert Spitfire.parse(code) == s2q(code) end test "parses valid elixir" do @@ -2805,6 +2798,57 @@ defmodule SpitfireTest do } end + test "orphan comma" do + code = ~S''' + foo, do: IO.inspect("bob"); "bob" + ''' + + assert {:error, _ast, errors} = Spitfire.parse(code) + assert Enum.any?(errors, fn {_, msg} -> String.contains?(msg, ",") end) + end + + test "unexpected semicolon" do + code = ~S""" + defmodule MyModule do + import List + ; (__cursor__()) + end + """ + + assert {:error, _ast, errors} = Spitfire.parse(code) + assert Enum.any?(errors, fn {_, msg} -> String.contains?(msg, ";") end) + + code = ~S""" + defmodule MyModule do + foo + + ; bar + end + """ + + assert {:error, _ast, errors} = Spitfire.parse(code) + assert Enum.any?(errors, fn {_, msg} -> String.contains?(msg, ";") end) + end + + test "semicolon in list/tuple/map" do + assert {:error, _, errors} = Spitfire.parse("[;]") + assert Enum.any?(errors, fn {_, msg} -> msg == "unexpected token: ;" end) + + assert {:error, _, errors} = Spitfire.parse("{;}") + assert Enum.any?(errors, fn {_, msg} -> msg == "unexpected token: ;" end) + + assert {:error, _, errors} = Spitfire.parse("%{;}") + assert Enum.any?(errors, fn {_, msg} -> msg == "unexpected token: ;" end) + end + + test "semicolon in parentheses is valid empty block" do + assert {:ok, {:__block__, _, []}} = Spitfire.parse("(;)") + end + + test "leading semicolon is skipped" do + assert {:ok, {:foo, _, nil}} = Spitfire.parse("; foo") + end + test "weird characters" do code = """ [«] From 577de80d4fa5f0286b5c3aa735ac5400b4f6f000 Mon Sep 17 00:00:00 2001 From: doorgan Date: Sat, 17 Jan 2026 10:02:47 -0300 Subject: [PATCH 2/2] fix: don't report errors on valid code rename eat_* helpers to use `eoe` when it eats eol and ; --- lib/spitfire.ex | 194 +++++++++++++++++++++++------------------ test/spitfire_test.exs | 63 ++++++------- 2 files changed, 137 insertions(+), 120 deletions(-) diff --git a/lib/spitfire.ex b/lib/spitfire.ex index b78034b..a109164 100644 --- a/lib/spitfire.ex +++ b/lib/spitfire.ex @@ -218,7 +218,7 @@ defmodule Spitfire do ast = push_eoe(ast, current_eoe(parser)) - {ast, eat_eol(parser)} + {ast, eat_eoe(parser)} end exprs = build_block_nr(exprs) @@ -441,13 +441,13 @@ defmodule Spitfire do {{:__block__, [{:error, true} | meta], []}, next_token(parser)} peek_token(parser) == :")" -> - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() closing_paren_meta = current_meta(parser) {{:__block__, [parens: opening_paren_meta ++ [closing: closing_paren_meta]], []}, parser} true -> orig_meta = current_meta(parser) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -462,13 +462,13 @@ defmodule Spitfire do expression = push_eoe(expression, peek_eoe(parser)) cond do - # if the next token is the closing paren or if the next token is a newline and the next next token is the closing paren - peek_token(parser) == :")" || (peek_token(parser) == :eol && peek_token(next_token(parser)) == :")") -> + # if the next token is the closing paren, or if after skipping eol/semicolons we find the closing paren + peek_token(parser) == :")" || (peek_token(parser) in [:eol, :";"] && peek_token_skip_eoe(parser) == :")") -> parser = parser |> Map.put(:nesting, old_nesting) |> next_token() - |> eat_eol() + |> eat_eoe() closing_paren_meta = current_meta(parser) @@ -521,7 +521,7 @@ defmodule Spitfire do {ast, parser} nil -> - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {ast, parser} = parse_expression(parser, @lowest, false, false, true) {ast, parser} = @@ -544,12 +544,10 @@ defmodule Spitfire do {ast, parser} end - # handles if the closing paren is on a new line or the same line + # handles if the closing paren is on a new line or after semicolons parser = - if peek_token(parser) == :eol do + while peek_token(parser) in [:eol, :";"] <- parser do next_token(parser) - else - parser end if peek_token(parser) == :")" do @@ -599,7 +597,7 @@ defmodule Spitfire do defp parse_unexpected_semicolon(parser) do meta = current_meta(parser) parser = put_error(parser, {meta, "unexpected token: ;"}) - parser = parser |> next_token() |> eat_eol_only() + parser = parser |> next_token() |> eat_eol() case current_token_type(parser) do type when type in [:eof, :end, :block_identifier, :")", :"]", :"}", :">>"] -> @@ -621,7 +619,7 @@ defmodule Spitfire do defp parse_kw_identifier(%{current_token: {:kw_identifier, meta, token}} = parser) do trace "parse_kw_identifier", trace_meta(parser) do token = encode_literal(parser, token, meta) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {expr, parser} = parse_expression(parser, @kw_identifier, false, false, false) @@ -632,7 +630,7 @@ defmodule Spitfire do defp parse_kw_identifier(%{current_token: {:kw_identifier_unsafe, meta, tokens}} = parser) do trace "parse_kw_identifier (unsafe)", trace_meta(parser) do {atom, parser} = parse_atom(%{parser | current_token: {:atom_unsafe, meta, tokens}}) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {expr, parser} = parse_expression(parser, @kw_identifier, false, false, false) @@ -651,7 +649,7 @@ defmodule Spitfire do defp parse_bracketless_kw_list(%{current_token: {:kw_identifier, meta, token}} = parser) do trace "parse_bracketless_kw_list", trace_meta(parser) do token = encode_literal(parser, token, meta) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {value, parser} = parse_expression(parser, @kw_identifier, false, false, false) @@ -670,7 +668,7 @@ defmodule Spitfire do defp parse_bracketless_kw_list(%{current_token: {:kw_identifier_unsafe, meta, tokens}} = parser) do trace "parse_bracketless_kw_list (unsafe)", trace_meta(parser) do {atom, parser} = parse_atom(%{parser | current_token: {:atom_unsafe, meta, tokens}}) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() atom = case atom do @@ -697,7 +695,7 @@ defmodule Spitfire do defp parse_assoc_op(%{current_token: {:assoc_op, _, _token}} = parser, key) do trace "parse_assoc_op", trace_meta(parser) do assoc_meta = current_meta(parser) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {value, parser} = parse_expression(parser, @assoc_op, false, false, false) key = @@ -758,7 +756,7 @@ defmodule Spitfire do current_precedence(parser) end - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {rhs, parser} = parse_expression(parser, precedence, false, false, false) ast = {token, meta, [rhs]} @@ -817,12 +815,12 @@ defmodule Spitfire do {exprs, parser} = while2 peek_token(parser) not in [:end, :")"] <- parser do - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {ast, parser} = parse_expression(parser, @lowest, false, false, true) eoe = peek_eoe(parser) - parser = eat_eol_at(parser, 1) + parser = eat_eoe_at(parser, 1) ast = push_eoe(ast, eoe) @@ -870,7 +868,7 @@ defmodule Spitfire do meta = current_meta(parser) newlines = get_newlines(parser) - parser = eat_eol_at(parser, 1) + parser = eat_eoe_at(parser, 1) old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -884,9 +882,9 @@ defmodule Spitfire do if Map.get(parser, :stab_state) == nil do eoe = peek_eoe(parser) ast = push_eoe(ast, eoe) - parser = eat_eol_at(parser, 1) + parser = eat_eoe_at(parser, 1) - {ast, eat_eol(parser)} + {ast, eat_eoe(parser)} else {:filter, {nil, next_token(parser)}} end @@ -917,17 +915,17 @@ defmodule Spitfire do parser = Map.put(parser, :nesting, old_nesting) - {ast, eat_eol(parser)} + {ast, eat_eoe(parser)} end end end defp parse_comma(parser, lhs) do trace "parse_comma", trace_meta(parser) do - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {exprs, parser} = parse_comma_list(parser, @comma) - {{:comma, [], [lhs | exprs]}, eat_eol(parser)} + {{:comma, [], [lhs | exprs]}, eat_eoe(parser)} end end @@ -945,7 +943,7 @@ defmodule Spitfire do nl -> [newlines: nl] end - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {rhs, parser} = parse_expression(parser, precedence, false, false, false) @@ -994,7 +992,7 @@ defmodule Spitfire do parser = next_token(parser) - parser = eat_eol(parser) + parser = eat_eoe(parser) {pairs, parser} = parse_comma_list(parser, @list_comma, false, true) @@ -1007,7 +1005,7 @@ defmodule Spitfire do defp parse_access_expression(parser, lhs) do trace "parse_access_expression", trace_meta(parser) do meta = current_meta(parser) - parser = parser |> next_token() |> eat_eol_only() + parser = parser |> next_token() |> eat_eol() {rhs, parser} = parse_expression(parser, @lowest, false, false, false) @@ -1019,7 +1017,7 @@ defmodule Spitfire do nl -> [newlines: nl] end - parser = parser |> next_token() |> eat_eol_only() + parser = parser |> next_token() |> eat_eol() closing = current_meta(parser) meta = extra_meta ++ newlines ++ [{:closing, closing} | meta] @@ -1049,9 +1047,9 @@ defmodule Spitfire do parser = parser |> next_token() |> next_token() {rrhs, parser} = parse_expression(parser, precedence, false, false, false) - {{:..//, meta, [lhs, rhs, rrhs]}, eat_eol(parser)} + {{:..//, meta, [lhs, rhs, rrhs]}, eat_eoe(parser)} else - {{token, meta, [lhs, rhs]}, eat_eol(parser)} + {{token, meta, [lhs, rhs]}, eat_eoe(parser)} end end end @@ -1099,16 +1097,16 @@ defmodule Spitfire do parser = Map.put(parser, :nesting, 0) {exprs, {_, parser}} = - while2 peek_token_eat_eol(parser) not in [:end, :eof] <- {type, parser} do + while2 peek_token_eat_eoe(parser) not in [:end, :eof] <- {type, parser} do {exprs, parser} = - while2 peek_token_eat_eol(parser) not in [:end, :block_identifier, :eof] <- parser do + while2 peek_token_eat_eoe(parser) not in [:end, :block_identifier, :eof] <- parser do {ast, parser} = case Map.get(parser, :stab_state) do %{ast: lhs} -> parse_stab_expression(Map.delete(parser, :stab_state), lhs) nil -> - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() parse_expression(parser, @lowest, false, false, true) end @@ -1119,9 +1117,9 @@ defmodule Spitfire do {ast, parser} end - case peek_token_eat_eol(parser) do + case peek_token_eat_eoe(parser) do :block_identifier -> - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {:block_identifier, meta, token} = parser.current_token {{type, exprs}, {encode_literal(parser, token, meta), parser}} @@ -1139,8 +1137,8 @@ defmodule Spitfire do end {parser, end_meta} = - if peek_token_eat_eol(parser) == :end do - parser = parser |> next_token() |> eat_eol() + if peek_token_eat_eoe(parser) == :end do + parser = parser |> next_token() |> eat_eoe() {parser, current_meta(parser)} else {put_error(parser, {do_meta, "missing `end` for do block"}), do_meta} @@ -1185,9 +1183,9 @@ defmodule Spitfire do parser = next_token(parser) newlines = get_newlines(parser) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {multis, parser} = parse_comma_list(parser) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() multis = {{:., dot_meta, [lhs, :{}]}, newlines ++ [{:closing, current_meta(parser)} | dot_meta], multis} @@ -1215,7 +1213,7 @@ defmodule Spitfire do parser = next_token(parser) {ast, parser} = parse_access_expression(parser, rhs) - {ast, eat_eol(parser)} + {ast, eat_eoe(parser)} type when type in [:identifier, :paren_identifier, :do_identifier] -> parser = next_token(parser) @@ -1256,7 +1254,7 @@ defmodule Spitfire do meta = current_meta(parser) newlines = get_newlines(parser) - parser = parser |> next_token() |> eat_eol() + parser = parser |> next_token() |> eat_eoe() {exprs, parser} = while2 current_token(parser) not in [:end, :eof] <- parser do @@ -1276,7 +1274,7 @@ defmodule Spitfire do parser = next_token(parser) eoe = current_eoe(parser) ast = push_eoe(ast, eoe) - {ast, eat_eol(parser)} + {ast, eat_eoe(parser)} end end @@ -1296,7 +1294,7 @@ defmodule Spitfire do parser = next_token(parser) eoe = current_eoe(parser) ast = push_eoe(ast, eoe) - {ast, eat_eol(parser)} + {ast, eat_eoe(parser)} end end @@ -1325,7 +1323,7 @@ defmodule Spitfire do parser = next_token(parser) newlines = get_newlines(parser) - parser = eat_eol(parser) + parser = eat_eoe(parser) if peek_token(parser) == :")" do parser = next_token(parser) @@ -1333,8 +1331,8 @@ defmodule Spitfire do ast = {{:., meta, [lhs]}, newlines ++ closing ++ meta, []} {ast, parser} else - {pairs, parser} = parse_comma_list(parser |> next_token() |> eat_eol()) - parser = parser |> next_token() |> eat_eol() + {pairs, parser} = parse_comma_list(parser |> next_token() |> eat_eoe()) + parser = parser |> next_token() |> eat_eoe() closing = [closing: current_meta(parser)] ast = {{:., meta, [lhs]}, newlines ++ closing ++ meta, pairs} @@ -1447,7 +1445,7 @@ defmodule Spitfire do } |> next_token() |> next_token() - |> eat_eol() + |> eat_eoe() {ast, parser} = parse_expression(parser) ast = push_eoe(ast, peek_eoe(parser)) @@ -1523,7 +1521,7 @@ defmodule Spitfire do } |> next_token() |> next_token() - |> eat_eol() + |> eat_eoe() {ast, parser} = parse_expression(parser) ast = push_eoe(ast, peek_eoe(parser)) @@ -1595,7 +1593,7 @@ defmodule Spitfire do meta = current_meta(parser) orig_parser = parser newlines = get_newlines(parser) - parser = parser |> next_token() |> eat_eol_only() + parser = parser |> next_token() |> eat_eol() cond do current_token(parser) == :">>" -> @@ -1620,11 +1618,11 @@ defmodule Spitfire do old_comma_list_parsers = Process.get(:comma_list_parsers) {pairs, parser} = parse_comma_list(parser, @list_comma, true, false) - case peek_token_eat_eol(parser) do + case peek_token_eat_eoe(parser) do :">>" -> - parser = eat_eol_only_at(parser, 1) + parser = eat_eol_at(parser, 1) parser = next_token(parser) - {{:<<>>, newlines ++ [{:closing, current_meta(parser)} | meta], pairs}, eat_eol_only(parser)} + {{:<<>>, newlines ++ [{:closing, current_meta(parser)} | meta], pairs}, eat_eol(parser)} _ -> all_pairs = pairs |> Enum.reverse() |> Enum.zip(Process.get(:comma_list_parsers)) @@ -1667,7 +1665,7 @@ defmodule Spitfire do parser = next_token(parser) newlines = peek_newlines(parser) - parser = parser |> next_token() |> eat_eol_only() + parser = parser |> next_token() |> eat_eol() old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -1692,7 +1690,7 @@ defmodule Spitfire do true -> {pairs, parser} = parse_comma_list(parser, @list_comma, false, true) - parser = eat_eol_only_at(parser, 1) + parser = eat_eol_at(parser, 1) parser = case peek_token(parser) do @@ -1795,7 +1793,7 @@ defmodule Spitfire do nl -> [newlines: nl] end - parser = eat_eol_only(parser) + parser = eat_eol(parser) old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -1808,7 +1806,7 @@ defmodule Spitfire do else {pairs, parser} = parse_comma_list(parser, @list_comma, false, true) - parser = eat_eol_only_at(parser, 1) + parser = eat_eol_at(parser, 1) parser = case peek_token(parser) do @@ -1833,7 +1831,7 @@ defmodule Spitfire do orig_parser = parser newlines = peek_newlines(parser) - parser = parser |> next_token() |> eat_eol_only() + parser = parser |> next_token() |> eat_eol() old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -1872,10 +1870,10 @@ defmodule Spitfire do {pairs, parser} = parse_comma_list(parser) {pairs, parser} = - case peek_token_eat_eol(parser) do + case peek_token_eat_eoe(parser) do :"}" -> - parser = eat_eol_only_at(parser, 1) - {pairs, parser |> next_token() |> eat_eol_only()} + parser = eat_eol_at(parser, 1) + {pairs, parser |> next_token() |> eat_eol()} _ -> all_pairs = pairs |> Enum.reverse() |> Enum.zip(Process.get(:comma_list_parsers)) @@ -1934,7 +1932,7 @@ defmodule Spitfire do trace "parse_list_literal", trace_meta(parser) do meta = current_meta(parser) orig_parser = parser - parser = parser |> next_token() |> eat_eol_only() + parser = parser |> next_token() |> eat_eol() old_nesting = parser.nesting parser = Map.put(parser, :nesting, 0) @@ -1963,11 +1961,11 @@ defmodule Spitfire do old_comma_list_parsers = Process.get(:comma_list_parsers) {pairs, parser} = parse_comma_list(parser, @list_comma, true, false) - # parser = eat_eol_at(parser, 1) + # parser = eat_eoe_at(parser, 1) - case peek_token_eat_eol(parser) do + case peek_token_eat_eoe(parser) do :"]" -> - parser = eat_eol_only_at(parser, 1) + parser = eat_eol_at(parser, 1) parser = Map.put(parser, :nesting, old_nesting) {encode_literal(parser, pairs, orig_meta), next_token(parser)} @@ -2044,12 +2042,12 @@ defmodule Spitfire do {pairs, parser} = parser |> next_token() - |> eat_eol_only() + |> eat_eol() |> parse_comma_list() parser = Map.put(parser, :nesting, old_nesting) - parser = eat_eol_only_at(parser, 1) + parser = eat_eol_at(parser, 1) case peek_token(parser) do :")" -> @@ -2202,10 +2200,10 @@ defmodule Spitfire do {pairs, parser} = parser |> next_token() - |> eat_eol() + |> eat_eoe() |> parse_comma_list() - parser = eat_eol_at(parser, 1) + parser = eat_eoe_at(parser, 1) parser = case peek_token(parser) do @@ -2305,7 +2303,7 @@ defmodule Spitfire do } |> next_token() |> next_token() - |> eat_eol() + |> eat_eoe() {ast, parser} = parse_expression(parser) ast = push_eoe(ast, peek_eoe(parser)) @@ -2417,19 +2415,31 @@ defmodule Spitfire do parser end - defp eat_eol(parser) do - eat(%{:eol => true, :";" => true}, parser) + defp eat_eoe(parser) do + case current_token(parser) do + type when type in [:eol, :";"] -> eat_eoe(next_token(parser)) + _ -> parser + end end - defp eat_eol_only(parser) do + defp eat_eol(parser) do eat(%{:eol => true}, parser) end - defp eat_eol_at(parser, idx) do - eat_at(parser, [:eol, :";"], idx) + defp eat_eoe_at(%{tokens: [next | rest]} = parser, 1) do + case peek_token(parser) do + type when type in [:eol, :";"] -> + eat_eoe_at(%{parser | peek_token: next, tokens: rest}, 1) + + _ -> + parser + end end - defp eat_eol_only_at(parser, idx) do + defp eat_eoe_at(%{tokens: []} = parser, 1), do: parser + defp eat_eoe_at(%{tokens: :eot} = parser, 1), do: parser + + defp eat_eol_at(parser, idx) do eat_at(parser, [:eol], idx) end @@ -2481,38 +2491,50 @@ defmodule Spitfire do :eof end - defp peek_token_eat_eol(%{peek_token: {:eol, _token}} = parser) do - peek_token_eat_eol(next_token(parser)) + defp peek_token_eat_eoe(%{peek_token: {:eol, _token}} = parser) do + peek_token_eat_eoe(next_token(parser)) end - defp peek_token_eat_eol(%{peek_token: {:stab_op, _, token}}) do + defp peek_token_eat_eoe(%{peek_token: {:stab_op, _, token}}) do token end - defp peek_token_eat_eol(%{peek_token: {type, _, _, _}}) when type in [:list_heredoc, :bin_heredoc] do + defp peek_token_eat_eoe(%{peek_token: {type, _, _, _}}) when type in [:list_heredoc, :bin_heredoc] do type end - defp peek_token_eat_eol(%{peek_token: {token, _, _}}) do + defp peek_token_eat_eoe(%{peek_token: {token, _, _}}) do token end - defp peek_token_eat_eol(%{peek_token: {token, _}}) do + defp peek_token_eat_eoe(%{peek_token: {token, _}}) do token end - defp peek_token_eat_eol(%{peek_token: {token, _, _, _, _, _, _}}) do + defp peek_token_eat_eoe(%{peek_token: {token, _, _, _, _, _, _}}) do token end - defp peek_token_eat_eol(%{peek_token: :eof}) do + defp peek_token_eat_eoe(%{peek_token: :eof}) do :eof end - defp peek_token_eat_eol(%{tokens: :eot}) do + defp peek_token_eat_eoe(%{tokens: :eot}) do :eof end + defp peek_token_skip_eoe(%{peek_token: {:eol, _}} = parser) do + peek_token_skip_eoe(next_token(parser)) + end + + defp peek_token_skip_eoe(%{peek_token: {:";", _}} = parser) do + peek_token_skip_eoe(next_token(parser)) + end + + defp peek_token_skip_eoe(parser) do + peek_token(parser) + end + defp current_token_type(%{tokens: :eot}) do :eot end diff --git a/test/spitfire_test.exs b/test/spitfire_test.exs index 4daecc6..a8fa2e4 100644 --- a/test/spitfire_test.exs +++ b/test/spitfire_test.exs @@ -42,7 +42,7 @@ defmodule SpitfireTest do some: :option def run(arg) do - bar() + bar() :ok end end @@ -157,7 +157,7 @@ defmodule SpitfireTest do test "parses strings" do code = ~s''' - "foobar" + "foobar" ''' assert Spitfire.parse(code) == s2q(code) @@ -173,7 +173,7 @@ defmodule SpitfireTest do test "parses charlists" do code = ~s''' - 'foobar' + 'foobar' ''' assert Spitfire.parse(code) == s2q(code) @@ -187,7 +187,7 @@ defmodule SpitfireTest do assert Spitfire.parse(code) == s2q(code) code = ~S''' - 'foo#{alice}bar' + 'foo#{alice}bar' ''' assert Spitfire.parse(code) == s2q(code) @@ -195,7 +195,7 @@ defmodule SpitfireTest do code = ~S''' 'foo#{ alice - }bar' + }bar' ''' assert Spitfire.parse(code) == s2q(code) @@ -940,7 +940,7 @@ defmodule SpitfireTest do ''', ~S''' if group_id do - [~S( data-group-id="), group_id, ~S(")] + [~S( data-group-id="), group_id, ~S(")] else [] end @@ -1039,10 +1039,10 @@ defmodule SpitfireTest do case infix do nil -> {left, parser} - + ^do_block when parser.nestings != [] -> {left, next_token(parser)} - + _ -> infix.(next_token(parser), left) end @@ -1979,7 +1979,7 @@ defmodule SpitfireTest do else :bob end - end + end """ assert Spitfire.parse(code) == s2q(code) @@ -2310,7 +2310,7 @@ defmodule SpitfireTest do test "missing end parentheses in function call" do code = ~S''' - foo(1 + + foo(1 + bar(two) ''' @@ -2331,7 +2331,7 @@ defmodule SpitfireTest do test "missing closing end to anon function and paren" do code = ~S''' - new_list = + new_list = Enum.map(some_list, fn item -> @@ -2807,29 +2807,6 @@ defmodule SpitfireTest do assert Enum.any?(errors, fn {_, msg} -> String.contains?(msg, ",") end) end - test "unexpected semicolon" do - code = ~S""" - defmodule MyModule do - import List - ; (__cursor__()) - end - """ - - assert {:error, _ast, errors} = Spitfire.parse(code) - assert Enum.any?(errors, fn {_, msg} -> String.contains?(msg, ";") end) - - code = ~S""" - defmodule MyModule do - foo - - ; bar - end - """ - - assert {:error, _ast, errors} = Spitfire.parse(code) - assert Enum.any?(errors, fn {_, msg} -> String.contains?(msg, ";") end) - end - test "semicolon in list/tuple/map" do assert {:error, _, errors} = Spitfire.parse("[;]") assert Enum.any?(errors, fn {_, msg} -> msg == "unexpected token: ;" end) @@ -2849,6 +2826,24 @@ defmodule SpitfireTest do assert {:ok, {:foo, _, nil}} = Spitfire.parse("; foo") end + test "semicolon as statement separator is valid" do + assert {:ok, _} = Spitfire.parse("foo\n; bar") + assert {:ok, _} = Spitfire.parse("foo\n\n; bar") + + assert {:ok, 1} = Spitfire.parse("(1;)") + assert {:ok, 1} = Spitfire.parse("(1;\n)") + assert {:ok, 1} = Spitfire.parse("(1\n;)") + + code = ~S""" + defmodule MyModule do + foo + ; bar + end + """ + + assert {:ok, _} = Spitfire.parse(code) + end + test "weird characters" do code = """ [«]