From 8d3c514a8334bf45e20c39cad06367204bfd5a5e Mon Sep 17 00:00:00 2001 From: ydah Date: Sun, 21 Dec 2025 21:00:28 +0900 Subject: [PATCH 1/2] Support parser generation without %union directive (Bison compatibility) ## Motivation When writing simple parser samples or prototypes, defining `%union` can be cumbersome and unnecessary. Bison allows users to generate parsers without the `%union` directive, defaulting `YYSTYPE` to `int`. This PR brings the same convenience to lrama for better Bison compatibility. ## How it works When %union is not defined: 1. YYSTYPE defaults to int (same as Bison) 2. Semantic value references are generated without union member access 3. The parser works exactly like Bison-generated parsers --- lib/lrama/grammar/code/rule_action.rb | 29 ++++++++--- .../lrama/grammar/code/rule_action.rbs | 3 ++ spec/fixtures/common/no_union.y | 25 ++++++++++ spec/fixtures/common/no_union_with_type.y | 27 ++++++++++ spec/fixtures/integration/no_union.l | 40 +++++++++++++++ spec/fixtures/integration/no_union.y | 49 +++++++++++++++++++ spec/lrama/integration_spec.rb | 6 +++ spec/lrama/parser_spec.rb | 31 ++++++++++++ 8 files changed, 204 insertions(+), 6 deletions(-) create mode 100644 spec/fixtures/common/no_union.y create mode 100644 spec/fixtures/common/no_union_with_type.y create mode 100644 spec/fixtures/integration/no_union.l create mode 100644 spec/fixtures/integration/no_union.y diff --git a/lib/lrama/grammar/code/rule_action.rb b/lib/lrama/grammar/code/rule_action.rb index e71e93e5..753a9865 100644 --- a/lib/lrama/grammar/code/rule_action.rb +++ b/lib/lrama/grammar/code/rule_action.rb @@ -53,9 +53,15 @@ def reference_to_c(ref) case when ref.type == :dollar && ref.name == "$" # $$ tag = ref.ex_tag || lhs.tag - raise_tag_not_found_error(ref) unless tag - # @type var tag: Lexer::Token::Tag - "(yyval.#{tag.member})" + if tag + # @type var tag: Lexer::Token::Tag + "(yyval.#{tag.member})" + elsif union_not_defined? 
+ # When %union is not defined, YYSTYPE defaults to int + "(yyval)" + else + raise_tag_not_found_error(ref) + end when ref.type == :at && ref.name == "$" # @$ "(yyloc)" when ref.type == :index && ref.name == "$" # $:$ @@ -63,9 +69,15 @@ def reference_to_c(ref) when ref.type == :dollar # $n i = -position_in_rhs + ref.index tag = ref.ex_tag || rhs[ref.index - 1].tag - raise_tag_not_found_error(ref) unless tag - # @type var tag: Lexer::Token::Tag - "(yyvsp[#{i}].#{tag.member})" + if tag + # @type var tag: Lexer::Token::Tag + "(yyvsp[#{i}].#{tag.member})" + elsif union_not_defined? + # When %union is not defined, YYSTYPE defaults to int + "(yyvsp[#{i}])" + else + raise_tag_not_found_error(ref) + end when ref.type == :at # @n i = -position_in_rhs + ref.index "(yylsp[#{i}])" @@ -99,6 +111,11 @@ def lhs @rule.lhs end + # @rbs () -> bool + def union_not_defined? + lhs.tag.nil? && rhs.all? { |sym| sym.tag.nil? } + end + # @rbs (Reference ref) -> bot def raise_tag_not_found_error(ref) raise "Tag is not specified for '$#{ref.value}' in '#{@rule.display_name}'" diff --git a/sig/generated/lrama/grammar/code/rule_action.rbs b/sig/generated/lrama/grammar/code/rule_action.rbs index c0e87ecd..94260806 100644 --- a/sig/generated/lrama/grammar/code/rule_action.rbs +++ b/sig/generated/lrama/grammar/code/rule_action.rbs @@ -55,6 +55,9 @@ module Lrama # @rbs () -> Grammar::Symbol def lhs: () -> Grammar::Symbol + # @rbs () -> bool + def union_not_defined?: () -> bool + # @rbs (Reference ref) -> bot def raise_tag_not_found_error: (Reference ref) -> bot end diff --git a/spec/fixtures/common/no_union.y b/spec/fixtures/common/no_union.y new file mode 100644 index 00000000..df805034 --- /dev/null +++ b/spec/fixtures/common/no_union.y @@ -0,0 +1,25 @@ +/* + * Test case for parser without %union directive + */ + +%{ +// Prologue +%} + +%token NUMBER +%token PLUS +%token MINUS + +%% + +program: expr + ; + +expr: NUMBER + | expr PLUS NUMBER + | expr MINUS NUMBER + ; + +%% + +// Epilogue diff 
--git a/spec/fixtures/common/no_union_with_type.y b/spec/fixtures/common/no_union_with_type.y new file mode 100644 index 00000000..1bbeb5df --- /dev/null +++ b/spec/fixtures/common/no_union_with_type.y @@ -0,0 +1,27 @@ +/* + * Test case for parser without %union but with typed tokens + */ + +%{ +// Prologue +%} + +%token NUMBER +%token PLUS +%token MINUS + +%type expr + +%% + +program: expr + ; + +expr: NUMBER + | expr PLUS NUMBER + | expr MINUS NUMBER + ; + +%% + +// Epilogue diff --git a/spec/fixtures/integration/no_union.l b/spec/fixtures/integration/no_union.l new file mode 100644 index 00000000..3e71252c --- /dev/null +++ b/spec/fixtures/integration/no_union.l @@ -0,0 +1,40 @@ +%option noinput nounput noyywrap never-interactive bison-bridge bison-locations + +%{ + +#include <stdio.h> +#include <stdlib.h> +#include "no_union.h" + +%} + +NUMBER [0-9]+ + +%% + +{NUMBER} { + ((void) yylloc); + *yylval = atoi(yytext); + return NUMBER; +} + +[+\-] { + return yytext[0]; +} + +[\n|\r\n] { + return(YYEOF); +} + +[[:space:]] {} + +<<EOF>> { + return(YYEOF); +} + +. { + fprintf(stderr, "Illegal character '%s'\n", yytext); + return(YYEOF); +} + +%% diff --git a/spec/fixtures/integration/no_union.y b/spec/fixtures/integration/no_union.y new file mode 100644 index 00000000..c4c3264b --- /dev/null +++ b/spec/fixtures/integration/no_union.y @@ -0,0 +1,49 @@ +/* + * Integration test for parser without %union directive + * This test verifies that lrama can generate parsers without %union, + * just like Bison does (YYSTYPE defaults to int). 
+ */ + +%{ +#include <stdio.h> +#include "no_union.h" +#include "no_union-lexer.h" + +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%token NUMBER + +%locations + +%% + +program: /* empty */ + | expr { printf("=> %d\n", $1); } + ; + +expr: NUMBER + | expr '+' NUMBER { $$ = $1 + $3; } + | expr '-' NUMBER { $$ = $1 - $3; } + ; + +%% + +static int yyerror(YYLTYPE *loc, const char *str) +{ + fprintf(stderr, "%d.%d-%d.%d: %s\n", loc->first_line, loc->first_column, loc->last_line, loc->last_column, str); + return 0; +} + +int main(int argc, char *argv[]) +{ + if (argc == 2) { + yy_scan_string(argv[1]); + } + + if (yyparse()) { + fprintf(stderr, "syntax error\n"); + return 1; + } + return 0; +} diff --git a/spec/lrama/integration_spec.rb b/spec/lrama/integration_spec.rb index 52922a0f..11c719cf 100644 --- a/spec/lrama/integration_spec.rb +++ b/spec/lrama/integration_spec.rb @@ -70,6 +70,12 @@ def generate_object(grammar_file_path, c_path, obj_path, command_args: []) end end + describe "parser without %union (YYSTYPE defaults to int)" do + it "returns 6 for '1 + 2 + 3'" do + test_parser("no_union", "1 + 2 + 3", "=> 6\n") + end + end + it "prologue and epilogue are optional" do test_parser("prologue_epilogue_optional", "", "") end diff --git a/spec/lrama/parser_spec.rb b/spec/lrama/parser_spec.rb index 9b2365ab..005e7b91 100644 --- a/spec/lrama/parser_spec.rb +++ b/spec/lrama/parser_spec.rb @@ -117,6 +117,37 @@ end describe '#parse' do + it "no_union" do + path = "common/no_union.y" + y = File.read(fixture_path(path)) + grammar = Lrama::Parser.new(y, path).parse + grammar.prepare + grammar.validate! 
+ + expect(grammar.union).to be_nil + expect(grammar.nterms.sort_by(&:number)).to match_symbols([ + Sym.new(id: T::Ident.new(s_value: "$accept"), alias_name: nil, number: 6, tag: nil, term: false, token_id: 0, nullable: false), + Sym.new(id: T::Ident.new(s_value: "program"), alias_name: nil, number: 7, tag: nil, term: false, token_id: 1, nullable: false), + Sym.new(id: T::Ident.new(s_value: "expr"), alias_name: nil, number: 8, tag: nil, term: false, token_id: 2, nullable: false), + ]) + end + + it "no_union_with_type" do + path = "common/no_union_with_type.y" + y = File.read(fixture_path(path)) + grammar = Lrama::Parser.new(y, path).parse + grammar.prepare + grammar.validate! + + expect(grammar.union).to be_nil + expect(grammar.nterms.sort_by(&:number)).to match_symbols([ + Sym.new(id: T::Ident.new(s_value: "$accept"), alias_name: nil, number: 6, tag: nil, term: false, token_id: 0, nullable: false), + Sym.new(id: T::Ident.new(s_value: "program"), alias_name: nil, number: 7, tag: nil, term: false, token_id: 1, nullable: false), + Sym.new(id: T::Ident.new(s_value: "expr"), alias_name: nil, number: 8, tag: T::Tag.new(s_value: ""), term: false, token_id: 2, nullable: false), + ]) + expect(grammar.terms.find {|t| t.id.s_value == "NUMBER" }.tag).to eq(T::Tag.new(s_value: "")) + end + it "basic" do path = "common/basic.y" y = File.read(fixture_path(path)) From 880859cd196b482853e0fcd7ed23267a7acad21d Mon Sep 17 00:00:00 2001 From: ydah Date: Fri, 23 Jan 2026 07:51:27 +0900 Subject: [PATCH 2/2] Fix union_not_defined? 
to check grammar.union directly instead of relying on tag presence --- lib/lrama/grammar/code/rule_action.rb | 8 +++-- lib/lrama/grammar/rule.rb | 6 ++-- lib/lrama/output.rb | 2 +- .../lrama/grammar/code/rule_action.rbs | 6 ++-- sig/generated/lrama/grammar/rule.rbs | 4 +-- spec/lrama/grammar/code_spec.rb | 36 +++++++++---------- 6 files changed, 33 insertions(+), 29 deletions(-) diff --git a/lib/lrama/grammar/code/rule_action.rb b/lib/lrama/grammar/code/rule_action.rb index 753a9865..24729a1e 100644 --- a/lib/lrama/grammar/code/rule_action.rb +++ b/lib/lrama/grammar/code/rule_action.rb @@ -11,11 +11,13 @@ class RuleAction < Code # # @rbs! # @rule: Rule + # @grammar: Grammar - # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule) -> void - def initialize(type:, token_code:, rule:) + # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule, grammar: Grammar) -> void + def initialize(type:, token_code:, rule:, grammar:) super(type: type, token_code: token_code) @rule = rule + @grammar = grammar end private @@ -113,7 +115,7 @@ def lhs # @rbs () -> bool def union_not_defined? - lhs.tag.nil? && rhs.all? { |sym| sym.tag.nil? } + @grammar.union.nil? end # @rbs (Reference ref) -> bot diff --git a/lib/lrama/grammar/rule.rb b/lib/lrama/grammar/rule.rb index d00d6a88..b023b0e4 100644 --- a/lib/lrama/grammar/rule.rb +++ b/lib/lrama/grammar/rule.rb @@ -104,11 +104,11 @@ def initial_rule? id == 0 end - # @rbs () -> String? - def translated_code + # @rbs (Grammar grammar) -> String? 
+ def translated_code(grammar) return nil unless token_code - Code::RuleAction.new(type: :rule_action, token_code: token_code, rule: self).translated_code + Code::RuleAction.new(type: :rule_action, token_code: token_code, rule: self, grammar: grammar).translated_code end # @rbs () -> bool diff --git a/lib/lrama/output.rb b/lib/lrama/output.rb index d527be8b..24cf725c 100644 --- a/lib/lrama/output.rb +++ b/lib/lrama/output.rb @@ -246,7 +246,7 @@ def user_actions <<-STR case #{rule.id + 1}: /* #{rule.as_comment} */ #line #{code.line} "#{@grammar_file_path}" -#{spaces}{#{rule.translated_code}} +#{spaces}{#{rule.translated_code(@grammar)}} #line [@oline@] [@ofile@] break; diff --git a/sig/generated/lrama/grammar/code/rule_action.rbs b/sig/generated/lrama/grammar/code/rule_action.rbs index 94260806..ea4c89c4 100644 --- a/sig/generated/lrama/grammar/code/rule_action.rbs +++ b/sig/generated/lrama/grammar/code/rule_action.rbs @@ -6,8 +6,10 @@ module Lrama class RuleAction < Code @rule: Rule - # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule) -> void - def initialize: (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule) -> void + @grammar: Grammar + + # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule, grammar: Grammar) -> void + def initialize: (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule, grammar: Grammar) -> void private diff --git a/sig/generated/lrama/grammar/rule.rbs b/sig/generated/lrama/grammar/rule.rbs index d1073cc7..20724d21 100644 --- a/sig/generated/lrama/grammar/rule.rbs +++ b/sig/generated/lrama/grammar/rule.rbs @@ -68,8 +68,8 @@ module Lrama # @rbs () -> bool def initial_rule?: () -> bool - # @rbs () -> String? - def translated_code: () -> String? + # @rbs (Grammar grammar) -> String? + def translated_code: (Grammar grammar) -> String? 
# @rbs () -> bool def contains_at_reference?: () -> bool diff --git a/spec/lrama/grammar/code_spec.rb b/spec/lrama/grammar/code_spec.rb index b331d474..401b00fa 100644 --- a/spec/lrama/grammar/code_spec.rb +++ b/spec/lrama/grammar/code_spec.rb @@ -248,46 +248,46 @@ describe "#translated_code" do it "translates '$$' to '(yyval)' with member" do code = grammar.rules.find {|r| r.lhs.id.s_value == "rule1" } - expect(code.translated_code).to eq(" (yyval.rule1) = 0; ") + expect(code.translated_code(grammar)).to eq(" (yyval.rule1) = 0; ") end it "translates '@$' to '(yyloc)'" do code = grammar.rules.find {|r| r.lhs.id.s_value == "rule2" } - expect(code.translated_code).to eq(" (yyloc) = 0; ") + expect(code.translated_code(grammar)).to eq(" (yyloc) = 0; ") end it "translates '$n' to '(yyvsp)' with index and member" do code = grammar.rules.find {|r| r.lhs.id.s_value == "rule3" } - expect(code.translated_code).to eq(" (yyvsp[-2].expr) + (yyvsp[0].expr); ") + expect(code.translated_code(grammar)).to eq(" (yyvsp[-2].expr) + (yyvsp[0].expr); ") end it "translates '@n' to '(yylsp)' with index" do code = grammar.rules.find {|r| r.lhs.id.s_value == "rule4" } - expect(code.translated_code).to eq(" (yylsp[-2]) + (yylsp[0]); (yylsp[-3]); ") + expect(code.translated_code(grammar)).to eq(" (yylsp[-2]) + (yylsp[0]); (yylsp[-3]); ") end it "respects explicit tag in a rule" do code = grammar.rules.find {|r| r.lhs.id.s_value == "rule5" } - expect(code.translated_code).to eq(" (yyvsp[-2].expr) + (yyvsp[0].integer); ") + expect(code.translated_code(grammar)).to eq(" (yyvsp[-2].expr) + (yyvsp[0].integer); ") end context "midrule action exists" do it "uses index on the original rule (-1)" do # midrule action in rule6 code = grammar.rules.find {|r| r.lhs.id.s_value == "$@1" } - expect(code.translated_code).to eq(" (yyval.integer) = (yyvsp[-1].expr); (yyloc) = (yylsp[-1]); ") + expect(code.translated_code(grammar)).to eq(" (yyval.integer) = (yyvsp[-1].expr); (yyloc) = (yylsp[-1]); ") code = 
grammar.rules.find {|r| r.lhs.id.s_value == "rule6" } - expect(code.translated_code).to eq(" (yyvsp[-3].expr) + (yyvsp[0].integer); ") + expect(code.translated_code(grammar)).to eq(" (yyvsp[-3].expr) + (yyvsp[0].integer); ") end it "uses an explicit tag for type casting" do # midrule action in rule13 code = grammar.rules.find {|r| r.lhs.id.s_value == "@5" } - expect(code.translated_code).to eq(" (yyval.integer) = (yyvsp[-1].expr); (yyloc) = (yylsp[-1]); ") + expect(code.translated_code(grammar)).to eq(" (yyval.integer) = (yyvsp[-1].expr); (yyloc) = (yylsp[-1]); ") code = grammar.rules.find {|r| r.lhs.id.s_value == "rule13" } - expect(code.translated_code).to eq(" (yyvsp[-3].expr) + (yyvsp[-1].integer); ") + expect(code.translated_code(grammar)).to eq(" (yyvsp[-3].expr) + (yyvsp[-1].integer); ") end end @@ -296,38 +296,38 @@ # midrule action in rule7 # rule7 has tag code = grammar.rules.find {|r| r.lhs.id.s_value == "@2" } - expect { code.translated_code }.to raise_error("Tag is not specified for '$$' in '@2 -> ε'") + expect { code.translated_code(grammar) }.to raise_error("Tag is not specified for '$$' in '@2 -> ε'") code = grammar.rules.find {|r| r.lhs.id.s_value == "rule7" } - expect { code.translated_code }.to raise_error("Tag is not specified for '$2' in 'rule7 -> expr @2 '+' expr'") + expect { code.translated_code(grammar) }.to raise_error("Tag is not specified for '$2' in 'rule7 -> expr @2 '+' expr'") # midrule action in rule8 # rule8 has no tag code = grammar.rules.find {|r| r.lhs.id.s_value == "@3" } - expect { code.translated_code }.to raise_error("Tag is not specified for '$$' in '@3 -> ε'") + expect { code.translated_code(grammar) }.to raise_error("Tag is not specified for '$$' in '@3 -> ε'") code = grammar.rules.find {|r| r.lhs.id.s_value == "rule8" } - expect { code.translated_code }.to raise_error("Tag is not specified for '$2' in 'rule8 -> expr @3 '+' expr'") + expect { code.translated_code(grammar) }.to raise_error("Tag is not specified for '$2' in 
'rule8 -> expr @3 '+' expr'") end end context "$: is used" do it "translates '$:$' to '-yylen' and '$:n' to index from the last of array" do code = grammar.rules.find {|r| r.lhs.id.s_value == "rule9" } - expect(code.translated_code).to eq(" (-2 - 1); (-1 - 1); (0 - 1); ") + expect(code.translated_code(grammar)).to eq(" (-2 - 1); (-1 - 1); (0 - 1); ") code = grammar.rules.find {|r| r.lhs.id.s_value == "rule10" } - expect { code.translated_code }.to raise_error("$:$ is not supported") + expect { code.translated_code(grammar) }.to raise_error("$:$ is not supported") # midrule action in rule11 code = grammar.rules.find {|r| r.lhs.id.s_value == "@4" } - expect(code.translated_code).to eq(" (0 - 1); ") + expect(code.translated_code(grammar)).to eq(" (0 - 1); ") code = grammar.rules.find {|r| r.lhs.id.s_value == "rule11" } - expect(code.translated_code).to eq(" (-3 - 1); (-2 - 1); (-1 - 1); (0 - 1); ") + expect(code.translated_code(grammar)).to eq(" (-3 - 1); (-2 - 1); (-1 - 1); (0 - 1); ") code = grammar.rules.find {|r| r.lhs.id.s_value == "rule12" } - expect(code.translated_code).to eq(" (-2 - 1); (-1 - 1); (0 - 1); ") + expect(code.translated_code(grammar)).to eq(" (-2 - 1); (-1 - 1); (0 - 1); ") end end end