From 8d3c514a8334bf45e20c39cad06367204bfd5a5e Mon Sep 17 00:00:00 2001
From: ydah <t.yudai92@gmail.com>
Date: Sun, 21 Dec 2025 21:00:28 +0900
Subject: [PATCH 1/2] Support parser generation without %union directive (Bison
 compatibility)

## Motivation

When writing simple parser samples or prototypes, defining `%union` can be cumbersome and unnecessary. Bison allows users to generate
parsers without the `%union` directive, defaulting `YYSTYPE` to `int`. This PR brings the same convenience to lrama for better Bison
compatibility.

## How it works

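When `%union` is not defined:
1. `YYSTYPE` defaults to `int` (same as Bison)
2. Semantic value references (`$$`, `$n`) that have no tag are generated as
   plain `(yyval)` / `(yyvsp[i])`, without union member access; references
   that do have a tag still use `.member` as before
3. The parser works exactly like Bison-generated parsers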
---
 lib/lrama/grammar/code/rule_action.rb         | 29 ++++++++---
 .../lrama/grammar/code/rule_action.rbs        |  3 ++
 spec/fixtures/common/no_union.y               | 25 ++++++++++
 spec/fixtures/common/no_union_with_type.y     | 27 ++++++++++
 spec/fixtures/integration/no_union.l          | 40 +++++++++++++++
 spec/fixtures/integration/no_union.y          | 49 +++++++++++++++++++
 spec/lrama/integration_spec.rb                |  6 +++
 spec/lrama/parser_spec.rb                     | 31 ++++++++++++
 8 files changed, 204 insertions(+), 6 deletions(-)
 create mode 100644 spec/fixtures/common/no_union.y
 create mode 100644 spec/fixtures/common/no_union_with_type.y
 create mode 100644 spec/fixtures/integration/no_union.l
 create mode 100644 spec/fixtures/integration/no_union.y

diff --git a/lib/lrama/grammar/code/rule_action.rb b/lib/lrama/grammar/code/rule_action.rb
index e71e93e5..753a9865 100644
--- a/lib/lrama/grammar/code/rule_action.rb
+++ b/lib/lrama/grammar/code/rule_action.rb
@@ -53,9 +53,15 @@ def reference_to_c(ref)
           case
           when ref.type == :dollar && ref.name == "$" # $$
             tag = ref.ex_tag || lhs.tag
-            raise_tag_not_found_error(ref) unless tag
-            # @type var tag: Lexer::Token::Tag
-            "(yyval.#{tag.member})"
+            if tag
+              # @type var tag: Lexer::Token::Tag
+              "(yyval.#{tag.member})"
+            elsif union_not_defined?
+              # When %union is not defined, YYSTYPE defaults to int
+              "(yyval)"
+            else
+              raise_tag_not_found_error(ref)
+            end
           when ref.type == :at && ref.name == "$" # @$
             "(yyloc)"
           when ref.type == :index && ref.name == "$" # $:$
@@ -63,9 +69,15 @@ def reference_to_c(ref)
           when ref.type == :dollar # $n
             i = -position_in_rhs + ref.index
             tag = ref.ex_tag || rhs[ref.index - 1].tag
-            raise_tag_not_found_error(ref) unless tag
-            # @type var tag: Lexer::Token::Tag
-            "(yyvsp[#{i}].#{tag.member})"
+            if tag
+              # @type var tag: Lexer::Token::Tag
+              "(yyvsp[#{i}].#{tag.member})"
+            elsif union_not_defined?
+              # When %union is not defined, YYSTYPE defaults to int
+              "(yyvsp[#{i}])"
+            else
+              raise_tag_not_found_error(ref)
+            end
           when ref.type == :at # @n
             i = -position_in_rhs + ref.index
             "(yylsp[#{i}])"
@@ -99,6 +111,11 @@ def lhs
           @rule.lhs
         end
 
+        # @rbs () -> bool
+        def union_not_defined?
+          lhs.tag.nil? && rhs.all? { |sym| sym.tag.nil? }
+        end
+
         # @rbs (Reference ref) -> bot
         def raise_tag_not_found_error(ref)
           raise "Tag is not specified for '$#{ref.value}' in '#{@rule.display_name}'"
diff --git a/sig/generated/lrama/grammar/code/rule_action.rbs b/sig/generated/lrama/grammar/code/rule_action.rbs
index c0e87ecd..94260806 100644
--- a/sig/generated/lrama/grammar/code/rule_action.rbs
+++ b/sig/generated/lrama/grammar/code/rule_action.rbs
@@ -55,6 +55,9 @@ module Lrama
         # @rbs () -> Grammar::Symbol
         def lhs: () -> Grammar::Symbol
 
+        # @rbs () -> bool
+        def union_not_defined?: () -> bool
+
         # @rbs (Reference ref) -> bot
         def raise_tag_not_found_error: (Reference ref) -> bot
       end
diff --git a/spec/fixtures/common/no_union.y b/spec/fixtures/common/no_union.y
new file mode 100644
index 00000000..df805034
--- /dev/null
+++ b/spec/fixtures/common/no_union.y
@@ -0,0 +1,25 @@
+/*
+ * Test case for parser without %union directive
+ */
+
+%{
+// Prologue
+%}
+
+%token NUMBER
+%token PLUS
+%token MINUS
+
+%%
+
+program: expr
+       ;
+
+expr: NUMBER
+    | expr PLUS NUMBER
+    | expr MINUS NUMBER
+    ;
+
+%%
+
+// Epilogue
diff --git a/spec/fixtures/common/no_union_with_type.y b/spec/fixtures/common/no_union_with_type.y
new file mode 100644
index 00000000..1bbeb5df
--- /dev/null
+++ b/spec/fixtures/common/no_union_with_type.y
@@ -0,0 +1,27 @@
+/*
+ * Test case for parser without %union but with typed tokens
+ */
+
+%{
+// Prologue
+%}
+
+%token <val> NUMBER
+%token PLUS
+%token MINUS
+
+%type <val> expr
+
+%%
+
+program: expr
+       ;
+
+expr: NUMBER
+    | expr PLUS NUMBER
+    | expr MINUS NUMBER
+    ;
+
+%%
+
+// Epilogue
diff --git a/spec/fixtures/integration/no_union.l b/spec/fixtures/integration/no_union.l
new file mode 100644
index 00000000..3e71252c
--- /dev/null
+++ b/spec/fixtures/integration/no_union.l
@@ -0,0 +1,40 @@
+%option noinput nounput noyywrap never-interactive bison-bridge bison-locations
+
+%{
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "no_union.h"
+
+%}
+
+NUMBER [0-9]+
+
+%%
+
+{NUMBER} {
+    ((void) yylloc);
+    *yylval = atoi(yytext);
+    return NUMBER;
+}
+
+[+\-] {
+    return yytext[0];
+}
+
+[\n|\r\n] {
+    return(YYEOF);
+}
+
+[[:space:]] {}
+
+<<EOF>> {
+    return(YYEOF);
+}
+
+. {
+    fprintf(stderr, "Illegal character '%s'\n", yytext);
+    return(YYEOF);
+}
+
+%%
diff --git a/spec/fixtures/integration/no_union.y b/spec/fixtures/integration/no_union.y
new file mode 100644
index 00000000..c4c3264b
--- /dev/null
+++ b/spec/fixtures/integration/no_union.y
@@ -0,0 +1,49 @@
+/*
+ * Integration test for parser without %union directive
+ * This test verifies that lrama can generate parsers without %union,
+ * just like Bison does (YYSTYPE defaults to int).
+ */
+
+%{
+#include <stdio.h>
+#include "no_union.h"
+#include "no_union-lexer.h"
+
+static int yyerror(YYLTYPE *loc, const char *str);
+%}
+
+%token NUMBER
+
+%locations
+
+%%
+
+program: /* empty */
+       | expr { printf("=> %d\n", $1); }
+       ;
+
+expr: NUMBER
+    | expr '+' NUMBER { $$ = $1 + $3; }
+    | expr '-' NUMBER { $$ = $1 - $3; }
+    ;
+
+%%
+
+static int yyerror(YYLTYPE *loc, const char *str)
+{
+  fprintf(stderr, "%d.%d-%d.%d: %s\n", loc->first_line, loc->first_column, loc->last_line, loc->last_column, str);
+  return 0;
+}
+
+int main(int argc, char *argv[])
+{
+  if (argc == 2) {
+    yy_scan_string(argv[1]);
+  }
+
+  if (yyparse()) {
+    fprintf(stderr, "syntax error\n");
+    return 1;
+  }
+  return 0;
+}
diff --git a/spec/lrama/integration_spec.rb b/spec/lrama/integration_spec.rb
index 52922a0f..11c719cf 100644
--- a/spec/lrama/integration_spec.rb
+++ b/spec/lrama/integration_spec.rb
@@ -70,6 +70,12 @@ def generate_object(grammar_file_path, c_path, obj_path, command_args: [])
     end
   end
 
+  describe "parser without %union (YYSTYPE defaults to int)" do
+    it "returns 6 for '1 + 2 + 3'" do
+      test_parser("no_union", "1 + 2 + 3", "=> 6\n")
+    end
+  end
+
   it "prologue and epilogue are optional" do
     test_parser("prologue_epilogue_optional", "", "")
   end
diff --git a/spec/lrama/parser_spec.rb b/spec/lrama/parser_spec.rb
index 9b2365ab..005e7b91 100644
--- a/spec/lrama/parser_spec.rb
+++ b/spec/lrama/parser_spec.rb
@@ -117,6 +117,37 @@
   end
 
   describe '#parse' do
+    it "no_union" do
+      path = "common/no_union.y"
+      y = File.read(fixture_path(path))
+      grammar = Lrama::Parser.new(y, path).parse
+      grammar.prepare
+      grammar.validate!
+
+      expect(grammar.union).to be_nil
+      expect(grammar.nterms.sort_by(&:number)).to match_symbols([
+        Sym.new(id: T::Ident.new(s_value: "$accept"), alias_name: nil, number: 6, tag: nil, term: false, token_id: 0, nullable: false),
+        Sym.new(id: T::Ident.new(s_value: "program"), alias_name: nil, number: 7, tag: nil, term: false, token_id: 1, nullable: false),
+        Sym.new(id: T::Ident.new(s_value: "expr"), alias_name: nil, number: 8, tag: nil, term: false, token_id: 2, nullable: false),
+      ])
+    end
+
+    it "no_union_with_type" do
+      path = "common/no_union_with_type.y"
+      y = File.read(fixture_path(path))
+      grammar = Lrama::Parser.new(y, path).parse
+      grammar.prepare
+      grammar.validate!
+
+      expect(grammar.union).to be_nil
+      expect(grammar.nterms.sort_by(&:number)).to match_symbols([
+        Sym.new(id: T::Ident.new(s_value: "$accept"), alias_name: nil, number: 6, tag: nil, term: false, token_id: 0, nullable: false),
+        Sym.new(id: T::Ident.new(s_value: "program"), alias_name: nil, number: 7, tag: nil, term: false, token_id: 1, nullable: false),
+        Sym.new(id: T::Ident.new(s_value: "expr"), alias_name: nil, number: 8, tag: T::Tag.new(s_value: "<val>"), term: false, token_id: 2, nullable: false),
+      ])
+      expect(grammar.terms.find {|t| t.id.s_value == "NUMBER" }.tag).to eq(T::Tag.new(s_value: "<val>"))
+    end
+
     it "basic" do
       path = "common/basic.y"
       y = File.read(fixture_path(path))

From 880859cd196b482853e0fcd7ed23267a7acad21d Mon Sep 17 00:00:00 2001
From: ydah <t.yudai92@gmail.com>
Date: Fri, 23 Jan 2026 07:51:27 +0900
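Subject: [PATCH 2/2] Fix union_not_defined? to check grammar.union directly
 instead of relying on tag presence

The previous implementation treated "the LHS and every RHS symbol of this
rule are untagged" as a proxy for "the grammar has no %union". That is a
per-rule heuristic: in a grammar that does declare %union, a rule whose
symbols all happen to be untagged would emit `(yyval)` / `(yyvsp[i])`
instead of raising "Tag is not specified".

Pass the Grammar into Code::RuleAction (via Rule#translated_code) and
check `grammar.union.nil?` instead.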

---
 lib/lrama/grammar/code/rule_action.rb         |  8 +++--
 lib/lrama/grammar/rule.rb                     |  6 ++--
 lib/lrama/output.rb                           |  2 +-
 .../lrama/grammar/code/rule_action.rbs        |  6 ++--
 sig/generated/lrama/grammar/rule.rbs          |  4 +--
 spec/lrama/grammar/code_spec.rb               | 36 +++++++++----------
 6 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/lib/lrama/grammar/code/rule_action.rb b/lib/lrama/grammar/code/rule_action.rb
index 753a9865..24729a1e 100644
--- a/lib/lrama/grammar/code/rule_action.rb
+++ b/lib/lrama/grammar/code/rule_action.rb
@@ -11,11 +11,13 @@ class RuleAction < Code
         #
         # @rbs!
         #   @rule: Rule
+        #   @grammar: Grammar
 
-        # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule) -> void
-        def initialize(type:, token_code:, rule:)
+        # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule, grammar: Grammar) -> void
+        def initialize(type:, token_code:, rule:, grammar:)
           super(type: type, token_code: token_code)
           @rule = rule
+          @grammar = grammar
         end
 
         private
@@ -113,7 +115,7 @@ def lhs
 
         # @rbs () -> bool
         def union_not_defined?
-          lhs.tag.nil? && rhs.all? { |sym| sym.tag.nil? }
+          @grammar.union.nil?
         end
 
         # @rbs (Reference ref) -> bot
diff --git a/lib/lrama/grammar/rule.rb b/lib/lrama/grammar/rule.rb
index d00d6a88..b023b0e4 100644
--- a/lib/lrama/grammar/rule.rb
+++ b/lib/lrama/grammar/rule.rb
@@ -104,11 +104,11 @@ def initial_rule?
         id == 0
       end
 
-      # @rbs () -> String?
-      def translated_code
+      # @rbs (Grammar grammar) -> String?
+      def translated_code(grammar)
         return nil unless token_code
 
-        Code::RuleAction.new(type: :rule_action, token_code: token_code, rule: self).translated_code
+        Code::RuleAction.new(type: :rule_action, token_code: token_code, rule: self, grammar: grammar).translated_code
       end
 
       # @rbs () -> bool
diff --git a/lib/lrama/output.rb b/lib/lrama/output.rb
index d527be8b..24cf725c 100644
--- a/lib/lrama/output.rb
+++ b/lib/lrama/output.rb
@@ -246,7 +246,7 @@ def user_actions
         <<-STR
   case #{rule.id + 1}: /* #{rule.as_comment}  */
 #line #{code.line} "#{@grammar_file_path}"
-#{spaces}{#{rule.translated_code}}
+#{spaces}{#{rule.translated_code(@grammar)}}
 #line [@oline@] [@ofile@]
     break;
 
diff --git a/sig/generated/lrama/grammar/code/rule_action.rbs b/sig/generated/lrama/grammar/code/rule_action.rbs
index 94260806..ea4c89c4 100644
--- a/sig/generated/lrama/grammar/code/rule_action.rbs
+++ b/sig/generated/lrama/grammar/code/rule_action.rbs
@@ -6,8 +6,10 @@ module Lrama
       class RuleAction < Code
         @rule: Rule
 
-        # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule) -> void
-        def initialize: (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule) -> void
+        @grammar: Grammar
+
+        # @rbs (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule, grammar: Grammar) -> void
+        def initialize: (type: ::Symbol, token_code: Lexer::Token::UserCode, rule: Rule, grammar: Grammar) -> void
 
         private
 
diff --git a/sig/generated/lrama/grammar/rule.rbs b/sig/generated/lrama/grammar/rule.rbs
index d1073cc7..20724d21 100644
--- a/sig/generated/lrama/grammar/rule.rbs
+++ b/sig/generated/lrama/grammar/rule.rbs
@@ -68,8 +68,8 @@ module Lrama
       # @rbs () -> bool
       def initial_rule?: () -> bool
 
-      # @rbs () -> String?
-      def translated_code: () -> String?
+      # @rbs (Grammar grammar) -> String?
+      def translated_code: (Grammar grammar) -> String?
 
       # @rbs () -> bool
       def contains_at_reference?: () -> bool
diff --git a/spec/lrama/grammar/code_spec.rb b/spec/lrama/grammar/code_spec.rb
index b331d474..401b00fa 100644
--- a/spec/lrama/grammar/code_spec.rb
+++ b/spec/lrama/grammar/code_spec.rb
@@ -248,46 +248,46 @@
     describe "#translated_code" do
       it "translates '$$' to '(yyval)' with member" do
         code = grammar.rules.find {|r| r.lhs.id.s_value == "rule1" }
-        expect(code.translated_code).to eq(" (yyval.rule1) = 0; ")
+        expect(code.translated_code(grammar)).to eq(" (yyval.rule1) = 0; ")
       end
 
       it "translates '@$' to '(yyloc)'" do
         code = grammar.rules.find {|r| r.lhs.id.s_value == "rule2" }
-        expect(code.translated_code).to eq(" (yyloc) = 0; ")
+        expect(code.translated_code(grammar)).to eq(" (yyloc) = 0; ")
       end
 
       it "translates '$n' to '(yyvsp)' with index and member" do
         code = grammar.rules.find {|r| r.lhs.id.s_value == "rule3" }
-        expect(code.translated_code).to eq(" (yyvsp[-2].expr) + (yyvsp[0].expr); ")
+        expect(code.translated_code(grammar)).to eq(" (yyvsp[-2].expr) + (yyvsp[0].expr); ")
       end
 
       it "translates '@n' to '(yylsp)' with index" do
         code = grammar.rules.find {|r| r.lhs.id.s_value == "rule4" }
-        expect(code.translated_code).to eq(" (yylsp[-2]) + (yylsp[0]); (yylsp[-3]); ")
+        expect(code.translated_code(grammar)).to eq(" (yylsp[-2]) + (yylsp[0]); (yylsp[-3]); ")
       end
 
       it "respects explicit tag in a rule" do
         code = grammar.rules.find {|r| r.lhs.id.s_value == "rule5" }
-        expect(code.translated_code).to eq(" (yyvsp[-2].expr) + (yyvsp[0].integer); ")
+        expect(code.translated_code(grammar)).to eq(" (yyvsp[-2].expr) + (yyvsp[0].integer); ")
       end
 
       context "midrule action exists" do
         it "uses index on the original rule (-1)" do
           # midrule action in rule6
           code = grammar.rules.find {|r| r.lhs.id.s_value == "$@1" }
-          expect(code.translated_code).to eq(" (yyval.integer) = (yyvsp[-1].expr); (yyloc) = (yylsp[-1]); ")
+          expect(code.translated_code(grammar)).to eq(" (yyval.integer) = (yyvsp[-1].expr); (yyloc) = (yylsp[-1]); ")
 
           code = grammar.rules.find {|r| r.lhs.id.s_value == "rule6" }
-          expect(code.translated_code).to eq(" (yyvsp[-3].expr) + (yyvsp[0].integer); ")
+          expect(code.translated_code(grammar)).to eq(" (yyvsp[-3].expr) + (yyvsp[0].integer); ")
         end
 
         it "uses an explicit tag for type casting" do
           # midrule action in rule13
           code = grammar.rules.find {|r| r.lhs.id.s_value == "@5" }
-          expect(code.translated_code).to eq(" (yyval.integer) = (yyvsp[-1].expr); (yyloc) = (yylsp[-1]); ")
+          expect(code.translated_code(grammar)).to eq(" (yyval.integer) = (yyvsp[-1].expr); (yyloc) = (yylsp[-1]); ")
 
           code = grammar.rules.find {|r| r.lhs.id.s_value == "rule13" }
-          expect(code.translated_code).to eq(" (yyvsp[-3].expr) + (yyvsp[-1].integer); ")
+          expect(code.translated_code(grammar)).to eq(" (yyvsp[-3].expr) + (yyvsp[-1].integer); ")
         end
       end
 
@@ -296,38 +296,38 @@
           # midrule action in rule7
           # rule7 has tag
           code = grammar.rules.find {|r| r.lhs.id.s_value == "@2" }
-          expect { code.translated_code }.to raise_error("Tag is not specified for '$$' in '@2 -> ε'")
+          expect { code.translated_code(grammar) }.to raise_error("Tag is not specified for '$$' in '@2 -> ε'")
 
           code = grammar.rules.find {|r| r.lhs.id.s_value == "rule7" }
-          expect { code.translated_code }.to raise_error("Tag is not specified for '$2' in 'rule7 -> expr @2 '+' expr'")
+          expect { code.translated_code(grammar) }.to raise_error("Tag is not specified for '$2' in 'rule7 -> expr @2 '+' expr'")
 
           # midrule action in rule8
           # rule8 has no tag
           code = grammar.rules.find {|r| r.lhs.id.s_value == "@3" }
-          expect { code.translated_code }.to raise_error("Tag is not specified for '$$' in '@3 -> ε'")
+          expect { code.translated_code(grammar) }.to raise_error("Tag is not specified for '$$' in '@3 -> ε'")
 
           code = grammar.rules.find {|r| r.lhs.id.s_value == "rule8" }
-          expect { code.translated_code }.to raise_error("Tag is not specified for '$2' in 'rule8 -> expr @3 '+' expr'")
+          expect { code.translated_code(grammar) }.to raise_error("Tag is not specified for '$2' in 'rule8 -> expr @3 '+' expr'")
         end
       end
 
       context "$: is used" do
         it "translates '$:$' to '-yylen' and '$:n' to index from the last of array" do
           code = grammar.rules.find {|r| r.lhs.id.s_value == "rule9" }
-          expect(code.translated_code).to eq(" (-2 - 1); (-1 - 1); (0 - 1); ")
+          expect(code.translated_code(grammar)).to eq(" (-2 - 1); (-1 - 1); (0 - 1); ")
 
           code = grammar.rules.find {|r| r.lhs.id.s_value == "rule10" }
-          expect { code.translated_code }.to raise_error("$:$ is not supported")
+          expect { code.translated_code(grammar) }.to raise_error("$:$ is not supported")
 
           # midrule action in rule11
           code = grammar.rules.find {|r| r.lhs.id.s_value == "@4" }
-          expect(code.translated_code).to eq(" (0 - 1); ")
+          expect(code.translated_code(grammar)).to eq(" (0 - 1); ")
 
           code = grammar.rules.find {|r| r.lhs.id.s_value == "rule11" }
-          expect(code.translated_code).to eq(" (-3 - 1); (-2 - 1); (-1 - 1); (0 - 1); ")
+          expect(code.translated_code(grammar)).to eq(" (-3 - 1); (-2 - 1); (-1 - 1); (0 - 1); ")
 
           code = grammar.rules.find {|r| r.lhs.id.s_value == "rule12" }
-          expect(code.translated_code).to eq(" (-2 - 1); (-1 - 1); (0 - 1); ")
+          expect(code.translated_code(grammar)).to eq(" (-2 - 1); (-1 - 1); (0 - 1); ")
         end
       end
     end