Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,29 @@

## Lrama 0.8.0 (2026-xx-xx)

### Support `%define api.token.raw` directive

Support `%define api.token.raw` directive for Bison compatibility.
When enabled, external token numbers are unified with internal symbol numbers, eliminating the `yytranslate[]` array from generated code.

This feature is useful for:
- Reducing generated code size
- Simplifying token handling (`YYTRANSLATE` becomes the identity function)
- Embedded systems with memory constraints
- Debugging with unified token numbers

```yacc
%define api.token.raw

%token NUM PLUS MINUS
%%
expr: NUM
| expr PLUS expr
;
```

Note: Character literals (`'+'`, `';'`, etc.) cannot be used with this directive.

## Lrama 0.7.1 (2025-12-24)

### Optimize IELR
Expand Down
17 changes: 14 additions & 3 deletions lib/lrama/context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,15 @@ def initialize(states)

# enum yytokentype
#
# Builds the rows for the generated `enum yytokentype`.
#
# Each row is `[name, token_id, display_name]`; a `["YYEMPTY", -2, nil]`
# row is always placed first.
#
# In the default mode, terms whose token id lies in 1..127 are left out:
# character literals use their ASCII code as token id and need no enum
# entry. With `api.token.raw` every term is listed instead, because token
# ids are then symbol numbers (small integers) and the ASCII-range filter
# would wrongly drop named tokens.
def yytokentype
  terms = if api_token_raw?
            @states.terms
          else
            @states.terms.reject do |term|
              0 < term.token_id && term.token_id < 128
            end
          end

  terms.map do |term|
    [term.id.s_value, term.token_id, term.display_name]
  end.unshift(["YYEMPTY", -2, nil])
end
Expand Down Expand Up @@ -73,6 +79,11 @@ def yymaxutok
@states.terms.map(&:token_id).max
end

# Whether `%define api.token.raw` is enabled.
#
# Pure delegation: the answer comes from `@states.api_token_raw?`.
# Used by `yytokentype` to decide whether terms are filtered.
def api_token_raw?
@states.api_token_raw?
end

# YYTRANSLATE
#
# yytranslate is a mapping from token id to symbol number
Expand Down
30 changes: 29 additions & 1 deletion lib/lrama/grammar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def validate!
validate_no_precedence_for_nterm!
validate_rule_lhs_is_nterm!
validate_duplicated_precedence!
validate_api_token_raw!
end

# @rbs (Grammar::Symbol sym) -> Array[Rule]
Expand Down Expand Up @@ -304,6 +305,16 @@ def ielr_defined?
@define.key?('lr.type') && @define['lr.type'] == 'ielr'
end

# Tells whether `%define api.token.raw` is in effect for this grammar.
#
# The directive counts as enabled whenever the key is present in
# `@define`, unless its value is exactly the string "false". A nil value,
# an empty string and "true" therefore all enable it.
#
# @rbs () -> bool
def api_token_raw?
  @define.key?('api.token.raw') && @define['api.token.raw'] != 'false'
end

private

# @rbs () -> void
Expand Down Expand Up @@ -529,7 +540,7 @@ def fill_default_precedence

# @rbs () -> Array[Grammar::Symbol]
def fill_symbols
fill_symbol_number
fill_symbol_number(api_token_raw: api_token_raw?)
fill_nterm_type(@types)
fill_printer(@printers)
fill_destructor(@destructors)
Expand Down Expand Up @@ -595,6 +606,23 @@ def validate_duplicated_precedence!
raise errors.join("\n")
end

# Rejects character-literal terminals when `api.token.raw` is enabled.
#
# Does nothing unless `api_token_raw?` is true. Otherwise every term whose
# id is a `Lrama::Lexer::Token::Char` produces one error line, and all
# lines are raised together as a single newline-joined message.
#
# @rbs () -> void
def validate_api_token_raw!
  return unless api_token_raw?

  char_terms = terms.select { |term| term.id.is_a?(Lrama::Lexer::Token::Char) }
  return if char_terms.empty?

  messages = char_terms.map do |term|
    "character literal #{term.id.s_value} cannot be used with %define api.token.raw (line: #{term.id.first_line})"
  end

  raise messages.join("\n")
end

# @rbs () -> void
def set_locations
@locations = @locations || @rules.any? {|rule| rule.contains_at_reference? }
Expand Down
35 changes: 32 additions & 3 deletions lib/lrama/grammar/symbols/resolver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Resolver
# def token_to_symbol: (Lexer::Token::Base token) -> Grammar::Symbol
# def find_symbol_by_s_value!: (::String s_value) -> Grammar::Symbol
# def fill_nterm_type: (Array[Grammar::Type] types) -> void
# def fill_symbol_number: () -> void
# def fill_symbol_number: (?api_token_raw: bool) -> void
# def fill_printer: (Array[Grammar::Printer] printers) -> void
# def fill_destructor: (Array[Destructor] destructors) -> (Destructor | bot)
# def fill_error_token: (Array[Grammar::ErrorToken] error_tokens) -> void
Expand Down Expand Up @@ -130,13 +130,14 @@ def find_symbol_by_number!(number)
sym
end

# @rbs () -> void
def fill_symbol_number
# Assigns numbers to all symbols: terminals first, then nonterminals.
#
# `api_token_raw` selects the terminal numbering strategy; it is stored in
# `@api_token_raw`, which `fill_terms_number` consults.
#
# @rbs (?api_token_raw: bool) -> void
def fill_symbol_number(api_token_raw: false)
  # Record the mode before any numbering starts.
  @api_token_raw = api_token_raw

  # Numbers below 3 belong to the special symbols:
  #   YYEMPTY = -2, YYEOF = 0, YYerror = 1, YYUNDEF = 2
  # so numbering of the remaining symbols starts at 3.
  @number = 3

  fill_terms_number
  fill_nterms_number
end
Expand Down Expand Up @@ -231,6 +232,34 @@ def find_nterm_by_id!(id)

# Numbers the terminal symbols, choosing the strategy from `@api_token_raw`:
# the raw variant when the flag is truthy, the normal variant otherwise.
#
# @rbs () -> void
def fill_terms_number
  return fill_terms_number_raw if @api_token_raw

  fill_terms_number_normal
end

# Numbers terminal symbols for `api.token.raw` mode.
#
# Each term lacking a symbol number receives the next number not marked in
# `used_numbers`; each term lacking a token id then takes its symbol number
# as token id. Values already present on a term are left untouched.
#
# NOTE(review): a term that arrives with a preset token_id keeps it, so its
# token id may differ from its symbol number — confirm whether that is
# acceptable in raw mode.
#
# @rbs () -> void
def fill_terms_number_raw
  @terms.each do |term|
    # Move the cursor past numbers that are already taken.
    @number += 1 while used_numbers[@number]

    if term.number.nil?
      term.number = @number
      used_numbers[@number] = true
      @number += 1
    end

    # Raw mode: the external token id is the internal symbol number.
    term.token_id = term.number if term.token_id.nil?
  end
end

# @rbs () -> void
def fill_terms_number_normal
# Character literal in grammar file has
# token id corresponding to ASCII code by default,
# so start token_id from 256.
Expand Down
2 changes: 1 addition & 1 deletion lib/lrama/output.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Output
def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
:yymaxutok, :yypact_ninf, :yytable_ninf

def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :api_token_raw?

def initialize(
out:, output_file_path:, template_name:, grammar_file_path:,
Expand Down
2 changes: 1 addition & 1 deletion lib/lrama/states.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class States
include Lrama::Tracer::Duration

def_delegators "@grammar", :symbols, :terms, :nterms, :rules, :precedences,
:accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value!, :ielr_defined?
:accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value!, :ielr_defined?, :api_token_raw?

attr_reader :states #: Array[State]
attr_reader :reads_relation #: Hash[State::Action::Goto, Array[State::Action::Goto]]
Expand Down
6 changes: 6 additions & 0 deletions sig/generated/lrama/grammar.rbs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 9 additions & 3 deletions sig/generated/lrama/grammar/symbols/resolver.rbs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 60 additions & 0 deletions spec/fixtures/integration/api_token_raw.l
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
%option noinput nounput noyywrap never-interactive bison-bridge bison-locations

%{

#include <stdio.h>
#include <stdlib.h>
#include "api_token_raw.h"

%}

NUMBER [0-9]+

%%

{NUMBER} {
((void) yylloc);
yylval->val = atoi(yytext);
return NUM;
}

"+" {
return PLUS;
}

"-" {
return MINUS;
}

"*" {
return STAR;
}

"/" {
return SLASH;
}

"(" {
return LPAREN;
}

")" {
return RPAREN;
}

\r\n|\r|\n {
    /* A line ending terminates the input. Written as an alternation: the
       former bracket expression was a character class that also matched a
       literal '|' and silently treated it as end of input. */
    return(YYEOF);
}

[[:space:]] {}

<<EOF>> {
return(YYEOF);
}

. {
fprintf(stderr, "Illegal character '%s'\n", yytext);
return(YYEOF);
}

%%
55 changes: 55 additions & 0 deletions spec/fixtures/integration/api_token_raw.y
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
%{

#include <stdio.h>
#include "api_token_raw.h"
#include "api_token_raw-lexer.h"

static int yyerror(YYLTYPE *loc, const char *str);

%}

%define api.token.raw

%union {
int val;
}

%token <val> NUM
%token PLUS MINUS STAR SLASH LPAREN RPAREN
%type <val> expr
%left PLUS MINUS
%left STAR SLASH

%locations

%%

program : /* empty */
| expr { printf("=> %d", $1); }
;
expr : NUM
| expr PLUS expr { $$ = $1 + $3; }
| expr MINUS expr { $$ = $1 - $3; }
| expr STAR expr { $$ = $1 * $3; }
| expr SLASH expr { $$ = $1 / $3; }
| LPAREN expr RPAREN { $$ = $2; }
;

%%

/*
 * Error-reporting hook declared in the prologue of this grammar.
 *
 * Writes "parse error: <str>" followed by a newline to stderr and
 * returns 0. The location argument `loc` is accepted but not used.
 */
static int yyerror(YYLTYPE *loc, const char *str) {
fprintf(stderr, "parse error: %s\n", str);
return 0;
}

/*
 * Entry point of the api_token_raw fixture.
 *
 * When exactly one command-line argument is given, it is handed to the
 * scanner via yy_scan_string() as the text to parse; with any other
 * argument count no buffer is installed here. Returns 0 when yyparse()
 * succeeds, otherwise prints "syntax error" to stderr and returns 1.
 */
int main(int argc, char *argv[]) {
    if (argc == 2) {
        yy_scan_string(argv[1]);
    }

    /* yyparse() yields 0 on success; anything else is a failure. */
    if (yyparse() == 0) {
        return 0;
    }

    fprintf(stderr, "syntax error\n");
    return 1;
}
Loading