Skip to content

Commit 8d3c514

Browse files
committed
Support parser generation without %union directive (Bison compatibility)
## Motivation

When writing simple parser samples or prototypes, defining `%union` can be cumbersome and unnecessary. Bison allows users to generate parsers without the `%union` directive, defaulting `YYSTYPE` to `int`. This PR brings the same convenience to lrama for better Bison compatibility.

## How it works

When `%union` is not defined:

1. `YYSTYPE` defaults to `int` (same as Bison)
2. Semantic value references are generated without union member access
3. The parser works exactly like Bison-generated parsers
1 parent ec4238a commit 8d3c514

File tree

8 files changed

+204
-6
lines changed

8 files changed

+204
-6
lines changed

lib/lrama/grammar/code/rule_action.rb

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,19 +53,31 @@ def reference_to_c(ref)
5353
case
5454
when ref.type == :dollar && ref.name == "$" # $$
5555
tag = ref.ex_tag || lhs.tag
56-
raise_tag_not_found_error(ref) unless tag
57-
# @type var tag: Lexer::Token::Tag
58-
"(yyval.#{tag.member})"
56+
if tag
57+
# @type var tag: Lexer::Token::Tag
58+
"(yyval.#{tag.member})"
59+
elsif union_not_defined?
60+
# When %union is not defined, YYSTYPE defaults to int
61+
"(yyval)"
62+
else
63+
raise_tag_not_found_error(ref)
64+
end
5965
when ref.type == :at && ref.name == "$" # @$
6066
"(yyloc)"
6167
when ref.type == :index && ref.name == "$" # $:$
6268
raise "$:$ is not supported"
6369
when ref.type == :dollar # $n
6470
i = -position_in_rhs + ref.index
6571
tag = ref.ex_tag || rhs[ref.index - 1].tag
66-
raise_tag_not_found_error(ref) unless tag
67-
# @type var tag: Lexer::Token::Tag
68-
"(yyvsp[#{i}].#{tag.member})"
72+
if tag
73+
# @type var tag: Lexer::Token::Tag
74+
"(yyvsp[#{i}].#{tag.member})"
75+
elsif union_not_defined?
76+
# When %union is not defined, YYSTYPE defaults to int
77+
"(yyvsp[#{i}])"
78+
else
79+
raise_tag_not_found_error(ref)
80+
end
6981
when ref.type == :at # @n
7082
i = -position_in_rhs + ref.index
7183
"(yylsp[#{i}])"
@@ -99,6 +111,11 @@ def lhs
99111
@rule.lhs
100112
end
101113

114+
# @rbs () -> bool
115+
def union_not_defined?
116+
lhs.tag.nil? && rhs.all? { |sym| sym.tag.nil? }
117+
end
118+
102119
# @rbs (Reference ref) -> bot
103120
def raise_tag_not_found_error(ref)
104121
raise "Tag is not specified for '$#{ref.value}' in '#{@rule.display_name}'"

sig/generated/lrama/grammar/code/rule_action.rbs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

spec/fixtures/common/no_union.y

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Test case for parser without %union directive
3+
*/
4+
5+
%{
6+
// Prologue
7+
%}
8+
9+
%token NUMBER
10+
%token PLUS
11+
%token MINUS
12+
13+
%%
14+
15+
program: expr
16+
;
17+
18+
expr: NUMBER
19+
| expr PLUS NUMBER
20+
| expr MINUS NUMBER
21+
;
22+
23+
%%
24+
25+
// Epilogue
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Test case for parser without %union but with typed tokens
3+
*/
4+
5+
%{
6+
// Prologue
7+
%}
8+
9+
%token <val> NUMBER
10+
%token PLUS
11+
%token MINUS
12+
13+
%type <val> expr
14+
15+
%%
16+
17+
program: expr
18+
;
19+
20+
expr: NUMBER
21+
| expr PLUS NUMBER
22+
| expr MINUS NUMBER
23+
;
24+
25+
%%
26+
27+
// Epilogue
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
%option noinput nounput noyywrap never-interactive bison-bridge bison-locations
2+
3+
%{
4+
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include "no_union.h"
8+
9+
%}
10+
11+
NUMBER [0-9]+
12+
13+
%%
14+
15+
{NUMBER} {
16+
((void) yylloc);
17+
*yylval = atoi(yytext);
18+
return NUMBER;
19+
}
20+
21+
[+\-] {
22+
return yytext[0];
23+
}
24+
25+
[\n|\r\n] {
26+
return(YYEOF);
27+
}
28+
29+
[[:space:]] {}
30+
31+
<<EOF>> {
32+
return(YYEOF);
33+
}
34+
35+
. {
36+
fprintf(stderr, "Illegal character '%s'\n", yytext);
37+
return(YYEOF);
38+
}
39+
40+
%%
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Integration test for parser without %union directive
3+
* This test verifies that lrama can generate parsers without %union,
4+
* just like Bison does (YYSTYPE defaults to int).
5+
*/
6+
7+
%{
8+
#include <stdio.h>
9+
#include "no_union.h"
10+
#include "no_union-lexer.h"
11+
12+
static int yyerror(YYLTYPE *loc, const char *str);
13+
%}
14+
15+
%token NUMBER
16+
17+
%locations
18+
19+
%%
20+
21+
program: /* empty */
22+
| expr { printf("=> %d\n", $1); }
23+
;
24+
25+
expr: NUMBER
26+
| expr '+' NUMBER { $$ = $1 + $3; }
27+
| expr '-' NUMBER { $$ = $1 - $3; }
28+
;
29+
30+
%%
31+
32+
static int yyerror(YYLTYPE *loc, const char *str)
33+
{
34+
fprintf(stderr, "%d.%d-%d.%d: %s\n", loc->first_line, loc->first_column, loc->last_line, loc->last_column, str);
35+
return 0;
36+
}
37+
38+
int main(int argc, char *argv[])
39+
{
40+
if (argc == 2) {
41+
yy_scan_string(argv[1]);
42+
}
43+
44+
if (yyparse()) {
45+
fprintf(stderr, "syntax error\n");
46+
return 1;
47+
}
48+
return 0;
49+
}

spec/lrama/integration_spec.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ def generate_object(grammar_file_path, c_path, obj_path, command_args: [])
7070
end
7171
end
7272

73+
describe "parser without %union (YYSTYPE defaults to int)" do
74+
it "returns 6 for '1 + 2 + 3'" do
75+
test_parser("no_union", "1 + 2 + 3", "=> 6\n")
76+
end
77+
end
78+
7379
it "prologue and epilogue are optional" do
7480
test_parser("prologue_epilogue_optional", "", "")
7581
end

spec/lrama/parser_spec.rb

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,37 @@
117117
end
118118

119119
describe '#parse' do
120+
it "no_union" do
121+
path = "common/no_union.y"
122+
y = File.read(fixture_path(path))
123+
grammar = Lrama::Parser.new(y, path).parse
124+
grammar.prepare
125+
grammar.validate!
126+
127+
expect(grammar.union).to be_nil
128+
expect(grammar.nterms.sort_by(&:number)).to match_symbols([
129+
Sym.new(id: T::Ident.new(s_value: "$accept"), alias_name: nil, number: 6, tag: nil, term: false, token_id: 0, nullable: false),
130+
Sym.new(id: T::Ident.new(s_value: "program"), alias_name: nil, number: 7, tag: nil, term: false, token_id: 1, nullable: false),
131+
Sym.new(id: T::Ident.new(s_value: "expr"), alias_name: nil, number: 8, tag: nil, term: false, token_id: 2, nullable: false),
132+
])
133+
end
134+
135+
it "no_union_with_type" do
136+
path = "common/no_union_with_type.y"
137+
y = File.read(fixture_path(path))
138+
grammar = Lrama::Parser.new(y, path).parse
139+
grammar.prepare
140+
grammar.validate!
141+
142+
expect(grammar.union).to be_nil
143+
expect(grammar.nterms.sort_by(&:number)).to match_symbols([
144+
Sym.new(id: T::Ident.new(s_value: "$accept"), alias_name: nil, number: 6, tag: nil, term: false, token_id: 0, nullable: false),
145+
Sym.new(id: T::Ident.new(s_value: "program"), alias_name: nil, number: 7, tag: nil, term: false, token_id: 1, nullable: false),
146+
Sym.new(id: T::Ident.new(s_value: "expr"), alias_name: nil, number: 8, tag: T::Tag.new(s_value: "<val>"), term: false, token_id: 2, nullable: false),
147+
])
148+
expect(grammar.terms.find {|t| t.id.s_value == "NUMBER" }.tag).to eq(T::Tag.new(s_value: "<val>"))
149+
end
150+
120151
it "basic" do
121152
path = "common/basic.y"
122153
y = File.read(fixture_path(path))

0 commit comments

Comments
 (0)