A Nix language parser implemented in C (ragel + bison)
1%{
2#include "node.h"
3#include "lexer.h"
4#include <stdio.h>
5#include <stdlib.h>
6
7/* Forward declarations of node helpers implemented in parser/nodetype.c.
 *
 * As used by the grammar actions in this file:
 *   - token_start / token_end receive the int semantic value of a token
 *     ($n of a <token> symbol) or the literal 0 when no token is passed;
 *   - new_node1/2/3 take one, two or three child nodes respectively;
 *   - op_node1/op_node2 take the operator's token code (e.g. '-' or CONCAT)
 *     as their first argument.
 * NOTE(review): the helpers' bodies are not visible here; how they store the
 * token values and children should be confirmed in parser/nodetype.c. */
8Node* new_node(NodeType type, int token_start);
9Node* new_node1(NodeType type, int token_start, Node* n1);
10Node* new_node2(NodeType type, int token_start, Node* n1, Node* n2);
11Node* new_node3(NodeType type, int token_start, Node* n1, Node* n2, Node* n3);
12Node* op_node1(int op, int token_start, Node* n1);
13Node* op_node2(int op, int token_start, Node* n1, Node* n2);
14void set_node_type(Node* node, NodeType type);
15void add_child(Node* parent, Node* child);
16void set_token_end(Node* node, int token_end);
17
/*
 * Error callback for the generated parser.
 *
 * The first two parameters mirror the %parse-param declarations and are
 * not consulted; the message `s` is written to stderr behind a fixed
 * "parse error: " prefix and followed by a newline.
 */
void yyerror(void **ast_root, void *scanner, const char *s) {
    /* Both parse parameters are intentionally unused. */
    (void)scanner;
    (void)ast_root;

    fprintf(stderr, "parse error: %s\n", s);
}
23
24/* yylex prototype using void* so it's visible before YYSTYPE is defined in the generated file */
25int yylex(void *yylval_param, void *scanner);
26
27%}
28
/* Extra arguments threaded through the parser: ast_root is written by the
 * Main rule with the root node, and scanner is additionally forwarded to
 * yylex() through %lex-param. yyerror() receives both as well. */
29%parse-param { void** ast_root }
30%parse-param { void* scanner }
31%lex-param { void* scanner }
32
33
/* Semantic value type: terminals declared with <token> carry an int,
 * nonterminals listed in %type <node> carry a Node pointer. */
34%union {
35 int token;
36 Node* node;
37}
38
/* Every nonterminal of the grammar produces a Node*. */
39%type <node> Main Expression Interp String ID Atom Select Apply Op InterpID AttrPath List Binds InheritList Function ArgSet Arg
40
/* api.pure full: generate a reentrant parser in which yylval is passed to
 * yylex() as an argument and yyerror() receives the %parse-param values.
 * %debug: compile parser tracing support into the generated code. */
41%define api.pure full
42%debug
43
/* Named tokens. All carry an int semantic value, which the actions pass on
 * as token_start / token_end arguments of the node helpers.
 * SPACE and COMMENT are declared but not referenced by any rule in this
 * grammar. */
44%token <token> ASSERT_ IF_ THEN ELSE_ LET IN WITH OR_ REC INHERIT ELLIPSIS INTERP SPACE COMMENT II
45%token <token> URI PATH FLOAT INT_ T_ID TEXT ARG_ID ARG_BRACKET
46
/* Single-character punctuation tokens, likewise typed as <token>. */
47%token <token> ':' '@' ',' ';' '"' '.' '(' ')' '[' ']' '{' '}' '='
48
/* Operator precedence and associativity for the Op rule.
 * Bison gives each successive declaration a higher precedence than the one
 * before it, so IMPL binds loosest and NEGATE binds tightest. */
49%right <token> IMPL
50%left <token> OR
51%left <token> AND
52%nonassoc <token> EQ NEQ
53%left <token> '<' '>' LEQ GEQ
54%right <token> UPDATE
55%nonassoc <token> '!'
56%left <token> '+' '-'
57%left <token> '*' '/'
58%right <token> CONCAT
59%nonassoc <token> '?'
/* In this grammar NEGATE appears only in the %prec of the unary '-' rule. */
60%nonassoc <token> NEGATE
61
62%%
63
64
/* Main: first rule of the grammar (the start symbol, as no %start is given).
 * Stores the root expression node into the ast_root parse parameter. */
65Main
66: Expression
67{ *ast_root = $1; }
68;
69
/* Expression: an operator expression (Op), a Function, or one of the
 * keyword forms assert / if / let / with.
 * Each keyword form builds a node whose token_start is the keyword token
 * ($1) and whose token_end is set from the third symbol ($3): ';' for
 * assert and with, THEN for if, IN for let. */
70Expression
71: Op
72| ASSERT_ Expression ';' Expression
73{ $$ = new_node2(ASSERT_NODE, $1, $2, $4); set_token_end($$, $3); }
74| IF_ Expression THEN Expression ELSE_ Expression
/* Children: condition, then-branch, else-branch.
 * NOTE(review): only the THEN token is recorded; the ELSE_ token ($5) is
 * not passed to any helper -- confirm this is intended. */
75{ $$ = new_node3(IF_NODE, $1, $2, $4, $6); set_token_end($$, $3); }
76| LET Binds IN Expression
77{ $$ = new_node2(LET_NODE, $1, $2, $4); set_token_end($$, $3); }
78| WITH Expression ';' Expression
79{ $$ = new_node2(WITH_NODE, $1, $2, $4); set_token_end($$, $3); }
80| Function
81;
82
/* Interp: an interpolation, INTERP Expression '}'.
 * Produces an INTERP_NODE with the expression as its only child,
 * token_start taken from the INTERP token and token_end from '}'. */
83Interp
84: INTERP Expression '}'
85{ $$ = new_node1(INTERP_NODE, $1, $2); set_token_end($$, $3); }
86;
87
/* String: the body of a string literal, without its delimiters.
 * Starts as an empty STRING_NODE (token_start 0) and grows left-recursively:
 * each TEXT token is added as a TEXT_NODE child and each interpolation is
 * added as-is. The rules that use String (Atom, InterpID) set the token end
 * from the closing delimiter. */
88String
89:
90{ $$ = new_node(STRING_NODE, 0); }
91| String TEXT
92{ add_child($1, new_node(TEXT_NODE, $2)); $$ = $1; }
93| String Interp
94{ add_child($1, $2); $$ = $1; }
95;
96
/* ID: wraps a T_ID token in an ID_NODE whose token_start is that token. */
97ID
98: T_ID
99{ $$ = new_node(ID_NODE, $1); }
100;
101
/* Atom: literals, identifiers and delimited forms.
 * Literal tokens (URI, PATH, FLOAT, INT_) each become a leaf node of the
 * matching type. For the delimited forms that reuse the inner node (strings,
 * lists, sets) only the closing delimiter is recorded via set_token_end; the
 * opening delimiter's value ($1) is not passed to any helper. */
102Atom
103: URI
104{ $$ = new_node(URI_NODE, $1); }
105| PATH
106{ $$ = new_node(PATH_NODE, $1); }
107| FLOAT
108{ $$ = new_node(FLOAT_NODE, $1); }
109| INT_
110{ $$ = new_node(INT_NODE, $1); }
111| ID
112| '"' String '"'
113{ set_token_end($2, $3); $$ = $2; }
114| II String II
/* Same String body, retagged as I_STRING_NODE for the II-delimited form. */
115{ set_node_type($2, I_STRING_NODE); set_token_end($2, $3); $$ = $2; }
116| '(' Expression ')'
/* Parentheses get their own PARENS_NODE wrapping the inner expression. */
117{ $$ = new_node1(PARENS_NODE, $1, $2); set_token_end($$, $3); }
118| '[' List ']'
119{ set_token_end($2, $3); $$ = $2; }
120| '{' Binds '}'
/* The node built by Binds is retagged as SET_NODE. */
121{ set_node_type($2, SET_NODE); set_token_end($2, $3); $$ = $2; }
122| REC '{' Binds '}'
/* As above but retagged as REC_SET_NODE; the REC token value is unused. */
123{ set_node_type($3, REC_SET_NODE); set_token_end($3, $4); $$ = $3; }
124;
125
/* Select: an Atom, optionally followed by '.' AttrPath, optionally followed
 * by OR_ and another Select.
 * Both node forms use the '.' token as token_start; SELECT_OR_NODE also
 * records the OR_ token as token_end and carries the trailing Select as its
 * third child. */
126Select
127: Atom
128| Atom '.' AttrPath
129{ $$ = new_node2(SELECT_NODE, $2, $1, $3); }
130| Atom '.' AttrPath OR_ Select
131{ $$ = new_node3(SELECT_OR_NODE, $2, $1, $3, $5); set_token_end($$, $4); }
132;
133
/* Apply: juxtaposition of Select terms.
 * The rule is left-recursive, so `a b c` yields APPLY_NODE(APPLY_NODE(a, b), c).
 * token_start is passed as 0 since no token stands between the operands. */
134Apply
135: Select
136| Apply Select
137{ $$ = new_node2(APPLY_NODE, 0, $1, $2); }
138;
139
/* Op: unary and binary operator expressions over Apply.
 * The alternatives are ambiguous on their own; the %right/%left/%nonassoc
 * declarations in the declarations section resolve precedence and
 * associativity.
 * Every action passes the operator's token code as the first argument, the
 * operator token's semantic value as token_start, and then the operand(s)
 * in source order. */
140Op
141: Apply
/* Unary minus takes the precedence of NEGATE rather than that of binary '-'. */
142| '-' Op %prec NEGATE
143{ $$ = op_node1('-', $1, $2); }
/* The right-hand side of '?' is an AttrPath, not another Op. */
144| Op '?' AttrPath
145{ $$ = op_node2('?', $2, $1, $3); }
146| Op CONCAT Op
147{ $$ = op_node2(CONCAT, $2, $1, $3); }
148| Op '/' Op
149{ $$ = op_node2('/', $2, $1, $3); }
150| Op '*' Op
151{ $$ = op_node2('*', $2, $1, $3); }
152| Op '-' Op
153{ $$ = op_node2('-', $2, $1, $3); }
154| Op '+' Op
155{ $$ = op_node2('+', $2, $1, $3); }
156| '!' Op
157{ $$ = op_node1('!', $1, $2); }
158| Op UPDATE Op
159{ $$ = op_node2(UPDATE, $2, $1, $3); }
160| Op GEQ Op
161{ $$ = op_node2(GEQ, $2, $1, $3); }
162| Op LEQ Op
163{ $$ = op_node2(LEQ, $2, $1, $3); }
164| Op '>' Op
165{ $$ = op_node2('>', $2, $1, $3); }
166| Op '<' Op
167{ $$ = op_node2('<', $2, $1, $3); }
168| Op NEQ Op
169{ $$ = op_node2(NEQ, $2, $1, $3); }
170| Op EQ Op
171{ $$ = op_node2(EQ, $2, $1, $3); }
172| Op AND Op
173{ $$ = op_node2(AND, $2, $1, $3); }
174| Op OR Op
175{ $$ = op_node2(OR, $2, $1, $3); }
176| Op IMPL Op
177{ $$ = op_node2(IMPL, $2, $1, $3); }
178;
179
/* InterpID: one component of an attribute path or inherit list.
 * Accepts an identifier, the OR_ keyword (wrapped as an ordinary ID_NODE),
 * an interpolation, or a double-quoted string. */
180InterpID
181: ID
182| OR_
183{ $$ = new_node(ID_NODE, $1); }
184| Interp
185| '"' String '"'
186{ set_token_end($2, $3); $$ = $2; }
187;
188
/* AttrPath: one or more InterpID components separated by '.'.
 * The first component creates an ATTR_PATH_NODE (token_start 0); each further
 * component is added to that same node. */
189AttrPath
190: InterpID
191{ $$ = new_node1(ATTR_PATH_NODE, 0, $1); }
192| AttrPath '.' InterpID
/* NOTE(review): set_token_end is called with the literal 0 here, whereas
 * every other call in this grammar passes a token value; the '.' token ($2)
 * is not recorded. Confirm whether 0 is intended. */
193{ add_child($1, $3); set_token_end($1, 0); $$ = $1; }
194;
195
/* List: zero or more Select elements (the contents between '[' and ']').
 * Starts as an empty LIST_NODE (token_start 0); each element is added as a
 * child in left-recursive fashion. */
196List
197:
198{ $$ = new_node(LIST_NODE, 0); }
199| List Select
200{ add_child($1, $2); $$ = $1; }
201;
202
/* Binds: zero or more bindings, used by let and by (rec) sets.
 * Starts as an empty BINDS_NODE (token_start 0) and collects one child per
 * binding:
 *   - AttrPath '=' Expression ';'            -> BIND_NODE(path, value), token_start '='
 *   - INHERIT InheritList ';'                -> INHERIT_NODE(list), token_start INHERIT
 *   - INHERIT '(' Expression ')' InheritList ';'
 *                                            -> INHERIT_FROM_NODE(source, list), token_start INHERIT
 * The terminating ';' and the parentheses are not recorded. */
203Binds
204:
205{ $$ = new_node(BINDS_NODE, 0); }
206| Binds AttrPath '=' Expression ';'
207{ add_child($1, new_node2(BIND_NODE, $3, $2, $4)); $$ = $1; }
208| Binds INHERIT InheritList ';'
209{ add_child($1, new_node1(INHERIT_NODE, $2, $3)); $$ = $1; }
210| Binds INHERIT '(' Expression ')' InheritList ';'
211{ add_child($1, new_node2(INHERIT_FROM_NODE, $2, $4, $6)); $$ = $1; }
212;
213
/* InheritList: zero or more InterpID names following `inherit`.
 * Starts as an empty INHERIT_LIST_NODE (token_start 0); each name is added
 * as a child in left-recursive fashion. */
214InheritList
215:
216{ $$ = new_node(INHERIT_LIST_NODE, 0); }
217| InheritList InterpID
218{ add_child($1, $2); $$ = $1; }
219;
220
/* Function: the lambda forms.
 *   ARG_ID ':' body                         -> FUNCTION_NODE(ID, body)
 *   ARG_BRACKET ArgSet '}' ':' body         -> FUNCTION_NODE(ArgSet, body)
 *   ARG_ID '@' ARG_BRACKET ArgSet '}' ':' body
 *   ARG_BRACKET ArgSet '}' '@' (ID | ARG_ID) ':' body
 *                                           -> FUNCTION_NODE(ID, ArgSet, body)
 * Both '@' forms produce the children in the same order (name, arg set,
 * body) regardless of which side of '@' the name was written on.
 * token_start is the ':' token for the forms without '@' and the '@' token
 * otherwise. Where an ArgSet is present its token_end is set from the
 * closing '}'; the ARG_BRACKET token value itself is not recorded. */
221Function
222: ARG_ID ':' Expression
223{ $$ = new_node2(FUNCTION_NODE, $2, new_node(ID_NODE, $1), $3); }
224| ARG_BRACKET ArgSet '}' ':' Expression
225{ $$ = new_node2(FUNCTION_NODE, $4, $2, $5); set_token_end($2, $3); }
226| ARG_ID '@' ARG_BRACKET ArgSet '}' ':' Expression
227{ $$ = new_node3(FUNCTION_NODE, $2, new_node(ID_NODE, $1), $4, $7); set_token_end($4, $5); }
228| ARG_BRACKET ArgSet '}' '@' ID ':' Expression
229{ $$ = new_node3(FUNCTION_NODE, $4, $5, $2, $7); set_token_end($2, $3); }
230| ARG_BRACKET ArgSet '}' '@' ARG_ID ':' Expression
231{ $$ = new_node3(FUNCTION_NODE, $4, new_node(ID_NODE, $5), $2, $7); set_token_end($2, $3); }
232;
233
/* ArgSet: the comma-separated formals between ARG_BRACKET and '}'.
 * May be empty, a single Arg, a lone ELLIPSIS (stored as an ARG_NODE whose
 * token_start is the ELLIPSIS token), or Arg ',' ArgSet.
 * The ARG_SET_NODE is created by whichever alternative ends the list, and
 * earlier arguments are added to it as the recursion unwinds. */
234ArgSet
235:
236{ $$ = new_node(ARG_SET_NODE, 0); }
237| Arg
238{ $$ = new_node1(ARG_SET_NODE, 0, $1); }
239| ELLIPSIS
240{ $$ = new_node1(ARG_SET_NODE, 0, new_node(ARG_NODE, $1)); }
241| Arg ',' ArgSet
/* NOTE(review): this alternative is right-recursive, so the node for the
 * tail ($3) already exists when the leading Arg ($1) is added to it. If
 * add_child appends, the children end up in reverse source order -- confirm
 * against add_child and the consumers of ARG_SET_NODE. */
242{ add_child($3, $1); $$ = $3; }
243;
244
/* Arg: one formal argument, either a bare identifier or `ID '?' Expression`.
 * The bare form yields ARG_NODE(ID) with token_start 0; the '?' form yields
 * ARG_NODE(ID, Expression) with the '?' token as token_start. */
245Arg
246: ID
247{ $$ = new_node1(ARG_NODE, 0, $1); }
248| ID '?' Expression
249{ $$ = new_node2(ARG_NODE, $2, $1, $3); }
250;
251
252%%