A Nix language parser implemented in C (ragel + bison)
at main 252 lines 6.2 kB view raw
%{
#include "node.h"
#include "lexer.h"
#include <stdio.h>
#include <stdlib.h>

/*
 * Node helpers used by the grammar actions.  They are implemented in
 * parser/nodetype.c and are only declared here.
 */
Node *new_node(NodeType type, int token_start);
Node *new_node1(NodeType type, int token_start, Node *n1);
Node *new_node2(NodeType type, int token_start, Node *n1, Node *n2);
Node *new_node3(NodeType type, int token_start, Node *n1, Node *n2, Node *n3);
Node *op_node1(int op, int token_start, Node *n1);
Node *op_node2(int op, int token_start, Node *n1, Node *n2);
void set_node_type(Node *node, NodeType type);
void add_child(Node *parent, Node *child);
void set_token_end(Node *node, int token_end);

/*
 * Error callback for the generated parser.  Its leading parameters mirror
 * the %parse-param list below; neither of them is used, the message is
 * simply written to stderr.
 */
void yyerror(void **ast_root, void *scanner, const char *s)
{
    (void)scanner;
    (void)ast_root;
    fprintf(stderr, "parse error: %s\n", s);
}

/*
 * The semantic-value argument is typed void* so that this prototype can
 * appear before YYSTYPE is defined in the generated file.
 */
int yylex(void *yylval_param, void *scanner);

%}

/* Pure parser interface; %debug enables Bison's tracing facility. */
%define api.pure full
%debug

/* Extra yyparse() arguments; the scanner is also forwarded to yylex(). */
%parse-param { void **ast_root }
%parse-param { void *scanner }
%lex-param   { void *scanner }

%union {
    int   token;   /* value of every terminal (see the <token> tags below) */
    Node *node;    /* value of every nonterminal listed under %type */
}

/* Keyword-like and structural tokens. */
%token <token> ASSERT_ IF_ THEN ELSE_ LET IN WITH OR_ REC INHERIT
%token <token> ELLIPSIS INTERP SPACE COMMENT II

/* Literal, identifier and argument tokens. */
%token <token> URI PATH FLOAT INT_ T_ID TEXT ARG_ID ARG_BRACKET

/* Single-character punctuation that carries a semantic value. */
%token <token> ':' '@' ',' ';' '"' '.' '(' ')' '[' ']' '{' '}' '='

/* Every nonterminal produces a Node*. */
%type <node> Main Expression Interp String ID Atom Select Apply Op
%type <node> InterpID AttrPath List Binds InheritList Function ArgSet Arg

/*
 * Operator precedence and associativity.  Bison gives later declarations
 * higher precedence, so the order of these lines is significant.
 */
%right    <token> IMPL
%left     <token> OR
%left     <token> AND
%nonassoc <token> EQ NEQ
%left     <token> '<' '>' LEQ GEQ
%right    <token> UPDATE
%nonassoc <token> '!'
%left     <token> '+' '-'
%left     <token> '*' '/'
%right    <token> CONCAT
%nonassoc <token> '?'
/* Highest precedence level; the rules reference it only via %prec on unary minus. */
%nonassoc <token> NEGATE

%%

/*
 * Conventions used by the actions below:
 *   - the value of a token ($n of a terminal) is an int that is passed to
 *     the node helpers as their token_start / token_end argument;
 *   - new_nodeN(type, token_start, children...) builds a node with N children;
 *   - op_nodeN(op, token_start, operands...) builds an operator node, where
 *     `op` is the operator's token code or character.
 */

/* Start symbol: stores the finished tree through the ast_root parse parameter. */
Main
    : Expression
        {
            /* Assign $$ explicitly instead of relying on the skeleton's
               implicit pre-action copy of $1. */
            $$ = $1;
            *ast_root = $1;
        }
    ;

/*
 * NOTE(review): the IF_ rule records only the THEN token via
 * set_token_end(); the ELSE_ token ($5) is not stored anywhere.
 */
Expression
    : Op
    | ASSERT_ Expression ';' Expression
        { $$ = new_node2(ASSERT_NODE, $1, $2, $4); set_token_end($$, $3); }
    | IF_ Expression THEN Expression ELSE_ Expression
        { $$ = new_node3(IF_NODE, $1, $2, $4, $6); set_token_end($$, $3); }
    | LET Binds IN Expression
        { $$ = new_node2(LET_NODE, $1, $2, $4); set_token_end($$, $3); }
    | WITH Expression ';' Expression
        { $$ = new_node2(WITH_NODE, $1, $2, $4); set_token_end($$, $3); }
    | Function
    ;

/* An interpolation: INTERP token, an expression, and the closing brace. */
Interp
    : INTERP Expression '}'
        { $$ = new_node1(INTERP_NODE, $1, $2); set_token_end($$, $3); }
    ;

/* String contents: a possibly empty sequence of TEXT tokens and interpolations. */
String
    : /* empty */
        { $$ = new_node(STRING_NODE, 0); }
    | String TEXT
        { add_child($1, new_node(TEXT_NODE, $2)); $$ = $1; }
    | String Interp
        { add_child($1, $2); $$ = $1; }
    ;

ID
    : T_ID
        { $$ = new_node(ID_NODE, $1); }
    ;

/*
 * Primary expressions.  For the delimited forms the inner node is reused:
 * it is retyped where needed and its token end is set to the closing
 * delimiter.  Only '(' Expression ')' wraps its content in a new node.
 */
Atom
    : URI
        { $$ = new_node(URI_NODE, $1); }
    | PATH
        { $$ = new_node(PATH_NODE, $1); }
    | FLOAT
        { $$ = new_node(FLOAT_NODE, $1); }
    | INT_
        { $$ = new_node(INT_NODE, $1); }
    | ID
    | '"' String '"'
        { set_token_end($2, $3); $$ = $2; }
    | II String II
        { set_node_type($2, I_STRING_NODE); set_token_end($2, $3); $$ = $2; }
    | '(' Expression ')'
        { $$ = new_node1(PARENS_NODE, $1, $2); set_token_end($$, $3); }
    | '[' List ']'
        { set_token_end($2, $3); $$ = $2; }
    | '{' Binds '}'
        { set_node_type($2, SET_NODE); set_token_end($2, $3); $$ = $2; }
    | REC '{' Binds '}'
        { set_node_type($3, REC_SET_NODE); set_token_end($3, $4); $$ = $3; }
    ;

/* Attribute selection, optionally followed by an OR_ default. */
Select
    : Atom
    | Atom '.' AttrPath
        { $$ = new_node2(SELECT_NODE, $2, $1, $3); }
    | Atom '.' AttrPath OR_ Select
        { $$ = new_node3(SELECT_OR_NODE, $2, $1, $3, $5); set_token_end($$, $4); }
    ;

/* Application by juxtaposition; left-recursive, so `f a b` nests as (f a) b. */
Apply
    : Select
    | Apply Select
        { $$ = new_node2(APPLY_NODE, 0, $1, $2); }
    ;

/*
 * Unary and binary operators.  Conflicts between the alternatives are
 * resolved by the precedence declarations in the declarations section.
 */
Op
    : Apply
    | '-' Op %prec NEGATE
        { $$ = op_node1('-', $1, $2); }
    | Op '?' AttrPath
        { $$ = op_node2('?', $2, $1, $3); }
    | Op CONCAT Op
        { $$ = op_node2(CONCAT, $2, $1, $3); }
    | Op '/' Op
        { $$ = op_node2('/', $2, $1, $3); }
    | Op '*' Op
        { $$ = op_node2('*', $2, $1, $3); }
    | Op '-' Op
        { $$ = op_node2('-', $2, $1, $3); }
    | Op '+' Op
        { $$ = op_node2('+', $2, $1, $3); }
    | '!' Op
        { $$ = op_node1('!', $1, $2); }
    | Op UPDATE Op
        { $$ = op_node2(UPDATE, $2, $1, $3); }
    | Op GEQ Op
        { $$ = op_node2(GEQ, $2, $1, $3); }
    | Op LEQ Op
        { $$ = op_node2(LEQ, $2, $1, $3); }
    | Op '>' Op
        { $$ = op_node2('>', $2, $1, $3); }
    | Op '<' Op
        { $$ = op_node2('<', $2, $1, $3); }
    | Op NEQ Op
        { $$ = op_node2(NEQ, $2, $1, $3); }
    | Op EQ Op
        { $$ = op_node2(EQ, $2, $1, $3); }
    | Op AND Op
        { $$ = op_node2(AND, $2, $1, $3); }
    | Op OR Op
        { $$ = op_node2(OR, $2, $1, $3); }
    | Op IMPL Op
        { $$ = op_node2(IMPL, $2, $1, $3); }
    ;

/* One attribute-path component; the OR_ token is accepted as an identifier here. */
InterpID
    : ID
    | OR_
        { $$ = new_node(ID_NODE, $1); }
    | Interp
    | '"' String '"'
        { set_token_end($2, $3); $$ = $2; }
    ;

/*
 * Dot-separated attribute path.
 * NOTE(review): the second alternative passes a literal 0 to
 * set_token_end() and does not record the '.' token ($2) -- confirm intent.
 */
AttrPath
    : InterpID
        { $$ = new_node1(ATTR_PATH_NODE, 0, $1); }
    | AttrPath '.' InterpID
        { add_child($1, $3); set_token_end($1, 0); $$ = $1; }
    ;

/* List elements: a possibly empty sequence of Select expressions. */
List
    : /* empty */
        { $$ = new_node(LIST_NODE, 0); }
    | List Select
        { add_child($1, $2); $$ = $1; }
    ;

/* Bindings of a set or let: `path = expr;`, `inherit ...;`, `inherit (expr) ...;`. */
Binds
    : /* empty */
        { $$ = new_node(BINDS_NODE, 0); }
    | Binds AttrPath '=' Expression ';'
        { add_child($1, new_node2(BIND_NODE, $3, $2, $4)); $$ = $1; }
    | Binds INHERIT InheritList ';'
        { add_child($1, new_node1(INHERIT_NODE, $2, $3)); $$ = $1; }
    | Binds INHERIT '(' Expression ')' InheritList ';'
        { add_child($1, new_node2(INHERIT_FROM_NODE, $2, $4, $6)); $$ = $1; }
    ;

InheritList
    : /* empty */
        { $$ = new_node(INHERIT_LIST_NODE, 0); }
    | InheritList InterpID
        { add_child($1, $2); $$ = $1; }
    ;

/*
 * Function forms: `id: body`, `{ args }: body`, `id @ { args }: body` and
 * `{ args } @ id: body`.  In the three-child forms the children are always
 * (name, argument set, body), whichever side of '@' the name was written on.
 */
Function
    : ARG_ID ':' Expression
        { $$ = new_node2(FUNCTION_NODE, $2, new_node(ID_NODE, $1), $3); }
    | ARG_BRACKET ArgSet '}' ':' Expression
        { $$ = new_node2(FUNCTION_NODE, $4, $2, $5); set_token_end($2, $3); }
    | ARG_ID '@' ARG_BRACKET ArgSet '}' ':' Expression
        { $$ = new_node3(FUNCTION_NODE, $2, new_node(ID_NODE, $1), $4, $7); set_token_end($4, $5); }
    | ARG_BRACKET ArgSet '}' '@' ID ':' Expression
        { $$ = new_node3(FUNCTION_NODE, $4, $5, $2, $7); set_token_end($2, $3); }
    | ARG_BRACKET ArgSet '}' '@' ARG_ID ':' Expression
        { $$ = new_node3(FUNCTION_NODE, $4, new_node(ID_NODE, $5), $2, $7); set_token_end($2, $3); }
    ;

/*
 * Formal argument set; ELLIPSIS becomes an ARG_NODE with no children.
 * NOTE(review): the last alternative is right-recursive and adds the
 * earlier argument ($1) to the set built from the later ones ($3).  If
 * add_child() appends, children end up in reverse source order -- verify
 * that consumers of ARG_SET_NODE expect this.
 */
ArgSet
    : /* empty */
        { $$ = new_node(ARG_SET_NODE, 0); }
    | Arg
        { $$ = new_node1(ARG_SET_NODE, 0, $1); }
    | ELLIPSIS
        { $$ = new_node1(ARG_SET_NODE, 0, new_node(ARG_NODE, $1)); }
    | Arg ',' ArgSet
        { add_child($3, $1); $$ = $3; }
    ;

/* A single formal argument, optionally with a `? default` expression. */
Arg
    : ID
        { $$ = new_node1(ARG_NODE, 0, $1); }
    | ID '?' Expression
        { $$ = new_node2(ARG_NODE, $2, $1, $3); }
    ;

%%