the hito embeddable programming language
at main 1073 lines 32 kB view raw
1#include "syntax.h" 2#include "bitset.h" 3#include "builtins.h" 4#include "error.h" 5#include "lexer.h" 6#include "scope.h" 7#include "util.h" 8#include "intern.h" 9#include "value.h" 10#include <string.h> 11#include <ctype.h> 12#include <stdlib.h> 13#include <stdbool.h> 14#include <stdio.h> 15 16static bool _expect(lexer_t *lexer, token_type expected, error_tag on_failure) { 17 token_t tok = lexer_lex(lexer); 18 if (tok.type != expected) { 19 error_report(error(on_failure,lexer_source(lexer),tok.pos,tok.len)); 20 return false; 21 } else { 22 return true; 23 } 24} 25 26static expr_t *_alloc_expr(void) { 27 expr_t *ptr = malloc(sizeof(struct expr)); 28 if (ptr == NULL) { 29 die("Out of memory: cannot allocate expr struct"); 30 } 31 return ptr; 32} 33 34static char *_parse_string_literal(const char *input) { 35 if (!input || *input != '"') { 36 die("Provided string is not a string literal"); 37 } 38 const char *p = input + 1; // skip opening quote 39 size_t capacity = 16; 40 size_t length = 0; 41 char *result = malloc(capacity); 42 if (result == NULL) { 43 die("Out of memory: Cannot allocate space for string literal"); 44 } 45 46 while (*p != '\0' && *p != '"') { 47 char c = *p++; 48 if (c == '\\') { 49 if (!*p) break; // unterminated escape 50 char esc = *p++; 51 switch (esc) { 52 case 'n': c = '\n'; break; 53 case 't': c = '\t'; break; 54 case 'r': c = '\r'; break; 55 case '\\': c = '\\'; break; 56 case '"': c = '"'; break; 57 case '\'': c = '\''; break; 58 default: c = esc; break; 59 } 60 } 61 if (length + 1 >= capacity) { 62 capacity *= 2; 63 char *tmp = realloc(result, capacity); 64 if (tmp == NULL) { 65 die("Out of memory: Cannot enlarge space for string literal"); 66 } 67 result = tmp; 68 } 69 result[length++] = c; 70 } 71 if (*p != '"') { 72 die("The impossible happened"); 73 } 74 75 result[length] = '\0'; // null-terminate 76 return result; 77} 78 79static expr_t *_parse_expr(lexer_t *lexer, intern_table_t *table); 80 81 82static expr_t *_parse_atom(lexer_t *lexer, intern_table_t *table) { 83 token_t tok = lexer_peek(lexer); 84 switch (tok.type) { 85 case TOKEN_LPAREN: { 86 lexer_lex(lexer); 87 88 expr_t *first_expr = _parse_expr(lexer, table); 89 token_t next_tok = lexer_peek(lexer); 90 91 if (first_expr == NULL) { 92 if (next_tok.type == TOKEN_RPAREN) { 93 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), next_tok.pos, 1)); 94 95 lexer_lex(lexer); // Consume ')' 96 return NULL; 97 } 98 return NULL; 99 } 100 101 if (next_tok.type != TOKEN_COMMA) { 102 // Single element parenthesized expression 103 if (!_expect(lexer, TOKEN_RPAREN, ERROR_EXPECTED_RPAREN)) { 104 syntax_expr_free(first_expr); 105 return NULL; 106 } 107 return first_expr; 108 } 109 110 // Multi-element tuple 111 lexer_lex(lexer); // consume the first ',' 112 113 size_t cap = 4; 114 size_t nargs = 1; 115 expr_t **args = malloc(cap * sizeof(expr_t*)); 116 if (args == NULL) 117 die("Out of memory: can't allocate args array for tuple"); 118 119 args[0] = first_expr; 120 121 while (1) { 122 token_t next_arg_tok = lexer_peek(lexer); 123 expr_t *arg = _parse_expr(lexer, table); 124 125 if (arg == NULL) { 126 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), next_arg_tok.pos, 1)); 127 128 for (size_t i = 0; i < nargs; i++) { 129 syntax_expr_free(args[i]); 130 } 131 free(args); 132 return NULL; 133 } 134 135 if (nargs == cap) { 136 cap *= 2; 137 expr_t **temp_args = realloc(args, cap * sizeof(expr_t*)); 138 if (temp_args == NULL) 139 die("Out of memory: can't enlarge args array for tuple"); 140 args = temp_args; 141 } 142 143 args[nargs++] = arg; 144 145 token_t post_arg_tok = lexer_peek(lexer); 146 if (post_arg_tok.type != TOKEN_COMMA) { 147 break; 148 } 149 lexer_lex(lexer); // consume ',' 150 } 151 152 if (!_expect(lexer, TOKEN_RPAREN, ERROR_EXPECTED_RPAREN)) { 153 for (size_t i = 0; i < nargs; i++) { 154 syntax_expr_free(args[i]); 155 } 156 free(args); 157 return NULL; 158 } 159 160 if (nargs < cap) { 161 expr_t **temp_args = realloc(args, nargs * sizeof(expr_t*)); 162 if (temp_args == NULL) { 163 die("Out of memory: can't resize args array for tuple"); 164 } 165 args = temp_args; 166 } 167 168 expr_t *tuple = _alloc_expr(); 169 tuple->pos = tok.pos; 170 tuple->source = lexer_source(lexer); 171 tuple->tag = EXPR_TUPLE; 172 tuple->as.tuple.elements = args; 173 tuple->as.tuple.nelements = nargs; 174 return tuple; 175 } 176 case TOKEN_LBRACE: { 177 lexer_lex(lexer); 178 179 expr_t *first_expr = _parse_expr(lexer, table); 180 token_t next_tok = lexer_peek(lexer); 181 182 if (first_expr == NULL) { 183 if (next_tok.type == TOKEN_RBRACE) { 184 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), next_tok.pos, 1)); 185 186 lexer_lex(lexer); // Consume ')' 187 return NULL; 188 } 189 return NULL; 190 } 191 192 if (next_tok.type != TOKEN_COMMA) { 193 // Single element parenthesized expression 194 if (!_expect(lexer, TOKEN_RBRACE, ERROR_EXPECTED_RBRACE)) { 195 syntax_expr_free(first_expr); 196 return NULL; 197 } 198 return first_expr; 199 } 200 201 // Multi-element tuple 202 lexer_lex(lexer); // consume the first ',' 203 204 size_t cap = 4; 205 size_t nargs = 1; 206 expr_t **args = malloc(cap * sizeof(expr_t*)); 207 if (args == NULL) 208 die("Out of memory: can't allocate args array for function group"); 209 210 args[0] = first_expr; 211 212 while (1) { 213 token_t next_arg_tok = lexer_peek(lexer); 214 expr_t *arg = _parse_expr(lexer, table); 215 216 if (arg == NULL) { 217 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), next_arg_tok.pos, 1)); 218 219 for (size_t i = 0; i < nargs; i++) { 220 syntax_expr_free(args[i]); 221 } 222 free(args); 223 return NULL; 224 } 225 226 if (nargs == cap) { 227 cap *= 2; 228 expr_t **temp_args = realloc(args, cap * sizeof(expr_t*)); 229 if (temp_args == NULL) 230 die("Out of memory: can't enlarge args array for function group"); 231 args = temp_args; 232 } 233 234 args[nargs++] = arg; 235 236 token_t post_arg_tok = lexer_peek(lexer); 237 if (post_arg_tok.type != TOKEN_COMMA) { 238 break; 239 } 240 lexer_lex(lexer); // consume ',' 241 } 242 243 if (!_expect(lexer, TOKEN_RBRACE, ERROR_EXPECTED_RBRACE)) { 244 for (size_t i = 0; i < nargs; i++) { 245 syntax_expr_free(args[i]); 246 } 247 free(args); 248 return NULL; 249 } 250 251 if (nargs < cap) { 252 expr_t **temp_args = realloc(args, nargs * sizeof(expr_t*)); 253 if (temp_args == NULL) { 254 die("Out of memory: can't resize args array for function group"); 255 } 256 args = temp_args; 257 } 258 259 expr_t *tuple = _alloc_expr(); 260 tuple->pos = tok.pos; 261 tuple->source = lexer_source(lexer); 262 tuple->tag = EXPR_GROUP; 263 tuple->as.tuple.elements = args; 264 tuple->as.tuple.nelements = nargs; 265 return tuple; 266 } 267 case TOKEN_IDENT: { 268 lexer_lex(lexer); 269 intern_t name = intern_prefix(table,tok.start,tok.len); 270 expr_t *it = _alloc_expr(); 271 it->pos = tok.pos; 272 it->source = lexer_source(lexer); 273 it->tag = EXPR_PAT_IDENT; 274 it->as.ident = name; 275 return it; 276 } 277 case TOKEN_FLOAT_LIT: { 278 lexer_lex(lexer); 279 double value = strtod(tok.start, NULL); 280 expr_t *lit = _alloc_expr(); 281 lit->pos = tok.pos; 282 lit->source = lexer_source(lexer); 283 lit->tag = EXPR_FLOAT_LIT; 284 lit->as.float_lit = value; 285 return lit; 286 } 287 case TOKEN_INT_LIT: { 288 lexer_lex(lexer); 289 long value = strtol(tok.start, NULL, 10); 290 expr_t *lit = _alloc_expr(); 291 lit->pos = tok.pos; 292 lit->source = lexer_source(lexer); 293 lit->tag = EXPR_INT_LIT; 294 lit->as.int_lit = value; 295 return lit; 296 } 297 case TOKEN_CONSTRUCTOR: { 298 lexer_lex(lexer); 299 intern_t name = intern_prefix(table,tok.start,tok.len); 300 expr_t *it = _alloc_expr(); 301 it->pos = tok.pos; 302 it->source = lexer_source(lexer); 303 it->tag = EXPR_CONSTRUCTOR; 304 it->as.ident = name; 305 return it; 306 } 307 case TOKEN_STRING_LIT: { 308 lexer_lex(lexer); 309 char* contents = _parse_string_literal(tok.start); 310 expr_t *lit = _alloc_expr(); 311 lit->pos = tok.pos; 312 lit->source = lexer_source(lexer); 313 lit->tag = EXPR_STRING_LIT; 314 lit->as.string_lit = contents; 315 return lit; 316 } 317 default: return NULL; 318 } 319} 320 321static expr_t *_parse_call_expr(lexer_t *lexer, intern_table_t *table) { 322 expr_t *lhs = _parse_atom(lexer, table); 323 if (lhs == NULL) return NULL; 324 325 // Loop to handle chained calls like f(a)(b) 326 // If we see '(', we treat 'lhs' as the callee, parse arguments, 327 // update 'lhs' to the new call node, and repeat. 328 while (lexer_peek(lexer).type == TOKEN_LPAREN) { 329 lexer_lex(lexer); // consume '(' 330 331 size_t cap = 4; 332 size_t nargs = 0; 333 expr_t **args = malloc(cap * sizeof(expr_t*)); 334 if (args == NULL) die("Out of memory: can't allocate args array"); 335 336 while (1) { 337 pos_t arg_start_pos = lexer_cur_pos(lexer); 338 // Use _parse_expr here to allow complex args like f(a + b) 339 expr_t *arg = _parse_expr(lexer, table); 340 341 if (arg == NULL) { 342 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), arg_start_pos, 1)); 343 344 // Cleanup 345 for (size_t i = 0; i < nargs; i++) { 346 syntax_expr_free(args[i]); 347 } 348 free(args); 349 syntax_expr_free(lhs); 350 return NULL; 351 } 352 353 if (nargs == cap) { 354 cap *= 2; 355 args = realloc(args, cap * sizeof(expr_t*)); 356 if (args == NULL) 357 die("Out of memory: can't enlarge args array"); 358 } 359 360 args[nargs++] = arg; 361 362 // If we see a comma, consume it and continue to next arg 363 if (lexer_peek(lexer).type == TOKEN_COMMA) { 364 lexer_lex(lexer); 365 continue; 366 } 367 368 // If no comma, we expect the closing parenthesis 369 break; 370 } 371 372 if (!_expect(lexer, TOKEN_RPAREN, ERROR_EXPECTED_RPAREN)) { 373 // Cleanup 374 for (size_t i = 0; i < nargs; i++) { 375 syntax_expr_free(args[i]); 376 } 377 free(args); 378 syntax_expr_free(lhs); 379 return NULL; 380 } 381 // _expect consumes ')' on success 382 383 // shrink array to exact size 384 args = realloc(args, nargs * sizeof(expr_t*)); 385 if (args == NULL) 386 die("Out of memory: can't resize args array"); 387 388 // build n-ary call node 389 expr_t *call = _alloc_expr(); 390 call->pos = lhs->pos; 391 call->source = lexer_source(lexer); 392 call->tag = EXPR_CALL; 393 call->as.call.callee = lhs; 394 call->as.call.arguments = args; 395 call->as.call.nargs = nargs; 396 397 // The result of this call becomes the LHS for the next potential call in the chain 398 lhs = call; 399 } 400 401 return lhs; 402} 403 404static expr_t *_parse_arith_plus(lexer_t *lexer, intern_table_t *table) { 405 expr_t *lhs = _parse_call_expr(lexer, table); 406 while (1) { 407 token_t tok = lexer_peek(lexer); 408 intern_t name = (tok.type == TOKEN_PLUS) ? intern(table, "+") 409 : (tok.type == TOKEN_MINUS) ? intern(table,"-") 410 : NULL; 411 if (name == NULL) { 412 return lhs; 413 } 414 lexer_lex(lexer); 415 expr_t *rhs = _parse_call_expr(lexer, table); 416 expr_t *it = _alloc_expr(); 417 it->pos = tok.pos; 418 it->source = lexer_source(lexer); 419 it->tag = EXPR_CALL; 420 it->as.call.arguments = malloc(sizeof(expr_t*) * 2); 421 if (it->as.call.arguments == NULL) 422 die("Out of memory: cannot allocate args array"); 423 it->as.call.arguments[0] = lhs; 424 it->as.call.arguments[1] = rhs; 425 it->as.call.nargs = 2; 426 it->as.call.callee = _alloc_expr(); 427 it->as.call.callee->tag = EXPR_PAT_IDENT; 428 it->as.call.callee->pos = tok.pos; 429 it->as.call.callee->source = lexer_source(lexer); 430 it->as.call.callee->as.ident = name; 431 lhs = it; 432 } 433} 434 435 436static expr_t *_parse_arith(lexer_t *lexer, intern_table_t *table) { 437 expr_t *lhs = _parse_arith_plus(lexer, table); 438 while (1) { 439 token_t tok = lexer_peek(lexer); 440 intern_t name = (tok.type == TOKEN_MULT) ? intern(table, "*") 441 : (tok.type == TOKEN_DIV) ? intern(table,"/") 442 : (tok.type == TOKEN_MOD) ? intern(table,"mod") 443 : (tok.type == TOKEN_IDIV) ? intern(table,"div") 444 : NULL; 445 if (name == NULL) { 446 return lhs; 447 } 448 lexer_lex(lexer); 449 expr_t *rhs = _parse_arith_plus(lexer, table); 450 expr_t *it = _alloc_expr(); 451 it->tag = EXPR_CALL; 452 it->pos = tok.pos; 453 it->source = lexer_source(lexer); 454 it->as.call.arguments = malloc(sizeof(expr_t*) * 2); 455 if (it->as.call.arguments == NULL) 456 die("Out of memory: cannot allocate args array"); 457 it->as.call.arguments[0] = lhs; 458 it->as.call.arguments[1] = rhs; 459 it->as.call.nargs = 2; 460 it->as.call.callee = _alloc_expr(); 461 it->as.call.callee->tag = EXPR_PAT_IDENT; 462 it->as.call.callee->pos = tok.pos; 463 it->as.call.callee->source = lexer_source(lexer); 464 it->as.call.callee->as.ident = name; 465 lhs = it; 466 } 467} 468 469 470static expr_t *_parse_binop_comp(lexer_t *lexer, intern_table_t *table) { 471 expr_t *lhs = _parse_arith(lexer, table); 472 token_t tok = lexer_peek(lexer); 473 intern_t name = (tok.type == TOKEN_EQ) ? intern(table, "=") 474 : (tok.type == TOKEN_NOTEQ) ? intern(table,"/=") 475 : (tok.type == TOKEN_GREATER) ? intern(table,">") 476 : (tok.type == TOKEN_LESS) ? intern(table,"<") 477 : (tok.type == TOKEN_LESSEQ) ? intern(table,"<=") 478 : (tok.type == TOKEN_GREATEREQ) ? intern(table,">=") 479 : NULL; 480 if (name != NULL) { 481 lexer_lex(lexer); 482 expr_t *rhs = _parse_arith(lexer, table); 483 expr_t *it = _alloc_expr(); 484 it->tag = EXPR_CALL; 485 it->source = lexer_source(lexer); 486 it->pos = tok.pos; 487 it->as.call.arguments = malloc(sizeof(expr_t*) * 2); 488 if (it->as.call.arguments == NULL) 489 die("Out of memory: cannot allocate args array"); 490 it->as.call.arguments[0] = lhs; 491 it->as.call.arguments[1] = rhs; 492 it->as.call.nargs = 2; 493 it->as.call.callee = _alloc_expr(); 494 it->as.call.callee->pos = tok.pos; 495 it->as.call.callee->source = lexer_source(lexer); 496 it->as.call.callee->tag = EXPR_PAT_IDENT; 497 it->as.call.callee->as.ident = name; 498 return it; 499 } 500 return lhs; 501} 502 503static expr_t *_parse_binop_and(lexer_t *lexer, intern_table_t *table) { 504 expr_t *lhs = _parse_binop_comp(lexer, table); 505 token_t tok = lexer_peek(lexer); 506 if (tok.type == TOKEN_AND) { 507 lexer_lex(lexer); 508 expr_t *rhs = _parse_binop_and(lexer, table); 509 expr_t *it = _alloc_expr(); 510 it->source = lexer_source(lexer); 511 it->pos = tok.pos; 512 it->tag = EXPR_SEQ; 513 it->as.seq.first = lhs; 514 it->as.seq.then = rhs; 515 return it; 516 } 517 return lhs; 518} 519static expr_t *_parse_binop_or(lexer_t *lexer, intern_table_t *table) { 520 expr_t *lhs = _parse_binop_and(lexer, table); 521 token_t tok = lexer_peek(lexer); 522 if (tok.type == TOKEN_OR) { 523 lexer_lex(lexer); 524 expr_t *rhs = _parse_binop_or(lexer, table); 525 expr_t *it = _alloc_expr(); 526 it->tag = EXPR_ALT; 527 it->source = lexer_source(lexer); 528 it->pos = tok.pos; 529 it->as.alt.try = lhs; 530 it->as.alt.orelse = rhs; 531 return it; 532 } 533 return lhs; 534} 535/* 536static expr_t *_parse_binop_or(lexer_t *lexer, intern_table_t *table) { 537 expr_t *lhs = _parse_binop_and(lexer, table); 538 token_t tok = lexer_peek(lexer); 539 intern_t or_name = intern(table, "or"); 540 if (tok.type == TOKEN_OR) { 541 lexer_lex(lexer); 542 expr_t *rhs = _parse_binop_or(lexer, table); 543 expr_t *it = _alloc_expr(); 544 it->tag = EXPR_CALL; 545 it->as.call.arguments = malloc(sizeof(expr_t*) * 2); 546 if (it->as.call.arguments == NULL) 547 die("Out of memory: cannot allocate args array"); 548 it->as.call.arguments[0] = lhs; 549 it->as.call.arguments[1] = rhs; 550 it->as.call.callee = _alloc_expr(); 551 it->as.call.callee->tag = EXPR_PAT_IDENT; 552 it->as.call.callee->as.ident = or_name; 553 return it; 554 } 555 return lhs; 556} 557*/ 558static expr_t *_parse_sequence_expr(lexer_t *lexer, intern_table_t *table) { 559 token_t tok = lexer_peek(lexer); 560 if (tok.type == TOKEN_IF) { 561 lexer_lex(lexer); 562 expr_t *lhs = _parse_call_expr(lexer, table); 563 if (lhs == NULL) { 564 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), tok.pos, 1)); 565 } 566 pos_t pos = lexer_cur_pos(lexer); 567 expr_t *rhs = _parse_sequence_expr(lexer, table); 568 if (rhs == NULL) { 569 syntax_expr_free(lhs); 570 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), pos, 1)); 571 } 572 573 expr_t *sequence = _alloc_expr(); 574 sequence->tag = EXPR_SEQ; 575 sequence->pos = lhs->pos; 576 sequence->source = lhs->source; 577 sequence->as.seq.first = lhs; 578 sequence->as.seq.then = rhs; 579 return sequence; 580 } 581 expr_t *lhs = _parse_binop_or(lexer, table); 582 if (lhs == NULL) return NULL; 583 584 token_type next_token = lexer_peek(lexer).type; 585 586 if (next_token == TOKEN_ASSIGN) { 587 expr_t *pattern = lhs; 588 lhs = NULL; // Ownership transferred to 'pattern' variable 589 lexer_lex(lexer); // consume := 590 pos_t value_pos = lexer_cur_pos(lexer); 591 592 expr_t *value_expr = _parse_expr(lexer, table); 593 if (value_expr == NULL) { 594 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), value_pos, 1)); 595 syntax_expr_free(pattern); 596 return NULL; 597 } 598 599 if (!_expect(lexer, TOKEN_SEMI, ERROR_EXPECTED_SEMI)) { 600 syntax_expr_free(value_expr); 601 syntax_expr_free(pattern); 602 return NULL; 603 } 604 605 pos_t body_pos = lexer_cur_pos(lexer); 606 expr_t *body_expr = _parse_sequence_expr(lexer, table); 607 if (body_expr == NULL) { 608 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), body_pos, 1)); 609 syntax_expr_free(value_expr); 610 // Cleanup: Free the pattern node 611 syntax_expr_free(pattern); 612 return NULL; 613 } 614 615 expr_t *let_binding = _alloc_expr(); 616 let_binding->tag = EXPR_LET_BINDING; 617 let_binding->pos = pattern->pos; 618 let_binding->source = pattern->source; 619 let_binding->as.let_binding.pattern.expr = pattern; 620 let_binding->as.let_binding.pattern.bound_vars = (size_t)-1; 621 let_binding->as.let_binding.expr = value_expr; 622 let_binding->as.let_binding.in = body_expr; 623 624 return let_binding; 625 } 626 627 628 return lhs; 629} 630 631static expr_t *_parse_alt_expr(lexer_t *lexer, intern_table_t *table) { 632 expr_t *lhs = _parse_sequence_expr(lexer, table); 633 if (lhs == NULL) return NULL; 634 635 if (lexer_peek(lexer).type == TOKEN_PIPE) { 636 lexer_lex(lexer); 637 638 pos_t rhs_pos = lexer_cur_pos(lexer); 639 expr_t *rhs = _parse_alt_expr(lexer, table); 640 if (rhs == NULL) { 641 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), rhs_pos, 1)); 642 syntax_expr_free(lhs); 643 return NULL; 644 } 645 646 expr_t *disjunction = _alloc_expr(); 647 disjunction->tag = EXPR_ALT; 648 disjunction->pos = lhs->pos; 649 disjunction->source = lhs->source; 650 disjunction->as.alt.try = lhs; 651 disjunction->as.alt.orelse = rhs; 652 653 return disjunction; 654 } 655 656 return lhs; 657} 658 659static expr_t *_parse_expr(lexer_t *lexer, intern_table_t *table) { 660 expr_t *lhs = NULL; 661 expr_t **params = NULL; 662 size_t nparams = 0; 663 664 lhs = _parse_alt_expr(lexer, table); 665 if (lhs == NULL) return NULL; 666 667 token_type next_token = lexer_peek(lexer).type; 668 669 if (next_token == TOKEN_COLON) { 670 lexer_lex(lexer); 671 if (lexer_peek(lexer).type == TOKEN_PIPE) { 672 673 lexer_lex(lexer); 674 } 675 pos_t body_pos = lexer_cur_pos(lexer); 676 677 if (lhs->tag == EXPR_TUPLE) { 678 nparams = lhs->as.tuple.nelements; 679 params = lhs->as.tuple.elements; 680 681 lhs->as.tuple.elements = NULL; 682 lhs->as.tuple.nelements = 0; 683 684 syntax_expr_free(lhs); 685 lhs = NULL; 686 687 } else { 688 nparams = 1; 689 params = malloc(sizeof(expr_t*)); 690 if (params == NULL) die("Out of memory: cannot allocate params"); 691 params[0] = lhs; 692 lhs = NULL; 693 } 694 695 expr_t *body = _parse_expr(lexer, table); 696 if (body == NULL) { 697 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), body_pos, 1)); 698 if (params != NULL) { 699 for(size_t i=0; i<nparams; i++) syntax_expr_free(params[i]); 700 free(params); 701 } 702 return NULL; 703 } 704 705 expr_t *lambda = _alloc_expr(); 706 lambda->tag = EXPR_LAMBDA; 707 lambda->pos = params[0]->pos; 708 lambda->source = params[0]->source; 709 lambda->as.lambda_raw.patterns = params; 710 lambda->as.lambda_raw.npatterns = nparams; 711 lambda->as.lambda_raw.body = body; 712 return lambda; 713 } 714 715 return lhs; 716} 717 718 719 720static size_t _scope_check_pattern(expr_t *it, scope_t *scope) { 721 switch (it->tag) { 722 case EXPR_ALT: 723 case EXPR_LAMBDA: 724 case EXPR_LAMBDA_COMPILED: 725 case EXPR_GROUP: 726 case EXPR_SEQ: 727 case EXPR_BUILTIN: 728 case EXPR_LET_BINDING: 729 error_report(error(ERROR_INVALID_PATTERN, it->source,it->pos,1)); 730 return (size_t)-1; 731 case EXPR_VAR: 732 die("Impossible happened: Found a var node when scope-checking."); 733 case EXPR_TUPLE: { 734 size_t i1 = 0; 735 for (int i = 0; i < it->as.tuple.nelements; i++) { 736 size_t i2 = _scope_check_pattern(it->as.tuple.elements[i],scope); 737 if (i2 == (size_t)-1) return i2; 738 i1 += i2; 739 } 740 return i1; 741 } 742 case EXPR_CALL: { 743 if (it->as.call.callee->tag != EXPR_CALL 744 && it->as.call.callee->tag != EXPR_CONSTRUCTOR) { 745 error_report(error(ERROR_INVALID_PATTERN, it->source,it->pos,1)); 746 return (size_t)-1; 747 } 748 size_t i1 = _scope_check_pattern(it->as.call.callee, scope); 749 if (i1 == (size_t)-1) return i1; 750 for (int i = 0; i < it->as.call.nargs; i++) { 751 size_t i2 = _scope_check_pattern(it->as.call.arguments[i],scope); 752 if (i2 == (size_t)-1) return i2; 753 i1 += i2; 754 } 755 return i1; 756 } 757 case EXPR_STRING_LIT: 758 case EXPR_INT_LIT: 759 case EXPR_FLOAT_LIT: 760 case EXPR_CONSTRUCTOR: 761 return 0; 762 case EXPR_PAT_IDENT: { 763 scope_push(scope,it->as.ident); 764 return 1; 765 } 766 } 767} 768static inline size_t max(size_t a, size_t b) { 769 return (a > b) ? a : b; 770} 771static size_t _max_let_depth(expr_t *it) { 772 switch (it->tag) { 773 case EXPR_ALT: 774 case EXPR_SEQ: 775 return max(_max_let_depth(it->as.alt.try),_max_let_depth(it->as.alt.orelse)); 776 case EXPR_CALL: { 777 size_t ret = _max_let_depth(it->as.call.callee); 778 for (int i = 0; i < it->as.call.nargs;i++) { 779 ret = max(ret,_max_let_depth(it->as.call.arguments[i])); 780 } 781 return ret; 782 } 783 case EXPR_LET_BINDING: { 784 size_t expr = _max_let_depth(it->as.let_binding.expr); 785 return max(expr,_max_let_depth(it->as.let_binding.in) + it->as.let_binding.pattern.bound_vars); 786 } 787 case EXPR_TUPLE: 788 case EXPR_GROUP: { 789 size_t ret = 0; 790 for (int i = 0; i < it->as.tuple.nelements;i++) { 791 ret = max(ret,_max_let_depth(it->as.tuple.elements[i])); 792 } 793 return ret; 794 } 795 case EXPR_LAMBDA: 796 case EXPR_LAMBDA_COMPILED: 797 case EXPR_BUILTIN: 798 case EXPR_INT_LIT: 799 case EXPR_FLOAT_LIT: 800 case EXPR_PAT_IDENT: 801 case EXPR_STRING_LIT: 802 case EXPR_VAR: 803 case EXPR_CONSTRUCTOR: 804 return 0; 805 } 806} 807 808size_t syntax_expr_required_locals(expr_t *it) { 809 return _max_let_depth(it); 810} 811 812static char* INTERNAL_NAMES[] 813 = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", 814 "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"}; 815 816static bool _scope_check_expr(expr_t *it, scope_t *scope, builtins_t *builtins, bitset_t *used) { 817 818 //printf("SCOPE CHECK:"); 819 //scope_debug_dump(scope); 820 //syntax_dump_expr(it); 821 //printf("\n"); 822 switch (it->tag) { 823 case EXPR_ALT: 824 case EXPR_SEQ: 825 return _scope_check_expr(it->as.alt.try, scope, builtins, used) 826 && _scope_check_expr(it->as.alt.orelse, scope, builtins, used); 827 case EXPR_LAMBDA: { 828 size_t idx = 0; 829 expr_t *body = it->as.lambda_raw.body; 830 size_t num_args = it->as.lambda_raw.npatterns; 831 for (int i = 0; i < it->as.lambda_raw.npatterns; i++) { 832 if (it->as.lambda_raw.patterns[i]->tag == EXPR_PAT_IDENT) { 833 scope_push(scope, it->as.lambda_raw.patterns[i]->as.ident); 834 } else { 835 expr_t *ident_expr = _alloc_expr(); 836 ident_expr->tag = EXPR_PAT_IDENT; 837 ident_expr->pos = it->as.lambda_raw.patterns[i]->pos; 838 ident_expr->source = it->as.lambda_raw.patterns[i]->source; 839 ident_expr->as.ident = INTERNAL_NAMES[idx++]; 840 scope_push(scope, ident_expr->as.ident); 841 expr_t *let_binding = _alloc_expr(); 842 let_binding->tag = EXPR_LET_BINDING; 843 let_binding->pos = it->as.lambda_raw.patterns[i]->pos; 844 let_binding->source = it->as.lambda_raw.patterns[i]->source; 845 let_binding->as.let_binding.pattern.expr = it->as.lambda_raw.patterns[i]; 846 let_binding->as.let_binding.pattern.bound_vars = (size_t)-1; 847 let_binding->as.let_binding.expr = ident_expr; 848 let_binding->as.let_binding.in = body; 849 body = let_binding; 850 } 851 } 852 it->tag = EXPR_LAMBDA_COMPILED; 853 it->as.lambda.body = body; 854 it->as.lambda.args = num_args; 855 856 bitset_t *lambda_mask = bitset_alloc(); 857 bool ret = _scope_check_expr(it->as.lambda.body,scope, builtins, lambda_mask); 858 bitset_shift_down(lambda_mask,num_args); 859 it->as.lambda.mask = lambda_mask; 860 bitset_union(used, lambda_mask); 861 it->as.lambda.num_lets = _max_let_depth(it->as.lambda.body); 862 scope_pop(scope,num_args); 863 return ret; 864 } 865 case EXPR_LET_BINDING: { 866 if (!_scope_check_expr(it->as.let_binding.expr,scope,builtins, used)) 867 return false; 868 size_t amount = _scope_check_pattern(it->as.let_binding.pattern.expr,scope); 869 if (amount == (size_t)-1) 870 return false; 871 it->as.let_binding.pattern.bound_vars = amount; 872 bitset_shift_up(used, amount); 873 bool ret = _scope_check_expr(it->as.let_binding.in,scope,builtins, used); 874 bitset_shift_down(used, amount); 875 scope_pop(scope,amount); 876 return ret; 877 } 878 case EXPR_CALL: 879 if (_scope_check_expr(it->as.call.callee, scope,builtins, used)) { 880 for (int i = 0; i < it->as.call.nargs; i++) { 881 if (!_scope_check_expr(it->as.call.arguments[i], scope,builtins, used)) 882 return false; 883 } 884 return true; 885 } else return false; 886 case EXPR_STRING_LIT: 887 case EXPR_INT_LIT: 888 case EXPR_FLOAT_LIT: 889 case EXPR_CONSTRUCTOR: 890 return true; 891 case EXPR_TUPLE: 892 // TODO 893 case EXPR_GROUP: 894 for (int i = 0; i < it->as.group.nclauses; i++) { 895 if (!_scope_check_expr(it->as.group.clauses[i], scope,builtins, used)) 896 return false; 897 } 898 return true; 899 900 case EXPR_LAMBDA_COMPILED: 901 case EXPR_VAR: 902 case EXPR_BUILTIN: 903 die("Impossible happened: Found a compiled node when scope-checking."); 904 case EXPR_PAT_IDENT: { 905 de_bruijn_t idx = scope_lookup(scope, it->as.ident); 906 if (idx == (de_bruijn_t)-1) { 907 builtin_func_t *ptr = builtins_lookup(builtins, it->as.ident); 908 if (ptr != NULL) { 909 it->tag = EXPR_BUILTIN; 910 it->as.builtin = ptr; 911 return true; 912 } else { 913 //printf("FOO: %s\n", it->as.ident); 914 error_report(error(ERROR_UNKNOWN_VARIABLE, it->source, it->pos, strlen(it->as.ident))); 915 return false; 916 } 917 } 918 it->tag = EXPR_VAR; 919 it->as.var = idx; 920 bitset_set(used,idx); 921 return true; 922 } 923 } 924} 925 926expr_t *syntax_parse_expr(lexer_t *lexer, builtins_t *builtins, intern_table_t *table) { 927 expr_t *it = _parse_expr(lexer, table); 928 if (it == NULL) return NULL; 929 bitset_t *used = bitset_alloc(); 930 scope_t *scope = scope_alloc(); 931 syntax_dump_expr(it); 932 _scope_check_expr(it, scope, builtins, used); 933 return it; 934} 935void syntax_expr_free(expr_t *it) { 936 switch (it->tag) { 937 case EXPR_ALT: 938 case EXPR_SEQ: 939 syntax_expr_free(it->as.alt.try); 940 syntax_expr_free(it->as.alt.orelse); 941 break; 942 case EXPR_LAMBDA: 943 for (int i = 0; i < it->as.lambda_raw.npatterns;i++) { 944 syntax_expr_free(it->as.lambda_raw.patterns[i]); 945 } 946 free(it->as.lambda_raw.patterns); 947 syntax_expr_free(it->as.lambda_raw.body); 948 break; 949 case EXPR_LAMBDA_COMPILED: 950 syntax_expr_free(it->as.lambda.body); 951 if (it->as.lambda.mask != NULL) bitset_free(it->as.lambda.mask); 952 break; 953 954 case EXPR_LET_BINDING: 955 syntax_expr_free(it->as.let_binding.expr); 956 syntax_expr_free(it->as.let_binding.in); 957 syntax_expr_free(it->as.let_binding.pattern.expr); 958 break; 959 960 case EXPR_TUPLE: 961 for (int i = 0; i < it->as.tuple.nelements;i++) { 962 syntax_expr_free(it->as.tuple.elements[i]); 963 } 964 free(it->as.tuple.elements); 965 break; 966 case EXPR_GROUP: 967 for (int i = 0; i < it->as.group.nclauses;i++) { 968 syntax_expr_free(it->as.group.clauses[i]); 969 } 970 free(it->as.group.clauses); 971 break; 972 case EXPR_CALL: 973 syntax_expr_free(it->as.call.callee); 974 for (int i = 0; i < it->as.call.nargs;i++) { 975 syntax_expr_free(it->as.call.arguments[i]); 976 } 977 free(it->as.call.arguments); 978 break; 979 case EXPR_STRING_LIT: 980 free(it->as.string_lit); 981 break; 982 default: break; 983 } 984 free(it); 985} 986 987 988void syntax_dump_expr(expr_t *it) { 989 switch (it->tag) { 990 case EXPR_ALT: 991 printf("alt("); 992 goto bits; 993 case EXPR_SEQ: 994 printf("seq("); 995 bits: 996 syntax_dump_expr(it->as.alt.try); 997 printf(","); 998 syntax_dump_expr(it->as.alt.orelse); 999 printf(")"); 1000 break; 1001 case EXPR_LAMBDA: 1002 printf("lambda("); 1003 for (int i = 0; i < it->as.lambda_raw.npatterns;i++) { 1004 syntax_dump_expr(it->as.lambda_raw.patterns[i]); 1005 printf(": "); 1006 } 1007 syntax_dump_expr(it->as.lambda_raw.body); 1008 printf(")"); 1009 break; 1010 case EXPR_LET_BINDING: 1011 printf("let("); 1012 syntax_dump_expr(it->as.let_binding.pattern.expr); 1013 printf(":="); 1014 syntax_dump_expr(it->as.let_binding.expr); 1015 printf(";"); 1016 syntax_dump_expr(it->as.let_binding.in); 1017 printf(")"); 1018 break; 1019 case EXPR_CALL: 1020 syntax_dump_expr(it->as.call.callee); 1021 printf("("); 1022 for (int i = 0; i < it->as.call.nargs;i++) { 1023 syntax_dump_expr(it->as.call.arguments[i]); 1024 printf(","); 1025 } 1026 printf(")"); 1027 break; 1028 case EXPR_TUPLE: 1029 printf("("); 1030 for (int i = 0; i < it->as.tuple.nelements;i++) { 1031 syntax_dump_expr(it->as.tuple.elements[i]); 1032 printf(","); 1033 } 1034 printf(")"); 1035 break; 1036 case EXPR_BUILTIN: 1037 printf("<builtin%p>",it->as.builtin); 1038 break; 1039 case EXPR_LAMBDA_COMPILED: 1040 printf("lambda_c(%zu,",it->as.lambda.args); 1041 syntax_dump_expr(it->as.lambda.body); 1042 printf(")"); 1043 break; 1044 case EXPR_GROUP: 1045 printf("{"); 1046 for (int i = 0; i < it->as.group.nclauses;i++) { 1047 syntax_dump_expr(it->as.group.clauses[i]); 1048 printf(","); 1049 } 1050 printf("}"); 1051 break; 1052 case EXPR_STRING_LIT: 1053 printf("\"%s\"",it->as.string_lit); 1054 break; 1055 case EXPR_INT_LIT: 1056 printf("%ld", it->as.int_lit); 1057 break; 1058 case EXPR_FLOAT_LIT: 1059 printf("%f", it->as.float_lit); 1060 break; 1061 case EXPR_PAT_IDENT: 1062 printf("%s", it->as.ident); 1063 break; 1064 case EXPR_VAR: 1065 printf("\\%zu", it->as.var); 1066 break; 1067 case EXPR_CONSTRUCTOR: 1068 printf("<%s>", it->as.constructor); 1069 break; 1070 default: break; 1071 } 1072} 1073