the hito embeddable programming language
1#include "syntax.h"
2#include "bitset.h"
3#include "builtins.h"
4#include "error.h"
5#include "lexer.h"
6#include "scope.h"
7#include "util.h"
8#include "intern.h"
9#include "value.h"
10#include <string.h>
11#include <ctype.h>
12#include <stdlib.h>
13#include <stdbool.h>
14#include <stdio.h>
15
16static bool _expect(lexer_t *lexer, token_type expected, error_tag on_failure) {
17 token_t tok = lexer_lex(lexer);
18 if (tok.type != expected) {
19 error_report(error(on_failure,lexer_source(lexer),tok.pos,tok.len));
20 return false;
21 } else {
22 return true;
23 }
24}
25
26static expr_t *_alloc_expr(void) {
27 expr_t *ptr = malloc(sizeof(struct expr));
28 if (ptr == NULL) {
29 die("Out of memory: cannot allocate expr struct");
30 }
31 return ptr;
32}
33
34static char *_parse_string_literal(const char *input) {
35 if (!input || *input != '"') {
36 die("Provided string is not a string literal");
37 }
38 const char *p = input + 1; // skip opening quote
39 size_t capacity = 16;
40 size_t length = 0;
41 char *result = malloc(capacity);
42 if (result == NULL) {
43 die("Out of memory: Cannot allocate space for string literal");
44 }
45
46 while (*p != '\0' && *p != '"') {
47 char c = *p++;
48 if (c == '\\') {
49 if (!*p) break; // unterminated escape
50 char esc = *p++;
51 switch (esc) {
52 case 'n': c = '\n'; break;
53 case 't': c = '\t'; break;
54 case 'r': c = '\r'; break;
55 case '\\': c = '\\'; break;
56 case '"': c = '"'; break;
57 case '\'': c = '\''; break;
58 default: c = esc; break;
59 }
60 }
61 if (length + 1 >= capacity) {
62 capacity *= 2;
63 char *tmp = realloc(result, capacity);
64 if (tmp == NULL) {
65 die("Out of memory: Cannot enlarge space for string literal");
66 }
67 result = tmp;
68 }
69 result[length++] = c;
70 }
71 if (*p != '"') {
72 die("The impossible happened");
73 }
74
75 result[length] = '\0'; // null-terminate
76 return result;
77}
78
79static expr_t *_parse_expr(lexer_t *lexer, intern_table_t *table);
80
81
82static expr_t *_parse_atom(lexer_t *lexer, intern_table_t *table) {
83 token_t tok = lexer_peek(lexer);
84 switch (tok.type) {
85 case TOKEN_LPAREN: {
86 lexer_lex(lexer);
87
88 expr_t *first_expr = _parse_expr(lexer, table);
89 token_t next_tok = lexer_peek(lexer);
90
91 if (first_expr == NULL) {
92 if (next_tok.type == TOKEN_RPAREN) {
93 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), next_tok.pos, 1));
94
95 lexer_lex(lexer); // Consume ')'
96 return NULL;
97 }
98 return NULL;
99 }
100
101 if (next_tok.type != TOKEN_COMMA) {
102 // Single element parenthesized expression
103 if (!_expect(lexer, TOKEN_RPAREN, ERROR_EXPECTED_RPAREN)) {
104 syntax_expr_free(first_expr);
105 return NULL;
106 }
107 return first_expr;
108 }
109
110 // Multi-element tuple
111 lexer_lex(lexer); // consume the first ','
112
113 size_t cap = 4;
114 size_t nargs = 1;
115 expr_t **args = malloc(cap * sizeof(expr_t*));
116 if (args == NULL)
117 die("Out of memory: can't allocate args array for tuple");
118
119 args[0] = first_expr;
120
121 while (1) {
122 token_t next_arg_tok = lexer_peek(lexer);
123 expr_t *arg = _parse_expr(lexer, table);
124
125 if (arg == NULL) {
126 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), next_arg_tok.pos, 1));
127
128 for (size_t i = 0; i < nargs; i++) {
129 syntax_expr_free(args[i]);
130 }
131 free(args);
132 return NULL;
133 }
134
135 if (nargs == cap) {
136 cap *= 2;
137 expr_t **temp_args = realloc(args, cap * sizeof(expr_t*));
138 if (temp_args == NULL)
139 die("Out of memory: can't enlarge args array for tuple");
140 args = temp_args;
141 }
142
143 args[nargs++] = arg;
144
145 token_t post_arg_tok = lexer_peek(lexer);
146 if (post_arg_tok.type != TOKEN_COMMA) {
147 break;
148 }
149 lexer_lex(lexer); // consume ','
150 }
151
152 if (!_expect(lexer, TOKEN_RPAREN, ERROR_EXPECTED_RPAREN)) {
153 for (size_t i = 0; i < nargs; i++) {
154 syntax_expr_free(args[i]);
155 }
156 free(args);
157 return NULL;
158 }
159
160 if (nargs < cap) {
161 expr_t **temp_args = realloc(args, nargs * sizeof(expr_t*));
162 if (temp_args == NULL) {
163 die("Out of memory: can't resize args array for tuple");
164 }
165 args = temp_args;
166 }
167
168 expr_t *tuple = _alloc_expr();
169 tuple->pos = tok.pos;
170 tuple->source = lexer_source(lexer);
171 tuple->tag = EXPR_TUPLE;
172 tuple->as.tuple.elements = args;
173 tuple->as.tuple.nelements = nargs;
174 return tuple;
175 }
176 case TOKEN_LBRACE: {
177 lexer_lex(lexer);
178
179 expr_t *first_expr = _parse_expr(lexer, table);
180 token_t next_tok = lexer_peek(lexer);
181
182 if (first_expr == NULL) {
183 if (next_tok.type == TOKEN_RBRACE) {
184 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), next_tok.pos, 1));
185
186 lexer_lex(lexer); // Consume ')'
187 return NULL;
188 }
189 return NULL;
190 }
191
192 if (next_tok.type != TOKEN_COMMA) {
193 // Single element parenthesized expression
194 if (!_expect(lexer, TOKEN_RBRACE, ERROR_EXPECTED_RBRACE)) {
195 syntax_expr_free(first_expr);
196 return NULL;
197 }
198 return first_expr;
199 }
200
201 // Multi-element tuple
202 lexer_lex(lexer); // consume the first ','
203
204 size_t cap = 4;
205 size_t nargs = 1;
206 expr_t **args = malloc(cap * sizeof(expr_t*));
207 if (args == NULL)
208 die("Out of memory: can't allocate args array for function group");
209
210 args[0] = first_expr;
211
212 while (1) {
213 token_t next_arg_tok = lexer_peek(lexer);
214 expr_t *arg = _parse_expr(lexer, table);
215
216 if (arg == NULL) {
217 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), next_arg_tok.pos, 1));
218
219 for (size_t i = 0; i < nargs; i++) {
220 syntax_expr_free(args[i]);
221 }
222 free(args);
223 return NULL;
224 }
225
226 if (nargs == cap) {
227 cap *= 2;
228 expr_t **temp_args = realloc(args, cap * sizeof(expr_t*));
229 if (temp_args == NULL)
230 die("Out of memory: can't enlarge args array for function group");
231 args = temp_args;
232 }
233
234 args[nargs++] = arg;
235
236 token_t post_arg_tok = lexer_peek(lexer);
237 if (post_arg_tok.type != TOKEN_COMMA) {
238 break;
239 }
240 lexer_lex(lexer); // consume ','
241 }
242
243 if (!_expect(lexer, TOKEN_RBRACE, ERROR_EXPECTED_RBRACE)) {
244 for (size_t i = 0; i < nargs; i++) {
245 syntax_expr_free(args[i]);
246 }
247 free(args);
248 return NULL;
249 }
250
251 if (nargs < cap) {
252 expr_t **temp_args = realloc(args, nargs * sizeof(expr_t*));
253 if (temp_args == NULL) {
254 die("Out of memory: can't resize args array for function group");
255 }
256 args = temp_args;
257 }
258
259 expr_t *tuple = _alloc_expr();
260 tuple->pos = tok.pos;
261 tuple->source = lexer_source(lexer);
262 tuple->tag = EXPR_GROUP;
263 tuple->as.tuple.elements = args;
264 tuple->as.tuple.nelements = nargs;
265 return tuple;
266 }
267 case TOKEN_IDENT: {
268 lexer_lex(lexer);
269 intern_t name = intern_prefix(table,tok.start,tok.len);
270 expr_t *it = _alloc_expr();
271 it->pos = tok.pos;
272 it->source = lexer_source(lexer);
273 it->tag = EXPR_PAT_IDENT;
274 it->as.ident = name;
275 return it;
276 }
277 case TOKEN_FLOAT_LIT: {
278 lexer_lex(lexer);
279 double value = strtod(tok.start, NULL);
280 expr_t *lit = _alloc_expr();
281 lit->pos = tok.pos;
282 lit->source = lexer_source(lexer);
283 lit->tag = EXPR_FLOAT_LIT;
284 lit->as.float_lit = value;
285 return lit;
286 }
287 case TOKEN_INT_LIT: {
288 lexer_lex(lexer);
289 long value = strtol(tok.start, NULL, 10);
290 expr_t *lit = _alloc_expr();
291 lit->pos = tok.pos;
292 lit->source = lexer_source(lexer);
293 lit->tag = EXPR_INT_LIT;
294 lit->as.int_lit = value;
295 return lit;
296 }
297 case TOKEN_CONSTRUCTOR: {
298 lexer_lex(lexer);
299 intern_t name = intern_prefix(table,tok.start,tok.len);
300 expr_t *it = _alloc_expr();
301 it->pos = tok.pos;
302 it->source = lexer_source(lexer);
303 it->tag = EXPR_CONSTRUCTOR;
304 it->as.ident = name;
305 return it;
306 }
307 case TOKEN_STRING_LIT: {
308 lexer_lex(lexer);
309 char* contents = _parse_string_literal(tok.start);
310 expr_t *lit = _alloc_expr();
311 lit->pos = tok.pos;
312 lit->source = lexer_source(lexer);
313 lit->tag = EXPR_STRING_LIT;
314 lit->as.string_lit = contents;
315 return lit;
316 }
317 default: return NULL;
318 }
319}
320
321static expr_t *_parse_call_expr(lexer_t *lexer, intern_table_t *table) {
322 expr_t *lhs = _parse_atom(lexer, table);
323 if (lhs == NULL) return NULL;
324
325 // Loop to handle chained calls like f(a)(b)
326 // If we see '(', we treat 'lhs' as the callee, parse arguments,
327 // update 'lhs' to the new call node, and repeat.
328 while (lexer_peek(lexer).type == TOKEN_LPAREN) {
329 lexer_lex(lexer); // consume '('
330
331 size_t cap = 4;
332 size_t nargs = 0;
333 expr_t **args = malloc(cap * sizeof(expr_t*));
334 if (args == NULL) die("Out of memory: can't allocate args array");
335
336 while (1) {
337 pos_t arg_start_pos = lexer_cur_pos(lexer);
338 // Use _parse_expr here to allow complex args like f(a + b)
339 expr_t *arg = _parse_expr(lexer, table);
340
341 if (arg == NULL) {
342 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), arg_start_pos, 1));
343
344 // Cleanup
345 for (size_t i = 0; i < nargs; i++) {
346 syntax_expr_free(args[i]);
347 }
348 free(args);
349 syntax_expr_free(lhs);
350 return NULL;
351 }
352
353 if (nargs == cap) {
354 cap *= 2;
355 args = realloc(args, cap * sizeof(expr_t*));
356 if (args == NULL)
357 die("Out of memory: can't enlarge args array");
358 }
359
360 args[nargs++] = arg;
361
362 // If we see a comma, consume it and continue to next arg
363 if (lexer_peek(lexer).type == TOKEN_COMMA) {
364 lexer_lex(lexer);
365 continue;
366 }
367
368 // If no comma, we expect the closing parenthesis
369 break;
370 }
371
372 if (!_expect(lexer, TOKEN_RPAREN, ERROR_EXPECTED_RPAREN)) {
373 // Cleanup
374 for (size_t i = 0; i < nargs; i++) {
375 syntax_expr_free(args[i]);
376 }
377 free(args);
378 syntax_expr_free(lhs);
379 return NULL;
380 }
381 // _expect consumes ')' on success
382
383 // shrink array to exact size
384 args = realloc(args, nargs * sizeof(expr_t*));
385 if (args == NULL)
386 die("Out of memory: can't resize args array");
387
388 // build n-ary call node
389 expr_t *call = _alloc_expr();
390 call->pos = lhs->pos;
391 call->source = lexer_source(lexer);
392 call->tag = EXPR_CALL;
393 call->as.call.callee = lhs;
394 call->as.call.arguments = args;
395 call->as.call.nargs = nargs;
396
397 // The result of this call becomes the LHS for the next potential call in the chain
398 lhs = call;
399 }
400
401 return lhs;
402}
403
404static expr_t *_parse_arith_plus(lexer_t *lexer, intern_table_t *table) {
405 expr_t *lhs = _parse_call_expr(lexer, table);
406 while (1) {
407 token_t tok = lexer_peek(lexer);
408 intern_t name = (tok.type == TOKEN_PLUS) ? intern(table, "+")
409 : (tok.type == TOKEN_MINUS) ? intern(table,"-")
410 : NULL;
411 if (name == NULL) {
412 return lhs;
413 }
414 lexer_lex(lexer);
415 expr_t *rhs = _parse_call_expr(lexer, table);
416 expr_t *it = _alloc_expr();
417 it->pos = tok.pos;
418 it->source = lexer_source(lexer);
419 it->tag = EXPR_CALL;
420 it->as.call.arguments = malloc(sizeof(expr_t*) * 2);
421 if (it->as.call.arguments == NULL)
422 die("Out of memory: cannot allocate args array");
423 it->as.call.arguments[0] = lhs;
424 it->as.call.arguments[1] = rhs;
425 it->as.call.nargs = 2;
426 it->as.call.callee = _alloc_expr();
427 it->as.call.callee->tag = EXPR_PAT_IDENT;
428 it->as.call.callee->pos = tok.pos;
429 it->as.call.callee->source = lexer_source(lexer);
430 it->as.call.callee->as.ident = name;
431 lhs = it;
432 }
433}
434
435
436static expr_t *_parse_arith(lexer_t *lexer, intern_table_t *table) {
437 expr_t *lhs = _parse_arith_plus(lexer, table);
438 while (1) {
439 token_t tok = lexer_peek(lexer);
440 intern_t name = (tok.type == TOKEN_MULT) ? intern(table, "*")
441 : (tok.type == TOKEN_DIV) ? intern(table,"/")
442 : (tok.type == TOKEN_MOD) ? intern(table,"mod")
443 : (tok.type == TOKEN_IDIV) ? intern(table,"div")
444 : NULL;
445 if (name == NULL) {
446 return lhs;
447 }
448 lexer_lex(lexer);
449 expr_t *rhs = _parse_arith_plus(lexer, table);
450 expr_t *it = _alloc_expr();
451 it->tag = EXPR_CALL;
452 it->pos = tok.pos;
453 it->source = lexer_source(lexer);
454 it->as.call.arguments = malloc(sizeof(expr_t*) * 2);
455 if (it->as.call.arguments == NULL)
456 die("Out of memory: cannot allocate args array");
457 it->as.call.arguments[0] = lhs;
458 it->as.call.arguments[1] = rhs;
459 it->as.call.nargs = 2;
460 it->as.call.callee = _alloc_expr();
461 it->as.call.callee->tag = EXPR_PAT_IDENT;
462 it->as.call.callee->pos = tok.pos;
463 it->as.call.callee->source = lexer_source(lexer);
464 it->as.call.callee->as.ident = name;
465 lhs = it;
466 }
467}
468
469
470static expr_t *_parse_binop_comp(lexer_t *lexer, intern_table_t *table) {
471 expr_t *lhs = _parse_arith(lexer, table);
472 token_t tok = lexer_peek(lexer);
473 intern_t name = (tok.type == TOKEN_EQ) ? intern(table, "=")
474 : (tok.type == TOKEN_NOTEQ) ? intern(table,"/=")
475 : (tok.type == TOKEN_GREATER) ? intern(table,">")
476 : (tok.type == TOKEN_LESS) ? intern(table,"<")
477 : (tok.type == TOKEN_LESSEQ) ? intern(table,"<=")
478 : (tok.type == TOKEN_GREATEREQ) ? intern(table,">=")
479 : NULL;
480 if (name != NULL) {
481 lexer_lex(lexer);
482 expr_t *rhs = _parse_arith(lexer, table);
483 expr_t *it = _alloc_expr();
484 it->tag = EXPR_CALL;
485 it->source = lexer_source(lexer);
486 it->pos = tok.pos;
487 it->as.call.arguments = malloc(sizeof(expr_t*) * 2);
488 if (it->as.call.arguments == NULL)
489 die("Out of memory: cannot allocate args array");
490 it->as.call.arguments[0] = lhs;
491 it->as.call.arguments[1] = rhs;
492 it->as.call.nargs = 2;
493 it->as.call.callee = _alloc_expr();
494 it->as.call.callee->pos = tok.pos;
495 it->as.call.callee->source = lexer_source(lexer);
496 it->as.call.callee->tag = EXPR_PAT_IDENT;
497 it->as.call.callee->as.ident = name;
498 return it;
499 }
500 return lhs;
501}
502
503static expr_t *_parse_binop_and(lexer_t *lexer, intern_table_t *table) {
504 expr_t *lhs = _parse_binop_comp(lexer, table);
505 token_t tok = lexer_peek(lexer);
506 if (tok.type == TOKEN_AND) {
507 lexer_lex(lexer);
508 expr_t *rhs = _parse_binop_and(lexer, table);
509 expr_t *it = _alloc_expr();
510 it->source = lexer_source(lexer);
511 it->pos = tok.pos;
512 it->tag = EXPR_SEQ;
513 it->as.seq.first = lhs;
514 it->as.seq.then = rhs;
515 return it;
516 }
517 return lhs;
518}
519static expr_t *_parse_binop_or(lexer_t *lexer, intern_table_t *table) {
520 expr_t *lhs = _parse_binop_and(lexer, table);
521 token_t tok = lexer_peek(lexer);
522 if (tok.type == TOKEN_OR) {
523 lexer_lex(lexer);
524 expr_t *rhs = _parse_binop_or(lexer, table);
525 expr_t *it = _alloc_expr();
526 it->tag = EXPR_ALT;
527 it->source = lexer_source(lexer);
528 it->pos = tok.pos;
529 it->as.alt.try = lhs;
530 it->as.alt.orelse = rhs;
531 return it;
532 }
533 return lhs;
534}
535/*
536static expr_t *_parse_binop_or(lexer_t *lexer, intern_table_t *table) {
537 expr_t *lhs = _parse_binop_and(lexer, table);
538 token_t tok = lexer_peek(lexer);
539 intern_t or_name = intern(table, "or");
540 if (tok.type == TOKEN_OR) {
541 lexer_lex(lexer);
542 expr_t *rhs = _parse_binop_or(lexer, table);
543 expr_t *it = _alloc_expr();
544 it->tag = EXPR_CALL;
545 it->as.call.arguments = malloc(sizeof(expr_t*) * 2);
546 if (it->as.call.arguments == NULL)
547 die("Out of memory: cannot allocate args array");
548 it->as.call.arguments[0] = lhs;
549 it->as.call.arguments[1] = rhs;
550 it->as.call.callee = _alloc_expr();
551 it->as.call.callee->tag = EXPR_PAT_IDENT;
552 it->as.call.callee->as.ident = or_name;
553 return it;
554 }
555 return lhs;
556}
557*/
558static expr_t *_parse_sequence_expr(lexer_t *lexer, intern_table_t *table) {
559 token_t tok = lexer_peek(lexer);
560 if (tok.type == TOKEN_IF) {
561 lexer_lex(lexer);
562 expr_t *lhs = _parse_call_expr(lexer, table);
563 if (lhs == NULL) {
564 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), tok.pos, 1));
565 }
566 pos_t pos = lexer_cur_pos(lexer);
567 expr_t *rhs = _parse_sequence_expr(lexer, table);
568 if (rhs == NULL) {
569 syntax_expr_free(lhs);
570 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), pos, 1));
571 }
572
573 expr_t *sequence = _alloc_expr();
574 sequence->tag = EXPR_SEQ;
575 sequence->pos = lhs->pos;
576 sequence->source = lhs->source;
577 sequence->as.seq.first = lhs;
578 sequence->as.seq.then = rhs;
579 return sequence;
580 }
581 expr_t *lhs = _parse_binop_or(lexer, table);
582 if (lhs == NULL) return NULL;
583
584 token_type next_token = lexer_peek(lexer).type;
585
586 if (next_token == TOKEN_ASSIGN) {
587 expr_t *pattern = lhs;
588 lhs = NULL; // Ownership transferred to 'pattern' variable
589 lexer_lex(lexer); // consume :=
590 pos_t value_pos = lexer_cur_pos(lexer);
591
592 expr_t *value_expr = _parse_expr(lexer, table);
593 if (value_expr == NULL) {
594 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), value_pos, 1));
595 syntax_expr_free(pattern);
596 return NULL;
597 }
598
599 if (!_expect(lexer, TOKEN_SEMI, ERROR_EXPECTED_SEMI)) {
600 syntax_expr_free(value_expr);
601 syntax_expr_free(pattern);
602 return NULL;
603 }
604
605 pos_t body_pos = lexer_cur_pos(lexer);
606 expr_t *body_expr = _parse_sequence_expr(lexer, table);
607 if (body_expr == NULL) {
608 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), body_pos, 1));
609 syntax_expr_free(value_expr);
610 // Cleanup: Free the pattern node
611 syntax_expr_free(pattern);
612 return NULL;
613 }
614
615 expr_t *let_binding = _alloc_expr();
616 let_binding->tag = EXPR_LET_BINDING;
617 let_binding->pos = pattern->pos;
618 let_binding->source = pattern->source;
619 let_binding->as.let_binding.pattern.expr = pattern;
620 let_binding->as.let_binding.pattern.bound_vars = (size_t)-1;
621 let_binding->as.let_binding.expr = value_expr;
622 let_binding->as.let_binding.in = body_expr;
623
624 return let_binding;
625 }
626
627
628 return lhs;
629}
630
631static expr_t *_parse_alt_expr(lexer_t *lexer, intern_table_t *table) {
632 expr_t *lhs = _parse_sequence_expr(lexer, table);
633 if (lhs == NULL) return NULL;
634
635 if (lexer_peek(lexer).type == TOKEN_PIPE) {
636 lexer_lex(lexer);
637
638 pos_t rhs_pos = lexer_cur_pos(lexer);
639 expr_t *rhs = _parse_alt_expr(lexer, table);
640 if (rhs == NULL) {
641 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), rhs_pos, 1));
642 syntax_expr_free(lhs);
643 return NULL;
644 }
645
646 expr_t *disjunction = _alloc_expr();
647 disjunction->tag = EXPR_ALT;
648 disjunction->pos = lhs->pos;
649 disjunction->source = lhs->source;
650 disjunction->as.alt.try = lhs;
651 disjunction->as.alt.orelse = rhs;
652
653 return disjunction;
654 }
655
656 return lhs;
657}
658
659static expr_t *_parse_expr(lexer_t *lexer, intern_table_t *table) {
660 expr_t *lhs = NULL;
661 expr_t **params = NULL;
662 size_t nparams = 0;
663
664 lhs = _parse_alt_expr(lexer, table);
665 if (lhs == NULL) return NULL;
666
667 token_type next_token = lexer_peek(lexer).type;
668
669 if (next_token == TOKEN_COLON) {
670 lexer_lex(lexer);
671 if (lexer_peek(lexer).type == TOKEN_PIPE) {
672
673 lexer_lex(lexer);
674 }
675 pos_t body_pos = lexer_cur_pos(lexer);
676
677 if (lhs->tag == EXPR_TUPLE) {
678 nparams = lhs->as.tuple.nelements;
679 params = lhs->as.tuple.elements;
680
681 lhs->as.tuple.elements = NULL;
682 lhs->as.tuple.nelements = 0;
683
684 syntax_expr_free(lhs);
685 lhs = NULL;
686
687 } else {
688 nparams = 1;
689 params = malloc(sizeof(expr_t*));
690 if (params == NULL) die("Out of memory: cannot allocate params");
691 params[0] = lhs;
692 lhs = NULL;
693 }
694
695 expr_t *body = _parse_expr(lexer, table);
696 if (body == NULL) {
697 error_report(error(ERROR_EXPECTED_EXPRESSION, lexer_source(lexer), body_pos, 1));
698 if (params != NULL) {
699 for(size_t i=0; i<nparams; i++) syntax_expr_free(params[i]);
700 free(params);
701 }
702 return NULL;
703 }
704
705 expr_t *lambda = _alloc_expr();
706 lambda->tag = EXPR_LAMBDA;
707 lambda->pos = params[0]->pos;
708 lambda->source = params[0]->source;
709 lambda->as.lambda_raw.patterns = params;
710 lambda->as.lambda_raw.npatterns = nparams;
711 lambda->as.lambda_raw.body = body;
712 return lambda;
713 }
714
715 return lhs;
716}
717
718
719
720static size_t _scope_check_pattern(expr_t *it, scope_t *scope) {
721 switch (it->tag) {
722 case EXPR_ALT:
723 case EXPR_LAMBDA:
724 case EXPR_LAMBDA_COMPILED:
725 case EXPR_GROUP:
726 case EXPR_SEQ:
727 case EXPR_BUILTIN:
728 case EXPR_LET_BINDING:
729 error_report(error(ERROR_INVALID_PATTERN, it->source,it->pos,1));
730 return (size_t)-1;
731 case EXPR_VAR:
732 die("Impossible happened: Found a var node when scope-checking.");
733 case EXPR_TUPLE: {
734 size_t i1 = 0;
735 for (int i = 0; i < it->as.tuple.nelements; i++) {
736 size_t i2 = _scope_check_pattern(it->as.tuple.elements[i],scope);
737 if (i2 == (size_t)-1) return i2;
738 i1 += i2;
739 }
740 return i1;
741 }
742 case EXPR_CALL: {
743 if (it->as.call.callee->tag != EXPR_CALL
744 && it->as.call.callee->tag != EXPR_CONSTRUCTOR) {
745 error_report(error(ERROR_INVALID_PATTERN, it->source,it->pos,1));
746 return (size_t)-1;
747 }
748 size_t i1 = _scope_check_pattern(it->as.call.callee, scope);
749 if (i1 == (size_t)-1) return i1;
750 for (int i = 0; i < it->as.call.nargs; i++) {
751 size_t i2 = _scope_check_pattern(it->as.call.arguments[i],scope);
752 if (i2 == (size_t)-1) return i2;
753 i1 += i2;
754 }
755 return i1;
756 }
757 case EXPR_STRING_LIT:
758 case EXPR_INT_LIT:
759 case EXPR_FLOAT_LIT:
760 case EXPR_CONSTRUCTOR:
761 return 0;
762 case EXPR_PAT_IDENT: {
763 scope_push(scope,it->as.ident);
764 return 1;
765 }
766 }
767}
768static inline size_t max(size_t a, size_t b) {
769 return (a > b) ? a : b;
770}
771static size_t _max_let_depth(expr_t *it) {
772 switch (it->tag) {
773 case EXPR_ALT:
774 case EXPR_SEQ:
775 return max(_max_let_depth(it->as.alt.try),_max_let_depth(it->as.alt.orelse));
776 case EXPR_CALL: {
777 size_t ret = _max_let_depth(it->as.call.callee);
778 for (int i = 0; i < it->as.call.nargs;i++) {
779 ret = max(ret,_max_let_depth(it->as.call.arguments[i]));
780 }
781 return ret;
782 }
783 case EXPR_LET_BINDING: {
784 size_t expr = _max_let_depth(it->as.let_binding.expr);
785 return max(expr,_max_let_depth(it->as.let_binding.in) + it->as.let_binding.pattern.bound_vars);
786 }
787 case EXPR_TUPLE:
788 case EXPR_GROUP: {
789 size_t ret = 0;
790 for (int i = 0; i < it->as.tuple.nelements;i++) {
791 ret = max(ret,_max_let_depth(it->as.tuple.elements[i]));
792 }
793 return ret;
794 }
795 case EXPR_LAMBDA:
796 case EXPR_LAMBDA_COMPILED:
797 case EXPR_BUILTIN:
798 case EXPR_INT_LIT:
799 case EXPR_FLOAT_LIT:
800 case EXPR_PAT_IDENT:
801 case EXPR_STRING_LIT:
802 case EXPR_VAR:
803 case EXPR_CONSTRUCTOR:
804 return 0;
805 }
806}
807
808size_t syntax_expr_required_locals(expr_t *it) {
809 return _max_let_depth(it);
810}
811
812static char* INTERNAL_NAMES[]
813 = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
814 "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"};
815
816static bool _scope_check_expr(expr_t *it, scope_t *scope, builtins_t *builtins, bitset_t *used) {
817
818 //printf("SCOPE CHECK:");
819 //scope_debug_dump(scope);
820 //syntax_dump_expr(it);
821 //printf("\n");
822 switch (it->tag) {
823 case EXPR_ALT:
824 case EXPR_SEQ:
825 return _scope_check_expr(it->as.alt.try, scope, builtins, used)
826 && _scope_check_expr(it->as.alt.orelse, scope, builtins, used);
827 case EXPR_LAMBDA: {
828 size_t idx = 0;
829 expr_t *body = it->as.lambda_raw.body;
830 size_t num_args = it->as.lambda_raw.npatterns;
831 for (int i = 0; i < it->as.lambda_raw.npatterns; i++) {
832 if (it->as.lambda_raw.patterns[i]->tag == EXPR_PAT_IDENT) {
833 scope_push(scope, it->as.lambda_raw.patterns[i]->as.ident);
834 } else {
835 expr_t *ident_expr = _alloc_expr();
836 ident_expr->tag = EXPR_PAT_IDENT;
837 ident_expr->pos = it->as.lambda_raw.patterns[i]->pos;
838 ident_expr->source = it->as.lambda_raw.patterns[i]->source;
839 ident_expr->as.ident = INTERNAL_NAMES[idx++];
840 scope_push(scope, ident_expr->as.ident);
841 expr_t *let_binding = _alloc_expr();
842 let_binding->tag = EXPR_LET_BINDING;
843 let_binding->pos = it->as.lambda_raw.patterns[i]->pos;
844 let_binding->source = it->as.lambda_raw.patterns[i]->source;
845 let_binding->as.let_binding.pattern.expr = it->as.lambda_raw.patterns[i];
846 let_binding->as.let_binding.pattern.bound_vars = (size_t)-1;
847 let_binding->as.let_binding.expr = ident_expr;
848 let_binding->as.let_binding.in = body;
849 body = let_binding;
850 }
851 }
852 it->tag = EXPR_LAMBDA_COMPILED;
853 it->as.lambda.body = body;
854 it->as.lambda.args = num_args;
855
856 bitset_t *lambda_mask = bitset_alloc();
857 bool ret = _scope_check_expr(it->as.lambda.body,scope, builtins, lambda_mask);
858 bitset_shift_down(lambda_mask,num_args);
859 it->as.lambda.mask = lambda_mask;
860 bitset_union(used, lambda_mask);
861 it->as.lambda.num_lets = _max_let_depth(it->as.lambda.body);
862 scope_pop(scope,num_args);
863 return ret;
864 }
865 case EXPR_LET_BINDING: {
866 if (!_scope_check_expr(it->as.let_binding.expr,scope,builtins, used))
867 return false;
868 size_t amount = _scope_check_pattern(it->as.let_binding.pattern.expr,scope);
869 if (amount == (size_t)-1)
870 return false;
871 it->as.let_binding.pattern.bound_vars = amount;
872 bitset_shift_up(used, amount);
873 bool ret = _scope_check_expr(it->as.let_binding.in,scope,builtins, used);
874 bitset_shift_down(used, amount);
875 scope_pop(scope,amount);
876 return ret;
877 }
878 case EXPR_CALL:
879 if (_scope_check_expr(it->as.call.callee, scope,builtins, used)) {
880 for (int i = 0; i < it->as.call.nargs; i++) {
881 if (!_scope_check_expr(it->as.call.arguments[i], scope,builtins, used))
882 return false;
883 }
884 return true;
885 } else return false;
886 case EXPR_STRING_LIT:
887 case EXPR_INT_LIT:
888 case EXPR_FLOAT_LIT:
889 case EXPR_CONSTRUCTOR:
890 return true;
891 case EXPR_TUPLE:
892 // TODO
893 case EXPR_GROUP:
894 for (int i = 0; i < it->as.group.nclauses; i++) {
895 if (!_scope_check_expr(it->as.group.clauses[i], scope,builtins, used))
896 return false;
897 }
898 return true;
899
900 case EXPR_LAMBDA_COMPILED:
901 case EXPR_VAR:
902 case EXPR_BUILTIN:
903 die("Impossible happened: Found a compiled node when scope-checking.");
904 case EXPR_PAT_IDENT: {
905 de_bruijn_t idx = scope_lookup(scope, it->as.ident);
906 if (idx == (de_bruijn_t)-1) {
907 builtin_func_t *ptr = builtins_lookup(builtins, it->as.ident);
908 if (ptr != NULL) {
909 it->tag = EXPR_BUILTIN;
910 it->as.builtin = ptr;
911 return true;
912 } else {
913 //printf("FOO: %s\n", it->as.ident);
914 error_report(error(ERROR_UNKNOWN_VARIABLE, it->source, it->pos, strlen(it->as.ident)));
915 return false;
916 }
917 }
918 it->tag = EXPR_VAR;
919 it->as.var = idx;
920 bitset_set(used,idx);
921 return true;
922 }
923 }
924}
925
926expr_t *syntax_parse_expr(lexer_t *lexer, builtins_t *builtins, intern_table_t *table) {
927 expr_t *it = _parse_expr(lexer, table);
928 if (it == NULL) return NULL;
929 bitset_t *used = bitset_alloc();
930 scope_t *scope = scope_alloc();
931 syntax_dump_expr(it);
932 _scope_check_expr(it, scope, builtins, used);
933 return it;
934}
935void syntax_expr_free(expr_t *it) {
936 switch (it->tag) {
937 case EXPR_ALT:
938 case EXPR_SEQ:
939 syntax_expr_free(it->as.alt.try);
940 syntax_expr_free(it->as.alt.orelse);
941 break;
942 case EXPR_LAMBDA:
943 for (int i = 0; i < it->as.lambda_raw.npatterns;i++) {
944 syntax_expr_free(it->as.lambda_raw.patterns[i]);
945 }
946 free(it->as.lambda_raw.patterns);
947 syntax_expr_free(it->as.lambda_raw.body);
948 break;
949 case EXPR_LAMBDA_COMPILED:
950 syntax_expr_free(it->as.lambda.body);
951 if (it->as.lambda.mask != NULL) bitset_free(it->as.lambda.mask);
952 break;
953
954 case EXPR_LET_BINDING:
955 syntax_expr_free(it->as.let_binding.expr);
956 syntax_expr_free(it->as.let_binding.in);
957 syntax_expr_free(it->as.let_binding.pattern.expr);
958 break;
959
960 case EXPR_TUPLE:
961 for (int i = 0; i < it->as.tuple.nelements;i++) {
962 syntax_expr_free(it->as.tuple.elements[i]);
963 }
964 free(it->as.tuple.elements);
965 break;
966 case EXPR_GROUP:
967 for (int i = 0; i < it->as.group.nclauses;i++) {
968 syntax_expr_free(it->as.group.clauses[i]);
969 }
970 free(it->as.group.clauses);
971 break;
972 case EXPR_CALL:
973 syntax_expr_free(it->as.call.callee);
974 for (int i = 0; i < it->as.call.nargs;i++) {
975 syntax_expr_free(it->as.call.arguments[i]);
976 }
977 free(it->as.call.arguments);
978 break;
979 case EXPR_STRING_LIT:
980 free(it->as.string_lit);
981 break;
982 default: break;
983 }
984 free(it);
985}
986
987
988void syntax_dump_expr(expr_t *it) {
989 switch (it->tag) {
990 case EXPR_ALT:
991 printf("alt(");
992 goto bits;
993 case EXPR_SEQ:
994 printf("seq(");
995 bits:
996 syntax_dump_expr(it->as.alt.try);
997 printf(",");
998 syntax_dump_expr(it->as.alt.orelse);
999 printf(")");
1000 break;
1001 case EXPR_LAMBDA:
1002 printf("lambda(");
1003 for (int i = 0; i < it->as.lambda_raw.npatterns;i++) {
1004 syntax_dump_expr(it->as.lambda_raw.patterns[i]);
1005 printf(": ");
1006 }
1007 syntax_dump_expr(it->as.lambda_raw.body);
1008 printf(")");
1009 break;
1010 case EXPR_LET_BINDING:
1011 printf("let(");
1012 syntax_dump_expr(it->as.let_binding.pattern.expr);
1013 printf(":=");
1014 syntax_dump_expr(it->as.let_binding.expr);
1015 printf(";");
1016 syntax_dump_expr(it->as.let_binding.in);
1017 printf(")");
1018 break;
1019 case EXPR_CALL:
1020 syntax_dump_expr(it->as.call.callee);
1021 printf("(");
1022 for (int i = 0; i < it->as.call.nargs;i++) {
1023 syntax_dump_expr(it->as.call.arguments[i]);
1024 printf(",");
1025 }
1026 printf(")");
1027 break;
1028 case EXPR_TUPLE:
1029 printf("(");
1030 for (int i = 0; i < it->as.tuple.nelements;i++) {
1031 syntax_dump_expr(it->as.tuple.elements[i]);
1032 printf(",");
1033 }
1034 printf(")");
1035 break;
1036 case EXPR_BUILTIN:
1037 printf("<builtin%p>",it->as.builtin);
1038 break;
1039 case EXPR_LAMBDA_COMPILED:
1040 printf("lambda_c(%zu,",it->as.lambda.args);
1041 syntax_dump_expr(it->as.lambda.body);
1042 printf(")");
1043 break;
1044 case EXPR_GROUP:
1045 printf("{");
1046 for (int i = 0; i < it->as.group.nclauses;i++) {
1047 syntax_dump_expr(it->as.group.clauses[i]);
1048 printf(",");
1049 }
1050 printf("}");
1051 break;
1052 case EXPR_STRING_LIT:
1053 printf("\"%s\"",it->as.string_lit);
1054 break;
1055 case EXPR_INT_LIT:
1056 printf("%ld", it->as.int_lit);
1057 break;
1058 case EXPR_FLOAT_LIT:
1059 printf("%f", it->as.float_lit);
1060 break;
1061 case EXPR_PAT_IDENT:
1062 printf("%s", it->as.ident);
1063 break;
1064 case EXPR_VAR:
1065 printf("\\%zu", it->as.var);
1066 break;
1067 case EXPR_CONSTRUCTOR:
1068 printf("<%s>", it->as.constructor);
1069 break;
1070 default: break;
1071 }
1072}
1073