improve parser

autumn 06466d18 7bb9cc7a

Changed files
+185 -104
crone
+1
.gitignore
··· 1 1 .temp 2 2 program 3 3 manifesto 4 + crone/lang/design
+2
crone/core/core.c
··· 1 1 2 + #include <string.h> 2 3 #include <stdint.h> 3 4 #include <stddef.h> 4 5 5 6 #include <core.h> 6 7 7 8 #include "ptr_list.h" 9 + #include "list.h" 8 10
+43
crone/core/list.h
··· 1 + 2 + list list_allocate(size_t initial_capacity, size_t element_size) { 3 + list list = { 4 + .count = 0, 5 + .capacity = initial_capacity, 6 + .element_size = element_size, 7 + .data = malloc(element_size * initial_capacity) 8 + }; 9 + if (list.data == NULL) { CRASH("failed malloc: list"); } 10 + return list; 11 + } 12 + 13 + void list_append(list *list, void *item) { 14 + list->count += 1; 15 + if (list->count > list->capacity) { 16 + /*========* Next power of two *========*/ 17 + list->capacity = list->count; 18 + list->capacity |= list->capacity >> 1; 19 + list->capacity |= list->capacity >> 2; 20 + list->capacity |= list->capacity >> 4; 21 + list->capacity |= list->capacity >> 8; 22 + list->capacity |= list->capacity >> 16; 23 + if (sizeof(void*) > sizeof(uint32_t)) { 24 + list->capacity |= list->capacity >> 32; 25 + } 26 + ++list->capacity; 27 + /*=====================================*/ 28 + 29 + list->data = realloc(list->data, list->element_size * list->capacity); 30 + if (list->data == NULL) { CRASH("failed realloc: list"); } 31 + } 32 + 33 + memcpy(list->data + (list->count - 1) * list->element_size, item, list->element_size); 34 + } 35 + 36 + void *list_element(list list, size_t index) { 37 + return list.data + index * list.element_size; 38 + } 39 + 40 + void list_cleanup(list list) { 41 + free(list.data); 42 + } 43 +
+2 -2
crone/core/ptr_list.h
··· 1 1 2 - extern inline ptr_list ptrs_allocate(size_t initial_capacity) { 2 + ptr_list ptrs_allocate(size_t initial_capacity) { 3 3 ptr_list list = { 4 4 .count = 0, 5 5 .capacity = initial_capacity, ··· 32 32 list->data[list->count - 1] = item; 33 33 } 34 34 35 - extern inline void ptrs_cleanup(ptr_list list) { 35 + void ptrs_cleanup(ptr_list list) { 36 36 free(list.data); 37 37 } 38 38
+6
crone/interface/core.h
··· 1 1 2 2 #include "core/ptr_list.h" 3 + #include "core/list.h" 3 4 #include "core/dev.h" 4 5 6 + typedef struct string { 7 + char *data; 8 + size_t length; 9 + } string; 10 +
+16
crone/interface/core/list.h

#ifndef CRONE_INTERFACE_CORE_LIST_H
#define CRONE_INTERFACE_CORE_LIST_H

/*
 * Guarded so that including it more than once in a translation unit does not
 * redefine `struct list`.
 */

#include <stddef.h> /* size_t */

/*
 * Growable array of fixed-size elements.
 *   count        - number of elements currently stored
 *   capacity     - number of elements the buffer has room for
 *   element_size - size of one element in bytes
 *   data         - the element buffer; declared `void **`, so byte offsets
 *                  into it must be computed through a `char *` cast
 */
typedef struct list {
    size_t count;
    size_t capacity;
    size_t element_size;
    void* *data;
} list;

/* Creates an empty list with room for `initial_capacity` elements. */
list list_allocate(size_t initial_capacity, size_t element_size);

/* Appends a copy of the `element_size` bytes found at `item`. */
void list_append(list *list, void *item);

/* Returns a pointer to the element stored at `index`. */
void *list_element(list list, size_t index);

/* Frees the list's buffer. */
void list_cleanup(list list);

#endif /* CRONE_INTERFACE_CORE_LIST_H */
+2 -1
crone/interface/lang.h
··· 1 1 2 2 #include <stdio.h> 3 + #include <core.h> 3 4 4 - void execute(char *crone_script, size_t length); 5 + void execute_crone(string crone_script); 5 6
+85 -99
crone/lang/interpreter.c
··· 1 1 2 2 #include <unistd.h> 3 3 #include <ctype.h> 4 + #include <string.h> 4 5 5 6 #include <lang.h> 6 7 #include <core.h> 7 8 8 9 #define STATES(OP) \ 10 + OP(INVALID) \ 9 11 OP(OUTER_SPACE) \ 10 12 OP(TERM) \ 11 - OP(PRELUDIC_SPACE) \ 12 - OP(PRELUDE) \ 13 + OP(INNER_SPACE) \ 13 14 OP(BLOCK) \ 14 15 15 16 #define COMMA_SEPARATED(VALUE) VALUE, ··· 24 25 }; 25 26 26 27 void print_substring(char *start, size_t length) { 27 - fprintf(stderr, "%.*s", (int)length, start); 28 - } 29 - 30 - bool ends_prelude(char c) { 31 - return c == ';' || c == '{'; 32 - } 33 - 34 - bool starts_term(char c) { 35 - return !isspace(c); 36 - } 37 - 38 - bool ends_term(char c) { 39 - return isspace(c); 40 - } 41 - 42 - bool ends_block(char c) { 43 - 44 - return c == '}'; 28 + for (size_t i = 0; i < length; ++i) { 29 + if (start[i] == '\n') { 30 + fputc('\\', stderr); 31 + fputc('n', stderr); 32 + } else { 33 + fputc(start[i], stderr); 34 + } 35 + } 45 36 } 46 37 47 38 typedef struct parse { 48 39 char *start; 49 40 size_t length; 50 41 parser_state parsed_as; 51 - void *subparse; 42 + list subparses; 52 43 } parse; 53 44 54 - void print_parse(parse p) { 45 + const char *spaces = " "; 46 + 47 + void print_parse(parse p, size_t indent) { 48 + size_t space_count = 4 * indent; 49 + if (space_count > strlen(spaces)) { 50 + space_count = strlen(spaces); 51 + } 52 + fwrite(spaces, 1, space_count, stderr); 55 53 fprintf(stderr, "parsed as %s: \"", state_names[p.parsed_as]); 56 54 print_substring(p.start, p.length); 57 55 fprintf(stderr, "\"\n"); 58 56 } 59 57 60 - void execute(char *crone_script, size_t length) { 58 + inline void transition(list *l, parse *p, char *end_position, parser_state next_state) { 59 + p->length = (size_t)end_position - (size_t)(p->start); 60 + list_append(l,p); 61 + p->start = end_position; 62 + p->parsed_as = next_state; 63 + } 61 64 62 - parser_state state = OUTER_SPACE; 65 + // TODO UNICODE 63 66 64 - ptr_list substrings = ptrs_allocate(512); 67 + list/*subparses*/ 
parse_crone(string crone_script, size_t *p_position) { 68 + list parses = list_allocate(512, sizeof(parse)); 69 + 70 + parse current_parse; 71 + 72 + current_parse.start = crone_script.data + (*p_position); 73 + current_parse.parsed_as = OUTER_SPACE; 65 74 66 - char *start = crone_script; 75 + #define NEXT(state) transition(&parses, &current_parse, crone_script.data + (*p_position), state) 67 76 68 - size_t sublength = 0; 77 + // optimize w/ computed goto? 69 78 70 - for (size_t index = 0; index < length; ++index) { 71 - // TODO: actual unicode parsing lol 72 - char _char = crone_script[index]; 73 - switch (state) { 74 - // TODO: simplify entire state machine to be less repetitive 79 + for (; (*p_position) < crone_script.length; ++(*p_position)) { 80 + char _char = crone_script.data[*p_position]; 81 + switch (current_parse.parsed_as) { 75 82 case OUTER_SPACE: 76 - if (starts_term(_char)) { 77 - parse *p = malloc(sizeof(parse)); 78 - p->start = start; 79 - p->length = sublength; 80 - p->parsed_as = state; 81 - ptrs_append(&substrings, p); 82 - start = (void*)(crone_script + index); 83 - sublength = 0; 84 - state = TERM; 85 - } else { 86 - ++sublength; 83 + if (_char == '}') { 84 + NEXT(INVALID); 85 + } else if (!isspace(_char)) { 86 + NEXT(TERM); 87 87 } 88 88 break; 89 - case PRELUDIC_SPACE: 90 - ++sublength; 91 - if (starts_term(_char)) { 92 - parse *p = malloc(sizeof(parse)); 93 - p->start = start; 94 - p->length = sublength; 95 - p->parsed_as = state; 96 - ptrs_append(&substrings, p); 97 - start = (void*)(crone_script + index); 98 - sublength = 0; 99 - state = PRELUDE; 89 + case INNER_SPACE: 90 + if (_char == '{') { 91 + NEXT(BLOCK); 92 + } else if (_char == ';') { 93 + ++(*p_position); 94 + NEXT(OUTER_SPACE); 95 + } else if (_char == '}') { 96 + NEXT(INVALID); 97 + } else if (!isspace(_char)) { 98 + NEXT(TERM); 100 99 } 101 100 break; 102 101 case TERM: 103 - ++sublength; 104 - if (ends_term(_char)) { 105 - parse *p = malloc(sizeof(parse)); 106 - p->start = 
start; 107 - p->length = sublength; 108 - p->parsed_as = state; 109 - ptrs_append(&substrings, p); 110 - start = (void*)(crone_script + index); 111 - sublength = 0; 112 - state = PRELUDIC_SPACE; 113 - } 114 - break; 115 - case PRELUDE: 116 - ++sublength; 117 - if (ends_prelude(_char)) { 118 - parse *p = malloc(sizeof(parse)); 119 - p->start = start; 120 - p->length = sublength; 121 - p->parsed_as = state; 122 - ptrs_append(&substrings, p); 123 - start = (void*)(crone_script + index); 124 - sublength = 0; 125 - state = BLOCK; 102 + if (isspace(_char)) { 103 + NEXT(INNER_SPACE); 104 + } else if (_char == '}') { 105 + NEXT(INVALID); 106 + } else if (_char == '{') { 107 + NEXT(BLOCK); 108 + } else if (_char == ';') { 109 + NEXT(OUTER_SPACE); 110 + ++(*p_position); 126 111 } 127 112 break; 128 113 case BLOCK: 129 114 // TODO parsing for block dependent on term and prelude 130 - ++sublength; 131 - if (ends_block(_char)) { 132 - parse *p = malloc(sizeof(parse)); 133 - p->start = start; 134 - p->length = sublength; 135 - p->parsed_as = state; 136 - ptrs_append(&substrings, p); 137 - start = (void*)(crone_script + index); 138 - sublength = 0; 139 - state = OUTER_SPACE; 140 - } 115 + current_parse.subparses = parse_crone(crone_script, p_position); 116 + NEXT(OUTER_SPACE); 141 117 break; 118 + case INVALID: 119 + return parses; 120 + break; 142 121 } 143 122 } 144 123 145 - parse *p = malloc(sizeof(parse)); 146 - p->start = start; 147 - p->length = sublength; 148 - p->parsed_as = state; 149 - ptrs_append(&substrings, p); 124 + #undef NEXT 150 125 126 + list_append(&parses, &current_parse); 151 127 152 - for (int i = 0; i < substrings.count; ++i) { 153 - print_parse(*(parse *)substrings.data[i]); 154 - } 128 + return parses; 129 + } 155 130 156 - for (int i = 0; i < substrings.count; ++i) { 157 - free(substrings.data[i]); 131 + void print_parses(list parses, int indent) { 132 + for (int i = 0; i < parses.count; ++i) { 133 + parse p = *(parse*)list_element(parses, i); 134 + 
//if (p.parsed_as != OUTER_SPACE && p.parsed_as != INNER_SPACE) { 135 + print_parse(p, indent); 136 + if (p.parsed_as == BLOCK) { 137 + print_parses(p.subparses, indent + 1); 138 + } 139 + //} 158 140 } 141 + } 159 142 160 - ptrs_cleanup(substrings); 143 + void execute_crone(string crone_script) { 144 + size_t position = 0; 145 + list parses = parse_crone(crone_script, &position); 161 146 147 + print_parses(parses, 0); 162 148 } 163 149 164 150 /*
+20
crone/lang/parseme.cr
··· 1 + 2 + term0 term1 term2 term3 { 3 + block block 4 + 5 + block { 6 + blooooooock!!!! 7 + block!!!; 8 + 9 + block! !!! { 10 + INNERMOST BLOCK; 11 + } 12 + } 13 + 14 + block 15 + } 16 + 17 + term4 term5; 18 + 19 + term6 { block2 } 20 +
+8 -2
program.c
··· 11 11 #include <crone.h> 12 12 13 13 int main() { 14 - int fileDesc = open("./crone/core/core.cr", O_RDONLY, 0); 14 + //int fileDesc = open("./crone/core/core.cr", O_RDONLY, 0); 15 + int fileDesc = open("./crone/lang/parseme.cr", O_RDONLY, 0); 15 16 16 17 if (fileDesc == -1) { 17 18 CRASH("bad file"); ··· 30 31 CRASH("bad mmap"); 31 32 } 32 33 33 - execute(file, fileStat.st_size); 34 + string script = { 35 + .data = file, 36 + .length = fileStat.st_size 37 + }; 38 + 39 + execute_crone(script); 34 40 35 41 result = munmap(file, fileStat.st_size); 36 42