Tree-sitter implementation for the Confindent configuration language

add tests

Changed files
+153 -60
src
test
+1 -1
grammar.js
··· 14 14 // TODO: add external scanner for proper child support (dedents...) 15 15 source_file: $ => repeat($.definition), 16 16 17 - definition: $ => seq($.key, optional($.value)), 17 + definition: $ => seq($.key, optional($.value), /\n/), 18 18 19 19 key: $ => /[^ \t]+/, 20 20
+4
src/grammar.json
··· 27 27 "type": "BLANK" 28 28 } 29 29 ] 30 + }, 31 + { 32 + "type": "PATTERN", 33 + "value": "\\n" 30 34 } 31 35 ] 32 36 },
+114 -59
src/parser.c
··· 7 7 #endif 8 8 9 9 #define LANGUAGE_VERSION 15 10 - #define STATE_COUNT 7 10 + #define STATE_COUNT 9 11 11 #define LARGE_STATE_COUNT 4 12 - #define SYMBOL_COUNT 6 12 + #define SYMBOL_COUNT 7 13 13 #define ALIAS_COUNT 0 14 - #define TOKEN_COUNT 3 14 + #define TOKEN_COUNT 4 15 15 #define EXTERNAL_TOKEN_COUNT 0 16 16 #define FIELD_COUNT 0 17 - #define MAX_ALIAS_SEQUENCE_LENGTH 2 17 + #define MAX_ALIAS_SEQUENCE_LENGTH 3 18 18 #define MAX_RESERVED_WORD_SET_SIZE 0 19 19 #define PRODUCTION_ID_COUNT 1 20 20 #define SUPERTYPE_COUNT 0 21 21 22 22 enum ts_symbol_identifiers { 23 - sym_key = 1, 24 - sym_value = 2, 25 - sym_source_file = 3, 26 - sym_definition = 4, 27 - aux_sym_source_file_repeat1 = 5, 23 + aux_sym_definition_token1 = 1, 24 + sym_key = 2, 25 + sym_value = 3, 26 + sym_source_file = 4, 27 + sym_definition = 5, 28 + aux_sym_source_file_repeat1 = 6, 28 29 }; 29 30 30 31 static const char * const ts_symbol_names[] = { 31 32 [ts_builtin_sym_end] = "end", 33 + [aux_sym_definition_token1] = "definition_token1", 32 34 [sym_key] = "key", 33 35 [sym_value] = "value", 34 36 [sym_source_file] = "source_file", ··· 38 40 39 41 static const TSSymbol ts_symbol_map[] = { 40 42 [ts_builtin_sym_end] = ts_builtin_sym_end, 43 + [aux_sym_definition_token1] = aux_sym_definition_token1, 41 44 [sym_key] = sym_key, 42 45 [sym_value] = sym_value, 43 46 [sym_source_file] = sym_source_file, ··· 50 53 .visible = false, 51 54 .named = true, 52 55 }, 56 + [aux_sym_definition_token1] = { 57 + .visible = false, 58 + .named = false, 59 + }, 53 60 [sym_key] = { 54 61 .visible = true, 55 62 .named = true, ··· 88 95 [4] = 4, 89 96 [5] = 5, 90 97 [6] = 6, 98 + [7] = 7, 99 + [8] = 8, 91 100 }; 92 101 93 102 static bool ts_lex(TSLexer *lexer, TSStateId state) { ··· 95 104 eof = lexer->eof(lexer); 96 105 switch (state) { 97 106 case 0: 98 - if (eof) ADVANCE(2); 99 - if (lookahead == '\n') ADVANCE(3); 107 + if (eof) ADVANCE(4); 108 + if (lookahead == '\n') ADVANCE(5); 100 109 if (lookahead == '\t' || 
101 - lookahead == ' ') ADVANCE(7); 102 - if ((0x0b <= lookahead && lookahead <= '\r')) ADVANCE(3); 103 - if (lookahead != 0) ADVANCE(4); 110 + lookahead == ' ') ADVANCE(12); 111 + if ((0x0b <= lookahead && lookahead <= '\r')) ADVANCE(8); 112 + if (lookahead != 0) ADVANCE(9); 104 113 END_STATE(); 105 114 case 1: 106 - if (eof) ADVANCE(2); 115 + if (lookahead == '\n') ADVANCE(6); 116 + if (('\t' <= lookahead && lookahead <= '\r') || 117 + lookahead == ' ') ADVANCE(13); 118 + if (lookahead != 0) ADVANCE(14); 119 + END_STATE(); 120 + case 2: 121 + if (lookahead == '\n') ADVANCE(7); 122 + if (('\t' <= lookahead && lookahead <= '\r') || 123 + lookahead == ' ') SKIP(2); 124 + END_STATE(); 125 + case 3: 126 + if (eof) ADVANCE(4); 107 127 if (lookahead == '\t' || 108 - lookahead == ' ') SKIP(1); 109 - if (('\n' <= lookahead && lookahead <= '\r')) ADVANCE(5); 110 - if (lookahead != 0) ADVANCE(6); 128 + lookahead == ' ') SKIP(3); 129 + if (('\n' <= lookahead && lookahead <= '\r')) ADVANCE(10); 130 + if (lookahead != 0) ADVANCE(11); 111 131 END_STATE(); 112 - case 2: 132 + case 4: 113 133 ACCEPT_TOKEN(ts_builtin_sym_end); 114 134 END_STATE(); 115 - case 3: 135 + case 5: 136 + ACCEPT_TOKEN(aux_sym_definition_token1); 137 + if (lookahead == '\n') ADVANCE(5); 138 + if (lookahead == '\t' || 139 + lookahead == ' ') ADVANCE(12); 140 + if ((0x0b <= lookahead && lookahead <= '\r')) ADVANCE(8); 141 + END_STATE(); 142 + case 6: 143 + ACCEPT_TOKEN(aux_sym_definition_token1); 144 + if (lookahead == '\n') ADVANCE(6); 145 + if (('\t' <= lookahead && lookahead <= '\r') || 146 + lookahead == ' ') ADVANCE(13); 147 + END_STATE(); 148 + case 7: 149 + ACCEPT_TOKEN(aux_sym_definition_token1); 150 + if (lookahead == '\n') ADVANCE(7); 151 + END_STATE(); 152 + case 8: 116 153 ACCEPT_TOKEN(sym_key); 117 - if (lookahead == '\n') ADVANCE(3); 154 + if (lookahead == '\n') ADVANCE(5); 118 155 if (lookahead == '\t' || 119 - lookahead == ' ') ADVANCE(7); 120 - if ((0x0b <= lookahead && lookahead <= '\r')) 
ADVANCE(3); 121 - if (lookahead != 0) ADVANCE(4); 156 + lookahead == ' ') ADVANCE(12); 157 + if ((0x0b <= lookahead && lookahead <= '\r')) ADVANCE(8); 158 + if (lookahead != 0) ADVANCE(9); 122 159 END_STATE(); 123 - case 4: 160 + case 9: 124 161 ACCEPT_TOKEN(sym_key); 125 - if (lookahead == '\n') ADVANCE(6); 162 + if (lookahead == '\n') ADVANCE(11); 126 163 if (lookahead == '\t' || 127 - lookahead == ' ') ADVANCE(8); 128 - if (lookahead != 0) ADVANCE(4); 164 + lookahead == ' ') ADVANCE(14); 165 + if (lookahead != 0) ADVANCE(9); 129 166 END_STATE(); 130 - case 5: 167 + case 10: 131 168 ACCEPT_TOKEN(sym_key); 132 - if (('\n' <= lookahead && lookahead <= '\r')) ADVANCE(5); 169 + if (('\n' <= lookahead && lookahead <= '\r')) ADVANCE(10); 133 170 if (lookahead != 0 && 134 171 (lookahead < '\t' || '\r' < lookahead) && 135 - lookahead != ' ') ADVANCE(6); 172 + lookahead != ' ') ADVANCE(11); 136 173 END_STATE(); 137 - case 6: 174 + case 11: 138 175 ACCEPT_TOKEN(sym_key); 139 176 if (lookahead != 0 && 140 177 lookahead != '\t' && 141 - lookahead != ' ') ADVANCE(6); 178 + lookahead != ' ') ADVANCE(11); 142 179 END_STATE(); 143 - case 7: 180 + case 12: 144 181 ACCEPT_TOKEN(sym_value); 145 - if (lookahead == '\n') ADVANCE(3); 182 + if (lookahead == '\n') ADVANCE(5); 146 183 if (lookahead == '\t' || 147 - lookahead == ' ') ADVANCE(7); 148 - if ((0x0b <= lookahead && lookahead <= '\r')) ADVANCE(3); 149 - if (lookahead != 0) ADVANCE(4); 184 + lookahead == ' ') ADVANCE(12); 185 + if ((0x0b <= lookahead && lookahead <= '\r')) ADVANCE(8); 186 + if (lookahead != 0) ADVANCE(9); 187 + END_STATE(); 188 + case 13: 189 + ACCEPT_TOKEN(sym_value); 190 + if (lookahead == '\n') ADVANCE(6); 191 + if (('\t' <= lookahead && lookahead <= '\r') || 192 + lookahead == ' ') ADVANCE(13); 193 + if (lookahead != 0) ADVANCE(14); 150 194 END_STATE(); 151 - case 8: 195 + case 14: 152 196 ACCEPT_TOKEN(sym_value); 153 197 if (lookahead != 0 && 154 - lookahead != '\n') ADVANCE(8); 198 + lookahead != '\n') 
ADVANCE(14); 155 199 END_STATE(); 156 200 default: 157 201 return false; ··· 160 204 161 205 static const TSLexerMode ts_lex_modes[STATE_COUNT] = { 162 206 [0] = {.lex_state = 0}, 163 - [1] = {.lex_state = 1}, 164 - [2] = {.lex_state = 1}, 165 - [3] = {.lex_state = 1}, 166 - [4] = {.lex_state = 0}, 167 - [5] = {.lex_state = 1}, 168 - [6] = {.lex_state = 0}, 207 + [1] = {.lex_state = 3}, 208 + [2] = {.lex_state = 3}, 209 + [3] = {.lex_state = 3}, 210 + [4] = {.lex_state = 1}, 211 + [5] = {.lex_state = 3}, 212 + [6] = {.lex_state = 3}, 213 + [7] = {.lex_state = 0}, 214 + [8] = {.lex_state = 2}, 169 215 }; 170 216 171 217 static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { 172 218 [STATE(0)] = { 173 219 [ts_builtin_sym_end] = ACTIONS(1), 220 + [aux_sym_definition_token1] = ACTIONS(1), 174 221 [sym_key] = ACTIONS(1), 175 222 [sym_value] = ACTIONS(1), 176 223 }, 177 224 [STATE(1)] = { 178 - [sym_source_file] = STATE(6), 225 + [sym_source_file] = STATE(7), 179 226 [sym_definition] = STATE(2), 180 227 [aux_sym_source_file_repeat1] = STATE(2), 181 228 [ts_builtin_sym_end] = ACTIONS(3), ··· 196 243 }; 197 244 198 245 static const uint16_t ts_small_parse_table[] = { 199 - [0] = 3, 246 + [0] = 2, 200 247 ACTIONS(14), 1, 248 + aux_sym_definition_token1, 249 + ACTIONS(16), 1, 250 + sym_value, 251 + [7] = 1, 252 + ACTIONS(18), 2, 201 253 ts_builtin_sym_end, 202 - ACTIONS(16), 1, 203 254 sym_key, 204 - ACTIONS(18), 1, 205 - sym_value, 206 - [10] = 1, 255 + [12] = 1, 207 256 ACTIONS(20), 2, 208 257 ts_builtin_sym_end, 209 258 sym_key, 210 - [15] = 1, 259 + [17] = 1, 211 260 ACTIONS(22), 1, 212 261 ts_builtin_sym_end, 262 + [21] = 1, 263 + ACTIONS(24), 1, 264 + aux_sym_definition_token1, 213 265 }; 214 266 215 267 static const uint32_t ts_small_parse_table_map[] = { 216 268 [SMALL_STATE(4)] = 0, 217 - [SMALL_STATE(5)] = 10, 218 - [SMALL_STATE(6)] = 15, 269 + [SMALL_STATE(5)] = 7, 270 + [SMALL_STATE(6)] = 12, 271 + [SMALL_STATE(7)] = 17, 272 + [SMALL_STATE(8)] 
= 21, 219 273 }; 220 274 221 275 static const TSParseActionEntry ts_parse_actions[] = { ··· 226 280 [7] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1, 0, 0), 227 281 [9] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), 228 282 [11] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(4), 229 - [14] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_definition, 1, 0, 0), 230 - [16] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_definition, 1, 0, 0), 231 - [18] = {.entry = {.count = 1, .reusable = false}}, SHIFT(5), 232 - [20] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_definition, 2, 0, 0), 283 + [14] = {.entry = {.count = 1, .reusable = false}}, SHIFT(5), 284 + [16] = {.entry = {.count = 1, .reusable = false}}, SHIFT(8), 285 + [18] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_definition, 2, 0, 0), 286 + [20] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_definition, 3, 0, 0), 233 287 [22] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), 288 + [24] = {.entry = {.count = 1, .reusable = true}}, SHIFT(6), 234 289 }; 235 290 236 291 #ifdef __cplusplus
+34
test/corpus/simple_values.txt
··· 1 + ======== 2 + Simple values 3 + ======== 4 + 5 + Key Value 6 + key Value 7 + key_with$pecial_char@cters value with spaces 8 + indented value 9 + indented value 10 + 11 + --- 12 + 13 + (source_file 14 + (definition 15 + (key) 16 + (value) 17 + ) 18 + (definition 19 + (key) 20 + (value) 21 + ) 22 + (definition 23 + (key) 24 + (value) 25 + ) 26 + (definition 27 + (key) 28 + (value) 29 + ) 30 + (definition 31 + (key) 32 + (value) 33 + ) 34 + )