this repo has no description
1// MIT License
2//
3// Copyright (c) 2024 NotNite
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23using System.Text;
24using GDWeave.Godot;
25using GDWeave.Godot.Variants;
26
27namespace Teemaw.Calico.Util;
28
29/**
30 * This is copied from https://github.com/NotNite/GDWeave/blob/main/GDWeave/Script/ScriptTokenizer.cs since this class
31 * was not made visible. Minor modifications have been made to make it more ergonomic to tokenize snippets.
32 */
33public static class ScriptTokenizer {
/// <summary>
/// Lookup from the exact text of a lexeme (keyword, constant name or operator) to the
/// <see cref="TokenType"/> emitted for it. Lexemes that are not listed here are handled by the
/// other branches of <see cref="Tokenize"/>.
/// </summary>
private static readonly Dictionary<string, TokenType> Tokens = new() {
    // Control flow
    ["continue"] = TokenType.CfContinue,
    ["return"] = TokenType.CfReturn,
    ["break"] = TokenType.CfBreak,
    ["match"] = TokenType.CfMatch,
    ["while"] = TokenType.CfWhile,
    ["elif"] = TokenType.CfElif,
    ["else"] = TokenType.CfElse,
    ["pass"] = TokenType.CfPass,
    ["for"] = TokenType.CfFor,
    ["if"] = TokenType.CfIf,

    // Declarations and other keywords
    ["const"] = TokenType.PrConst,
    ["var"] = TokenType.PrVar,
    ["func"] = TokenType.PrFunction,
    ["class"] = TokenType.PrClass,
    ["extends"] = TokenType.PrExtends,
    ["is"] = TokenType.PrIs,
    ["as"] = TokenType.PrAs,
    ["@onready"] = TokenType.PrOnready,
    ["@tool"] = TokenType.PrTool,
    ["@export"] = TokenType.PrExport,
    // CALICO:
    ["yield"] = TokenType.PrYield,

    ["setget"] = TokenType.PrSetget,
    ["static"] = TokenType.PrStatic,

    ["void"] = TokenType.PrVoid,
    ["enum"] = TokenType.PrEnum,
    ["preload"] = TokenType.PrPreload,
    ["assert"] = TokenType.PrAssert,

    ["signal"] = TokenType.PrSignal,
    ["breakpoint"] = TokenType.PrBreakpoint,

    // Networking keywords
    ["sync"] = TokenType.PrSync,
    ["remote"] = TokenType.PrRemote,
    ["master"] = TokenType.PrMaster,
    ["slave"] = TokenType.PrSlave,
    ["puppet"] = TokenType.PrPuppet,

    ["remotesync"] = TokenType.PrRemotesync,
    ["mastersync"] = TokenType.PrMastersync,
    ["puppetsync"] = TokenType.PrPuppetsync,

    ["\n"] = TokenType.Newline,

    // Named constants
    ["PI"] = TokenType.ConstPi,
    ["TAU"] = TokenType.ConstTau,
    ["INF"] = TokenType.ConstInf,
    ["NAN"] = TokenType.ConstNan,

    ["error"] = TokenType.Error,
    ["cursor"] = TokenType.Cursor,

    ["self"] = TokenType.Self,

    ["in"] = TokenType.OpIn,

    ["_"] = TokenType.Wildcard,

    // Brackets
    ["["] = TokenType.BracketOpen,
    ["]"] = TokenType.BracketClose,
    ["{"] = TokenType.CurlyBracketOpen,
    ["}"] = TokenType.CurlyBracketClose,

    ["("] = TokenType.ParenthesisOpen,
    [")"] = TokenType.ParenthesisClose,

    // Punctuation
    [","] = TokenType.Comma,
    [";"] = TokenType.Semicolon,
    ["."] = TokenType.Period,
    ["?"] = TokenType.QuestionMark,
    [":"] = TokenType.Colon,
    ["$"] = TokenType.Dollar,
    ["->"] = TokenType.ForwardArrow,

    // Shifts
    [">>="] = TokenType.OpAssignShiftRight,
    ["<<="] = TokenType.OpAssignShiftLeft,

    [">>"] = TokenType.OpShiftRight,
    ["<<"] = TokenType.OpShiftLeft,

    // Equality and logic
    ["=="] = TokenType.OpEqual,
    ["!="] = TokenType.OpNotEqual,
    ["&&"] = TokenType.OpAnd,
    ["||"] = TokenType.OpOr,
    ["!"] = TokenType.OpNot,

    // Compound assignment
    ["+="] = TokenType.OpAssignAdd,
    ["-="] = TokenType.OpAssignSub,
    ["*="] = TokenType.OpAssignMul,
    ["/="] = TokenType.OpAssignDiv,
    ["%="] = TokenType.OpAssignMod,
    ["&="] = TokenType.OpAssignBitAnd,
    ["|="] = TokenType.OpAssignBitOr,
    ["^="] = TokenType.OpAssignBitXor,

    // Arithmetic
    ["+"] = TokenType.OpAdd,
    ["-"] = TokenType.OpSub,
    ["*"] = TokenType.OpMul,
    ["/"] = TokenType.OpDiv,
    ["%"] = TokenType.OpMod,

    // Bitwise
    ["~"] = TokenType.OpBitInvert,
    ["&"] = TokenType.OpBitAnd,
    ["|"] = TokenType.OpBitOr,
    ["^"] = TokenType.OpBitXor,

    // Comparison
    ["<="] = TokenType.OpLessEqual,
    [">="] = TokenType.OpGreaterEqual,
    ["<"] = TokenType.OpLess,
    [">"] = TokenType.OpGreater,

    ["="] = TokenType.OpAssign,
};
150
/// <summary>
/// Operator and punctuation lexemes that split the input even when no whitespace surrounds them.
/// </summary>
/// <remarks>
/// ORDER MATTERS: the scanner walks this list from top to bottom and takes the first entry that
/// matches at the current position, so every lexeme must appear before any lexeme that is a
/// prefix of it (<c>"&gt;&gt;="</c> before <c>"&gt;&gt;"</c> before <c>"&gt;"</c>, <c>"-&gt;"</c>
/// before <c>"-"</c>, ...). An array is used rather than a hash set because a hash set does not
/// promise any enumeration order.
/// </remarks>
private static readonly string[] Symbols = {
    "->",

    ">>=",
    "<<=",

    ">>",
    "<<",

    "==",
    "!=",
    "&&",
    "||",
    "!",

    "+=",
    "-=",
    "*=",
    "/=",
    "%=",
    "&=",
    "|=",
    "^=",

    "_",

    "[",
    "]",

    "{",
    "}",

    "(",
    ")",

    ",",
    ";",
    ".",
    "?",
    ":",
    "$",
    "+",
    "-",
    "*",
    "/",
    "%",

    "~",
    "&",
    "|",
    "^",

    "<=",
    ">=",
    "<",
    ">",

    "=",
};
210
/// <summary>
/// Member names of the <see cref="BuiltinFunction"/> enum, in the order reported by
/// <c>Enum.GetNames</c>. A name's position in this list is the value stored in the
/// <see cref="TokenType.BuiltInFunc"/> token emitted for it.
/// </summary>
private static readonly List<string> BuiltinFunctions = new(Enum.GetNames<BuiltinFunction>());
212
/// <summary>
/// Consumes the element that follows a <c>"\n"</c> marker, parses it as a tab count and queues
/// the corresponding <see cref="TokenType.Newline"/> token.
/// </summary>
/// <param name="enumerator">Lexeme stream positioned on the <c>"\n"</c> marker; advanced by one element.</param>
/// <param name="baseIndent">Indentation added on top of the parsed tab count.</param>
/// <param name="toFlush">Pending-token buffer that receives the newline token.</param>
private static void InsertNewLine(IEnumerator<string> enumerator, uint baseIndent, List<Token> toFlush) {
    // When the marker is the last element of the stream there is no tab count to read and
    // therefore no newline token to emit.
    if (enumerator.MoveNext()) {
        var indent = uint.Parse(enumerator.Current) + baseIndent;
        toFlush.Add(new Token(TokenType.Newline, indent));
    }
}
221
/// <summary>
/// Handles a lone <c>"_"</c> lexeme by looking at the element that follows it. Because
/// <c>"_"</c> is a splitting symbol, identifiers containing underscores arrive in pieces and
/// have to be glued back together.
/// </summary>
/// <param name="enumerator">Lexeme stream positioned on the <c>"_"</c>; advanced by one element.</param>
/// <param name="toFlush">Pending-token buffer; receives the wildcard tokens for the <c>_:</c> form.</param>
/// <param name="found">
/// <c>"_"</c> followed by the next lexeme when the underscore belongs to an identifier;
/// <see cref="string.Empty"/> when the stream ended or the <c>_:</c> form was emitted.
/// </param>
private static void BuildIdentifierName(IEnumerator<string> enumerator, List<Token> toFlush, out string? found) {
    found = string.Empty;
    if (!enumerator.MoveNext()) {
        return;
    }

    if (enumerator.Current == ":") {
        // `_:` is the wildcard followed by a colon. The colon lexeme maps to TokenType.Colon in
        // the token table, so emit that type here as well (not Semicolon, which is ";").
        toFlush.Add(new Token(TokenType.Wildcard));
        toFlush.Add(new Token(TokenType.Colon));
        return;
    }

    found = "_" + enumerator.Current;
}
236
/// <summary>
/// Parses an integer or decimal literal that starts at the enumerator's current lexeme and
/// queues the resulting constant token.
/// </summary>
/// <param name="enumerator">
/// Lexeme stream positioned on the first lexeme of the literal (its digits, or a leading
/// <c>"-"</c>). It is advanced past every lexeme that was inspected.
/// </param>
/// <param name="toFlush">Pending-token buffer that receives the constant (or a bare minus operator).</param>
/// <param name="foundFull">
/// <c>true</c> when every inspected lexeme was consumed; <c>false</c> when the enumerator was left
/// on a lexeme that does not belong to the literal and the caller must process it again.
/// </param>
/// <exception cref="FormatException">
/// The lexeme is not a decimal integer that fits in a <see cref="long"/> and no leading minus was
/// consumed (for example <c>0xFF</c>).
/// </exception>
private static void BuildNumber(IEnumerator<string> enumerator, List<Token> toFlush, out bool foundFull) {
    foundFull = true;
    var sign = 1;

    if (enumerator.Current == "-") {
        sign = -1;
        if (!enumerator.MoveNext()) {
            // Nothing follows the minus: keep it as an operator instead of dropping it.
            toFlush.Add(new Token(TokenType.OpSub));
            return;
        }
    }

    if (!long.TryParse(enumerator.Current, out var upper)) {
        if (sign > 0) {
            // No minus was consumed, so requesting a reparse would hand the caller this very
            // lexeme again and the tokenizer would never advance. Fail loudly instead.
            throw new FormatException($"Unsupported numeric literal '{enumerator.Current}'.");
        }

        // The minus was an operator rather than a sign: emit it and let the caller reparse
        // the lexeme that follows it.
        toFlush.Add(new Token(TokenType.OpSub));
        foundFull = false;
        return;
    }

    if (!enumerator.MoveNext()) {
        // End of input right after the digits: it is still an integer literal.
        toFlush.Add(new ConstantToken(new IntVariant(upper * sign)));
        return;
    }

    if (enumerator.Current != ".") {
        toFlush.Add(new ConstantToken(new IntVariant(upper * sign)));
        foundFull = false;
        return;
    }

    if (!enumerator.MoveNext()) {
        // "1000." at the very end of the input.
        toFlush.Add(new ConstantToken(new RealVariant((double) upper * sign)));
        return;
    }

    // Trim so that stray surrounding whitespace is not counted as a digit below. The first-char
    // check rejects a signed value, which long.TryParse would otherwise accept.
    var fractionText = enumerator.Current.Trim();
    if (!long.TryParse(fractionText, out var lower) || !char.IsDigit(fractionText[0])) {
        // A trailing-dot literal such as "1000.": emit it as a real with no fractional part and
        // let the caller reparse the lexeme that follows instead of silently swallowing it.
        toFlush.Add(new ConstantToken(new RealVariant((double) upper * sign)));
        foundFull = false;
        return;
    }

    // Scale by the number of digits as written, not by the parsed value's digit count, so that
    // leading zeros survive ("1.05" is 1 + 5/100, not 1 + 5/10).
    var result = upper + lower / Math.Pow(10, fractionText.Length);
    toFlush.Add(new ConstantToken(new RealVariant(result * sign)));
}
273
/// <summary>
/// Converts a GDScript snippet into a sequence of tokens.
/// </summary>
/// <remarks>
/// CALICO: unlike the upstream tokenizer this does not wrap the result in a leading and a
/// trailing newline token, because it is meant for snippets rather than whole scripts.
/// Evaluation is deferred: nothing is tokenized until the returned sequence is enumerated.
/// </remarks>
/// <param name="gdScript">The snippet source text.</param>
/// <param name="baseIndent">Indentation added to the tab count of every newline token.</param>
/// <returns>The tokens of the snippet, in source order.</returns>
public static IEnumerable<Token> Tokenize(string gdScript, uint baseIndent = 0) {
    var finalTokens = new List<Token>();
    // Tokens produced for the current lexeme. They are held back so that a pending identifier
    // can be emitted in front of them.
    var toFlush = new List<Token>(2);
    // Identifier text accumulated from consecutive lexemes that matched nothing else.
    var idName = string.Empty;

    // The trailing space guarantees that the last word is terminated by a separator.
    using var enumerator = SanitizeInput(TokenizeString(gdScript + " ")).GetEnumerator();
    var reparse = false;
    while (reparse || enumerator.MoveNext()) {
        reparse = false;
        var current = enumerator.Current;

        if (current == "\n") {
            InsertNewLine(enumerator, baseIndent, toFlush);
            EndAndFlushId();
            continue;
        }

        if (current == "_") {
            BuildIdentifierName(enumerator, toFlush, out string? found);
            if (found == string.Empty) {
                EndAndFlushId();
                continue;
            }

            // The underscore is part of an identifier: keep accumulating.
            idName += found;
            End();
            continue;
        }

        // Only digit-led lexemes are sent to the number parser; a leading "-" is always
        // tokenized as an operator.
        if (char.IsDigit(current[0])) {
            BuildNumber(enumerator, toFlush, out bool foundFull);
            // The number parser may stop on a lexeme it did not consume; process it next.
            reparse = !foundFull;
            EndAndFlushId();
            continue;
        }

        var builtinIndex = BuiltinFunctions.IndexOf(current);
        if (builtinIndex >= 0) {
            toFlush.Add(new Token(TokenType.BuiltInFunc, (uint?) builtinIndex));
            EndAndFlushId();
            continue;
        }

        if (Tokens.TryGetValue(current, out var type)) {
            toFlush.Add(new Token(type));
            EndAndFlushId();
            continue;
        }

        if (current.StartsWith('"')) {
            // Strip the surrounding quotes.
            toFlush.Add(new ConstantToken(new StringVariant(current.Substring(1, current.Length - 2))));
            EndAndFlushId();
            continue;
        }

        if (bool.TryParse(current, out var resultB)) {
            toFlush.Add(new ConstantToken(new BoolVariant(resultB)));
            EndAndFlushId();
            continue;
        }

        idName += current;
        End();
    }

    foreach (var t in finalTokens) yield return t;

    // Moves the tokens queued for the current lexeme into the result.
    void End() {
        finalTokens.AddRange(toFlush);
        toFlush.Clear();
    }

    // Emits the pending identifier (if any) in front of the queued tokens, then flushes them.
    void EndAndFlushId() {
        if (idName != string.Empty) {
            // CALICO: trim the name, otherwise surrounding whitespace ends up in the token.
            var name = idName.Trim();

            // TODO: CALICO: clean up this hack. These words reach the identifier buffer instead
            // of being recognized by the lookups above (for example when they carry stray
            // whitespace, or because the built-in function list holds enum member names rather
            // than GDScript names), so they are mapped to the proper token here.
            Token token = name switch {
                // CALICO: `return` / `self` being the last token of a line.
                "return" => new Token(TokenType.CfReturn),
                "self" => new Token(TokenType.Self),
                "break" => new Token(TokenType.CfBreak),
                "match" => new Token(TokenType.CfMatch),
                "null" => new ConstantToken(new NilVariant()),
                "print" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.TextPrint),
                "min" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.LogicMin),
                "lerp_angle" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.MathLerpAngle),
                "pow" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.MathPow),
                "abs" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.MathAbs),
                // CALICO: built-in type ids, see
                // https://docs.godotengine.org/en/3.5/tutorials/io/binary_serialization_api.html
                "Color" => new Token(TokenType.BuiltInType, 14),
                "Vector3" => new Token(TokenType.BuiltInType, 7),
                "int" => new Token(TokenType.BuiltInType, 2),
                _ => new IdentifierToken(name),
            };

            finalTokens.Add(token);
            idName = string.Empty;
        }

        End();
    }
}
427
/// <summary>
/// Filters the raw lexeme stream, dropping null, empty and whitespace-only entries while
/// keeping the <c>"\n"</c> line markers.
/// </summary>
/// <param name="tokens">Raw lexemes.</param>
/// <returns>The lexemes that carry content, in their original order (lazily evaluated).</returns>
private static IEnumerable<string> SanitizeInput(IEnumerable<string> tokens) {
    foreach (var lexeme in tokens) {
        // "\n" is itself whitespace, so it has to be let through explicitly.
        var keep = lexeme == "\n" || !string.IsNullOrWhiteSpace(lexeme);
        if (keep) {
            yield return lexeme;
        }
    }
}
437
/// <summary>
/// Splits source text into raw lexemes: words, quoted strings (quotes included), symbols and
/// line markers.
/// </summary>
/// <remarks>
/// Every line break inside the text is reported as two elements: <c>"\n"</c> followed by the
/// decimal count of tab characters that start the next line. A final bare <c>"\n"</c> is always
/// appended. Empty strings are emitted freely whenever a separator is hit with nothing
/// buffered; the caller is expected to filter them out.
/// </remarks>
/// <param name="text">The source text.</param>
/// <returns>The raw lexemes, lazily evaluated.</returns>
private static IEnumerable<string> TokenizeString(string text) {
    StringBuilder builder = new(20);
    for (var i = 0; i < text.Length; i++) {
        switch (text[i]) {
            case '"': {
                // Emit whatever preceded the string, then copy up to and including the closing quote.
                yield return ClearBuilder();
                builder.Append('"');
                i++;
                for (; i < text.Length; i++) {
                    builder.Append(text[i]);
                    if (text[i] == '"') {
                        break;
                    }
                }

                yield return ClearBuilder();
                continue;
            }

            case '\n': {
                yield return ClearBuilder();
                var start = i;
                i++;
                // Skip the tabs that indent the next line ...
                for (; i < text.Length && text[i] == '\t'; i++) ;
                // ... and step back so the loop increment lands on the first non-tab character.
                i--;
                yield return "\n";
                yield return $"{i - start}";
                continue;
            }
        }

        var matched = false;
        // First match wins, so this relies on longer symbols being listed before their prefixes.
        foreach (var delimiter in Symbols) {
            if (Match(text, i, delimiter)) {
                yield return ClearBuilder();
                yield return delimiter;
                i += delimiter.Length - 1;
                matched = true;
                break;
            }
        }

        if (matched) continue;

        // Spaces, carriage returns (CRLF input) and mid-line tabs all separate words. Treating
        // only ' ' as a separator left '\r' and '\t' glued to the neighbouring word, which kept
        // keywords such as `pass` from being recognized.
        if (text[i] is ' ' or '\t' or '\r') {
            yield return ClearBuilder();
            continue;
        }

        builder.Append(text[i]);
    }

    // Flush a word that runs up to the end of the text instead of dropping it.
    yield return ClearBuilder();
    yield return "\n";

    // Returns the buffered text (possibly empty) and resets the buffer.
    string ClearBuilder() {
        var built = builder.ToString();
        builder.Clear();
        return built;
    }
}
499
/// <summary>
/// Tests whether <paramref name="match"/> occurs in <paramref name="text"/> starting exactly at
/// <paramref name="index"/>, comparing character by character.
/// </summary>
/// <param name="text">The text to look in.</param>
/// <param name="index">Position in <paramref name="text"/> where the comparison starts.</param>
/// <param name="match">The string expected at that position.</param>
/// <returns><c>true</c> when every character of <paramref name="match"/> is found in place; otherwise <c>false</c>.</returns>
private static bool Match(string text, int index, string match) {
    var end = index + match.Length;
    if (end > text.Length) {
        // Not enough characters left for a full match.
        return false;
    }

    var offset = 0;
    while (offset < match.Length && text[index + offset] == match[offset]) {
        offset++;
    }

    return offset == match.Length;
}
507}