this repo has no description
at master 17 kB view raw
// MIT License
//
// Copyright (c) 2024 NotNite
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

using System.Text;
using GDWeave.Godot;
using GDWeave.Godot.Variants;

namespace Teemaw.Calico.Util;

/**
 * This is copied from https://github.com/NotNite/GDWeave/blob/main/GDWeave/Script/ScriptTokenizer.cs since this class
 * was not made visible. Minor modifications have been made to make it more ergonomic to tokenize snippets.
 *
 * The tokenizer works in two passes: TokenizeString splits the raw text into string fragments (words, symbols,
 * string literals, newline markers followed by a tab count), and Tokenize turns those fragments into Token objects.
 */
public static class ScriptTokenizer {
    /// <summary>Fragments that map directly onto a fixed token type.</summary>
    private static readonly Dictionary<string, TokenType> Tokens = new() {
        {"continue", TokenType.CfContinue},
        {"return", TokenType.CfReturn},
        {"break", TokenType.CfBreak},
        {"match", TokenType.CfMatch},
        {"while", TokenType.CfWhile},
        {"elif", TokenType.CfElif},
        {"else", TokenType.CfElse},
        {"pass", TokenType.CfPass},
        {"for", TokenType.CfFor},
        {"if", TokenType.CfIf},
        {"const", TokenType.PrConst},
        {"var", TokenType.PrVar},
        {"func", TokenType.PrFunction},
        {"class", TokenType.PrClass},
        {"extends", TokenType.PrExtends},
        {"is", TokenType.PrIs},
        {"as", TokenType.PrAs},
        {"@onready", TokenType.PrOnready},
        {"@tool", TokenType.PrTool},
        {"@export", TokenType.PrExport},
        // CALICO:
        {"yield", TokenType.PrYield},

        {"setget", TokenType.PrSetget},
        {"static", TokenType.PrStatic},

        {"void", TokenType.PrVoid},
        {"enum", TokenType.PrEnum},
        {"preload", TokenType.PrPreload},
        {"assert", TokenType.PrAssert},

        {"signal", TokenType.PrSignal},
        {"breakpoint", TokenType.PrBreakpoint},

        {"sync", TokenType.PrSync},
        {"remote", TokenType.PrRemote},
        {"master", TokenType.PrMaster},
        {"slave", TokenType.PrSlave},
        {"puppet", TokenType.PrPuppet},

        {"remotesync", TokenType.PrRemotesync},
        {"mastersync", TokenType.PrMastersync},
        {"puppetsync", TokenType.PrPuppetsync},

        {"\n", TokenType.Newline},

        {"PI", TokenType.ConstPi},
        {"TAU", TokenType.ConstTau},
        {"INF", TokenType.ConstInf},
        {"NAN", TokenType.ConstNan},

        {"error", TokenType.Error},
        {"cursor", TokenType.Cursor},

        {"self", TokenType.Self},

        {"in", TokenType.OpIn},

        {"_", TokenType.Wildcard},

        {"[", TokenType.BracketOpen},
        {"]", TokenType.BracketClose},
        {"{", TokenType.CurlyBracketOpen},
        {"}", TokenType.CurlyBracketClose},

        {"(", TokenType.ParenthesisOpen},
        {")", TokenType.ParenthesisClose},

        {",", TokenType.Comma},
        {";", TokenType.Semicolon},
        {".", TokenType.Period},
        {"?", TokenType.QuestionMark},
        {":", TokenType.Colon},
        {"$", TokenType.Dollar},
        {"->", TokenType.ForwardArrow},

        {">>=", TokenType.OpAssignShiftRight},
        {"<<=", TokenType.OpAssignShiftLeft},

        {">>", TokenType.OpShiftRight},
        {"<<", TokenType.OpShiftLeft},

        {"==", TokenType.OpEqual},
        {"!=", TokenType.OpNotEqual},
        {"&&", TokenType.OpAnd},
        {"||", TokenType.OpOr},
        {"!", TokenType.OpNot},

        {"+=", TokenType.OpAssignAdd},
        {"-=", TokenType.OpAssignSub},
        {"*=", TokenType.OpAssignMul},
        {"/=", TokenType.OpAssignDiv},
        {"%=", TokenType.OpAssignMod},
        {"&=", TokenType.OpAssignBitAnd},
        {"|=", TokenType.OpAssignBitOr},
        {"^=", TokenType.OpAssignBitXor},

        {"+", TokenType.OpAdd},
        {"-", TokenType.OpSub},
        {"*", TokenType.OpMul},
        {"/", TokenType.OpDiv},
        {"%", TokenType.OpMod},

        {"~", TokenType.OpBitInvert},
        {"&", TokenType.OpBitAnd},
        {"|", TokenType.OpBitOr},
        {"^", TokenType.OpBitXor},

        {"<=", TokenType.OpLessEqual},
        {">=", TokenType.OpGreaterEqual},
        {"<", TokenType.OpLess},
        {">", TokenType.OpGreater},

        {"=", TokenType.OpAssign},
    };

    /// <summary>
    /// Delimiters recognised by <see cref="TokenizeString"/>. The first entry that matches at a position wins, so
    /// every multi-character operator MUST be listed before any shorter operator that is a prefix of it
    /// (e.g. <c>&gt;&gt;=</c> before <c>&gt;&gt;</c> before <c>&gt;</c>). This is an array rather than a hash set
    /// because a hash set does not guarantee enumeration order.
    /// </summary>
    private static readonly string[] Symbols = {
        "->",

        ">>=",
        "<<=",

        ">>",
        "<<",

        "==",
        "!=",
        "&&",
        "||",
        "!",

        "+=",
        "-=",
        "*=",
        "/=",
        "%=",
        "&=",
        "|=",
        "^=",

        "_",

        "[",
        "]",

        "{",
        "}",

        "(",
        ")",

        ",",
        ";",
        ".",
        "?",
        ":",
        "$",
        "+",
        "-",
        "*",
        "/",
        "%",

        "~",
        "&",
        "|",
        "^",

        "<=",
        ">=",
        "<",
        ">",

        "=",
    };

    /// <summary>Names of the <see cref="BuiltinFunction"/> enum members; the list index is used as the token payload.</summary>
    private static readonly List<string> BuiltinFunctions = Enum.GetNames<BuiltinFunction>().ToList();

    /// <summary>
    /// Consumes the tab-count fragment that follows a newline fragment and queues a newline token whose payload is
    /// that count plus <paramref name="baseIndent"/>. Queues nothing when the newline is the last fragment.
    /// </summary>
    private static void InsertNewLine(IEnumerator<string> enumerator, uint baseIndent, List<Token> toFlush) {
        if (!enumerator.MoveNext()) {
            return;
        }

        var tabCount = uint.Parse(enumerator.Current);
        toFlush.Add(new Token(TokenType.Newline, tabCount + baseIndent));
    }

    /// <summary>
    /// Handles a <c>_</c> fragment. Because <c>_</c> is a delimiter, identifiers containing underscores arrive in
    /// pieces; this consumes the next fragment and returns it prefixed with <c>_</c> in <paramref name="found"/> so
    /// the caller can append it to the identifier being built. When the next fragment is <c>:</c> the underscore is
    /// treated as a wildcard followed by a colon, and <paramref name="found"/> stays empty.
    /// </summary>
    private static void BuildIdentifierName(IEnumerator<string> enumerator, List<Token> toFlush, out string? found) {
        found = string.Empty;
        if (!enumerator.MoveNext()) {
            return;
        }

        if (enumerator.Current == ":") {
            toFlush.Add(new Token(TokenType.Wildcard));
            // The consumed fragment is ":" which maps to Colon in the token table; the previous code emitted
            // Semicolon here, which does not correspond to the text that was read.
            toFlush.Add(new Token(TokenType.Colon));
            return;
        }

        // NOTE(review): any other following fragment (including symbols or a newline) is glued onto the
        // identifier as-is; a bare `_` followed by e.g. `in` therefore becomes the identifier `_in`.
        found = "_" + enumerator.Current;
    }

    /// <summary>
    /// Parses an integer or real literal starting at the enumerator's current fragment and queues the resulting
    /// constant token.
    /// </summary>
    /// <param name="enumerator">Fragment enumerator positioned on the first fragment of the literal.</param>
    /// <param name="toFlush">Receives the produced token.</param>
    /// <param name="foundFull">
    /// <c>true</c> when every consumed fragment belonged to the literal; <c>false</c> when the enumerator is now
    /// positioned on a fragment that is NOT part of the literal and must be re-processed by the caller.
    /// </param>
    /// <exception cref="FormatException">
    /// The fragment starts like a number but is not a decimal integer that fits in a <see cref="long"/>
    /// (hex, exponent notation, overflow). Previously this case made the caller loop forever.
    /// </exception>
    private static void BuildNumber(IEnumerator<string> enumerator, List<Token> toFlush, out bool foundFull) {
        foundFull = true;
        var sign = 1;
        var consumedSign = false;

        if (enumerator.Current == "-") {
            sign = -1;
            consumedSign = true;
            if (!enumerator.MoveNext()) {
                // Nothing follows the minus: keep it as an operator instead of dropping it.
                toFlush.Add(new Token(TokenType.OpSub));
                return;
            }
        }

        var wholeText = enumerator.Current;
        if (!long.TryParse(wholeText, out var upper)) {
            if (!consumedSign) {
                // Asking the caller to reparse would hand this same fragment straight back to us.
                throw new FormatException($"Unsupported numeric literal '{wholeText}'.");
            }

            // The minus was an operator after all; let the caller process the fragment after it.
            toFlush.Add(new Token(TokenType.OpSub));
            foundFull = false;
            return;
        }

        if (!enumerator.MoveNext()) {
            toFlush.Add(new ConstantToken(new IntVariant(upper * sign)));
            return;
        }

        if (enumerator.Current != ".") {
            toFlush.Add(new ConstantToken(new IntVariant(upper * sign)));
            foundFull = false;
            return;
        }

        if (!enumerator.MoveNext()) {
            toFlush.Add(new ConstantToken(new RealVariant((double) upper * sign)));
            return;
        }

        var fractionText = enumerator.Current;
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        // Parse the literal from its text so leading zeros in the fraction are honoured ("1.05" is 1.05, not 1.5)
        // and fractions longer than a long can hold still work.
        if (IsAsciiDigits(fractionText)
            && double.TryParse(
                upper.ToString(invariant) + "." + fractionText,
                System.Globalization.NumberStyles.Float,
                invariant,
                out var magnitude)) {
            toFlush.Add(new ConstantToken(new RealVariant(magnitude * sign)));
            return;
        }

        // A literal with a trailing dot ("1000."): emit the whole part as a real and let the caller process the
        // fragment after the dot, rather than silently discarding both.
        toFlush.Add(new ConstantToken(new RealVariant((double) upper * sign)));
        foundFull = false;
    }

    /// <summary>Returns <c>true</c> when <paramref name="text"/> is non-empty and consists only of '0'..'9'.</summary>
    private static bool IsAsciiDigits(string text) {
        if (text.Length == 0) {
            return false;
        }

        foreach (var c in text) {
            if (c is < '0' or > '9') {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Maps a completed identifier to its token. A handful of names are special-cased because they are not
    /// resolved by the fragment-level lookups (for instance names assembled from several fragments such as
    /// <c>lerp_angle</c>, or built-ins whose GDScript name differs from the enum member name).
    /// </summary>
    private static Token ResolveIdentifier(string name) => name switch {
        // CALICO: keywords that previously slipped through as identifiers at the end of a line.
        "return" => new Token(TokenType.CfReturn),
        "self" => new Token(TokenType.Self),
        "break" => new Token(TokenType.CfBreak),
        "match" => new Token(TokenType.CfMatch),
        // TODO: CALICO: clean up this hack
        // CALICO: Without these, the built-in functions are tokenized as identifiers.
        "print" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.TextPrint),
        "min" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.LogicMin),
        "lerp_angle" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.MathLerpAngle),
        "pow" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.MathPow),
        "abs" => new Token(TokenType.BuiltInFunc, (uint?) BuiltinFunction.MathAbs),
        // CALICO: Without this, `null` is tokenized as an identifier.
        "null" => new ConstantToken(new NilVariant()),
        // CALICO: built-in type ids, see
        // https://docs.godotengine.org/en/3.5/tutorials/io/binary_serialization_api.html
        "Color" => new Token(TokenType.BuiltInType, 14),
        "Vector3" => new Token(TokenType.BuiltInType, 7),
        "int" => new Token(TokenType.BuiltInType, 2),
        _ => new IdentifierToken(name),
    };

    /// <summary>
    /// Tokenizes a GDScript snippet.
    /// </summary>
    /// <param name="gdScript">The snippet source text.</param>
    /// <param name="baseIndent">Indent level added to the tab count of every newline token.</param>
    /// <returns>
    /// The tokens of the snippet, in source order. No leading or trailing newline token is added. The sequence is
    /// produced lazily: all work (and any exception) happens when enumeration starts.
    /// </returns>
    /// <exception cref="FormatException">The snippet contains a numeric literal that is not supported.</exception>
    public static IEnumerable<Token> Tokenize(string gdScript, uint baseIndent = 0) {
        var finalTokens = new List<Token>();

        // Identifier text accumulated across fragments (identifiers are split at every '_').
        var idName = string.Empty;
        // Tokens produced for the current fragment; appended after any pending identifier.
        var toFlush = new List<Token>(2);

        // CALICO: We don't need this since we're dealing with snippets
        //finalTokens.Add(new Token(TokenType.Newline, baseIndent));
        using var enumerator = SanitizeInput(TokenizeString(gdScript + " ")).GetEnumerator();

        // When set, the current fragment was left unconsumed by a helper and is processed again without advancing.
        var reparse = false;
        while (reparse || enumerator.MoveNext()) {
            reparse = false;
            var current = enumerator.Current;

            if (current == "\n") {
                InsertNewLine(enumerator, baseIndent, toFlush);
                FlushWithIdentifier();
                continue;
            }

            if (current == "_") {
                BuildIdentifierName(enumerator, toFlush, out var found);
                if (found == string.Empty) {
                    FlushWithIdentifier();
                    continue;
                }

                idName += found;
                Flush();
                continue;
            }

            // CALICO: a leading '-' is left to the operator table instead of being folded into the literal.
            if (char.IsDigit(current[0])) {
                BuildNumber(enumerator, toFlush, out var foundFull);
                reparse = !foundFull;
                FlushWithIdentifier();
                continue;
            }

            var builtinIndex = BuiltinFunctions.IndexOf(current);
            if (builtinIndex >= 0) {
                toFlush.Add(new Token(TokenType.BuiltInFunc, (uint?) builtinIndex));
                FlushWithIdentifier();
                continue;
            }

            if (Tokens.TryGetValue(current, out var type)) {
                toFlush.Add(new Token(type));
                FlushWithIdentifier();
                continue;
            }

            if (current.StartsWith('"')) {
                // Strip the surrounding quotes.
                toFlush.Add(new ConstantToken(new StringVariant(current.Substring(1, current.Length - 2))));
                FlushWithIdentifier();
                continue;
            }

            if (bool.TryParse(current, out var resultB)) {
                toFlush.Add(new ConstantToken(new BoolVariant(resultB)));
                FlushWithIdentifier();
                continue;
            }

            // Anything else is (part of) an identifier; it is emitted once a non-identifier fragment arrives.
            idName += current;
            Flush();
        }

        // CALICO: We don't need this since we're dealing with snippets
        //finalTokens.Add(new(TokenType.Newline, baseIndent));

        foreach (var token in finalTokens) {
            yield return token;
        }

        // Moves the queued tokens to the output.
        void Flush() {
            finalTokens.AddRange(toFlush);
            toFlush.Clear();
        }

        // Emits the pending identifier (if any) first, then the queued tokens, preserving source order.
        void FlushWithIdentifier() {
            if (idName != string.Empty) {
                // CALICO: We change this to trim the idName, otherwise the whitespace messes with the token
                finalTokens.Add(ResolveIdentifier(idName.Trim()));
                idName = string.Empty;
            }

            Flush();
        }
    }

    /// <summary>Drops empty and whitespace-only fragments, keeping the newline marker.</summary>
    private static IEnumerable<string> SanitizeInput(IEnumerable<string> tokens) {
        foreach (var token in tokens) {
            if (token != "\n" && string.IsNullOrWhiteSpace(token)) {
                continue;
            }

            yield return token;
        }
    }

    /// <summary>
    /// Splits raw text into fragments: double-quoted strings (quotes included), entries of <see cref="Symbols"/>,
    /// whitespace-separated words, and for every line break a <c>"\n"</c> fragment immediately followed by a
    /// fragment holding the number of tabs that start the next line. A final bare <c>"\n"</c> is always appended.
    /// Fragments may be empty; callers are expected to filter them.
    /// </summary>
    private static IEnumerable<string> TokenizeString(string text) {
        StringBuilder builder = new(20);
        for (var i = 0; i < text.Length; i++) {
            switch (text[i]) {
                case '"': {
                    yield return ClearBuilder();
                    builder.Append('"');
                    i++;
                    // Copy everything up to and including the closing quote.
                    for (; i < text.Length; i++) {
                        builder.Append(text[i]);
                        if (text[i] == '"') {
                            break;
                        }
                    }

                    yield return ClearBuilder();
                    continue;
                }

                // This is stupid and awful
                case '\n': {
                    yield return ClearBuilder();
                    var start = i;
                    i++;
                    // Skip the tabs that indent the next line.
                    for (; i < text.Length && text[i] == '\t'; i++) ;
                    // Step back onto the last tab so the outer loop's i++ lands on the first non-tab character.
                    i--;
                    yield return "\n";
                    yield return $"{i - start}";
                    continue;
                }
            }

            var matched = false;
            foreach (var delimiter in Symbols) {
                if (Match(text, i, delimiter)) {
                    yield return ClearBuilder();
                    yield return delimiter;
                    i += delimiter.Length - 1;
                    matched = true;
                    break;
                }
            }

            if (matched) continue;

            // Carriage returns (CRLF input) and mid-line tabs separate words just like spaces do; otherwise they
            // end up glued to the word and keywords such as `pass` stop matching the token table.
            if (text[i] is ' ' or '\t' or '\r') {
                yield return ClearBuilder();
                continue;
            }

            builder.Append(text[i]);
        }

        yield return "\n";

        // Returns the accumulated word and resets the builder.
        string ClearBuilder() {
            var built = builder.ToString();
            builder.Clear();
            return built;
        }
    }

    /// <summary>Returns <c>true</c> when <paramref name="match"/> occurs in <paramref name="text"/> at <paramref name="index"/>.</summary>
    private static bool Match(string text, int index, string match) {
        if (index + match.Length > text.Length) return false;
        for (var i = 0; i < match.Length; i++) {
            if (text[index + i] != match[i]) return false;
        }
        return true;
    }
}