using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

namespace Astrid;

// Astrid tokenizer: turns source text into a flat Token array. Every token
// records its value plus a (lineStart, lineEnd, charStart, charEnd) span.
public static class Tokenizer {
    public static List<string> Keywords = new() { "enum", "class", "if", "while", "return", "match" };

    public static Token[] TokenizeFromFile(string path) {
        return TokenizeFromMemory(File.ReadAllLines(path));
    }

    public static Token[] TokenizeFromMemory(string[] fileLines) {
        // Strip line comments in place. Caveat: this also truncates a line
        // whose string literal happens to contain "//".
        for(var i = 0; i < fileLines.Length; i++) {
            fileLines[i] = fileLines[i].Split("//")[0];
        }

        List<Token> tokens = new List<Token>();

        string currentWord = "";

        int lineIdx;
        int charIdx;

        bool isString = false;
        string[] boolValues = new string[] { "false", "true" };

        // Flush the pending word as the most specific token it matches:
        // string contents > int > float > 'f'-suffixed float > bool > keyword > identifier.
        void HandleCurrentWord() {
            if(isString) {
                // String contents (possibly empty); the span covers both quotes.
                tokens.Add(new TokenString(currentWord, lineIdx, lineIdx, charIdx - currentWord.Length - 1, charIdx + 1));
            } else if(currentWord != "") {
                // Drop the last character so a trailing 'f' suffix (e.g. "1.5f") can be tested.
                string wordWithoutSuffix = currentWord.Remove(currentWord.Length - 1);

                // Caveat: int/float.TryParse are culture-sensitive, so a locale that
                // uses ',' as the decimal separator will not accept "1.5".
                if(int.TryParse(currentWord, out int ivalue))
                    tokens.Add(new TokenInt(ivalue.ToString(), lineIdx, lineIdx, charIdx - ivalue.ToString().Length, charIdx));
                else if(float.TryParse(currentWord, out float fvalue))
                    tokens.Add(new TokenFloat(fvalue.ToString(), lineIdx, lineIdx, charIdx - fvalue.ToString().Length, charIdx));
                else if(currentWord.Last() == 'f' && float.TryParse(wordWithoutSuffix, out float fSuffixed))
                    tokens.Add(new TokenFloat(fSuffixed.ToString(), lineIdx, lineIdx, charIdx - fSuffixed.ToString().Length - 1, charIdx));
                else if(boolValues.Contains(currentWord.ToLower()))
                    tokens.Add(new TokenBoolean((currentWord.ToLower() == "true").ToString().ToLower(), lineIdx, lineIdx, charIdx - currentWord.Length, charIdx));
                else if(Keywords.Contains(currentWord))
                    tokens.Add(new TokenKeyword(currentWord, lineIdx, lineIdx, charIdx - currentWord.Length, charIdx));
                else
                    tokens.Add(new TokenIdentifier(currentWord, lineIdx, lineIdx, charIdx - currentWord.Length, charIdx));
            }
            currentWord = "";
        }

        lineIdx = -1;
        foreach(string line in fileLines) {
            lineIdx++;
            charIdx = -1;
            var skipNext = false;

            // True when the next character on this line is `next`; guards the
            // two-character operator lookahead against running off the end of the line.
            bool NextCharIs(char next) => charIdx + 1 < line.Length && line[charIdx + 1] == next;

            foreach(char c in line) {
                charIdx++;
                if(skipNext) {
                    skipNext = false;
                    continue;
                }

                // Inside a string literal everything except the closing quote is taken verbatim.
                if(isString) {
                    if(c == '"') {
                        HandleCurrentWord();
                        isString = false;
                        continue;
                    }
                    currentWord += c;
                    continue;
                }

                switch(c) {
                    case '=': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            tokens.Add(new TokenEquals(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenAssign(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '"': {
                        HandleCurrentWord(); // flush any word butted up against the opening quote
                        isString = true;
                    } break;
                    case ',': {
                        HandleCurrentWord();

                        tokens.Add(new TokenComma(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '[': {
                        HandleCurrentWord();

                        tokens.Add(new TokenArrayStart(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case ']': {
                        HandleCurrentWord();

                        tokens.Add(new TokenArrayEnd(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '(': {
                        HandleCurrentWord();

                        tokens.Add(new TokenParenStart(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case ')': {
                        HandleCurrentWord();

                        tokens.Add(new TokenParenEnd(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '{': {
                        HandleCurrentWord();

                        tokens.Add(new TokenBlockStart(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '}': {
                        HandleCurrentWord();

                        tokens.Add(new TokenBlockEnd(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case ':': {
                        HandleCurrentWord();

                        if(NextCharIs(':')) {
                            tokens.Add(new TokenDoubleColon(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenColon(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case ';': {
                        HandleCurrentWord();

                        tokens.Add(new TokenEOL(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '+': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            tokens.Add(new TokenAssignPlus(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenPlus(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '-': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            tokens.Add(new TokenAssignMinus(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenMinus(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '*': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            tokens.Add(new TokenAssignMultiply(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenMultiply(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '/': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            tokens.Add(new TokenAssignDivide(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenDivide(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '^': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            tokens.Add(new TokenAssignPower(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenPower(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '%': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            // NOTE: '%=' emits TokenAssignPower, which looks like a copy-paste
                            // slip from the '^' case; a modulo-assign token is presumably intended.
                            tokens.Add(new TokenAssignPower(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenModulo(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '.': {
                        // Keep the '.' when it extends a numeric literal (e.g. "1" + ".5");
                        // otherwise it acts as a namespace separator.
                        if(!float.TryParse(currentWord, out _)) {
                            HandleCurrentWord();

                            tokens.Add(new TokenNamespaceSeparator(lineIdx, lineIdx, charIdx, charIdx + 1));
                        } else {
                            goto default;
                        }
                    } break;
                    case '|': {
                        HandleCurrentWord();

                        if(NextCharIs('|')) {
                            tokens.Add(new TokenOr(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            goto default; // a lone '|' is not an operator here
                    } break;
                    case '&': {
                        HandleCurrentWord();

                        if(NextCharIs('&')) {
                            tokens.Add(new TokenAnd(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            goto default; // a lone '&' is not an operator here
                    } break;
                    case '!': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            tokens.Add(new TokenNotEquals(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenNot(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '>': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            tokens.Add(new TokenGreaterEquals(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenGreater(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    case '<': {
                        HandleCurrentWord();

                        if(NextCharIs('=')) {
                            tokens.Add(new TokenLesserEquals(lineIdx, lineIdx, charIdx, charIdx + 2));
                            skipNext = true;
                        } else
                            tokens.Add(new TokenLesser(lineIdx, lineIdx, charIdx, charIdx + 1));
                    } break;
                    default: {
                        // Whitespace (including tabs) ends the current word;
                        // any other character extends it.
                        if(!char.IsWhiteSpace(c)) {
                            currentWord += c;
                        } else {
                            HandleCurrentWord();
                        }
                    } break;
                }
            }

            // Flush a word that runs to the very end of the line so it cannot
            // merge with the first word of the next line. Strings are exempt,
            // since isString deliberately survives across lines.
            if(!isString) {
                charIdx++;
                HandleCurrentWord();
            }
        }

        return tokens.ToArray();
    }

    internal static string GetTokenAsHuman(Token t)
    {
        // `dynamic` reaches the `value` field each concrete Token subclass carries.
        string ret = "";
        if (t.GetType() == typeof(TokenString))
        {
            ret += $"[{t}: \"{((dynamic)t).value}\"]";
        }
        else
        {
            ret += $"[{t}: '{((dynamic)t).value}']";
        }

        // ret += $" cs: {t.charStart}, ce: {t.charEnd}, ls: {t.lineStart}, le: {t.lineEnd}";
        return ret;
    }

    internal static void Print(Token t)
    {
        Console.WriteLine(GetTokenAsHuman(t));
    }
}
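A minimal usage sketch, not part of the file above: it assumes the Token subclasses the tokenizer references are defined elsewhere in the Astrid project and, as GetTokenAsHuman's dynamic access implies, that each carries a value field.

// Hypothetical driver, for illustration only.
var tokens = Tokenizer.TokenizeFromMemory(new[] { "x = 1.5f; // trailing comment" });
foreach (var t in tokens)
    Tokenizer.Print(t);

// Expected shape of the output (exact text depends on each Token's ToString()):
//   [Astrid.TokenIdentifier: 'x']
//   [Astrid.TokenAssign: '=']
//   [Astrid.TokenFloat: '1.5']
//   [Astrid.TokenEOL: ';']

Note how the line comment is stripped before scanning, and how the f suffix is recognized by the fallback parse in HandleCurrentWord, folding "1.5f" into a single TokenFloat.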