namespace Astrid;

/// <summary>
/// Line-oriented tokenizer for Astrid source files. Produces a flat
/// <see cref="Token"/> stream; every token carries
/// (lineStart, lineEnd, charStart, charEnd) positions within the source text.
/// </summary>
public static class Tokenizer {
    // Reserved words emitted as TokenKeyword; any other word becomes TokenIdentifier.
    public static List<string> Keywords = new() {"enum", "class", "if", "while", "return", "match"};

    /// <summary>Reads the file at <paramref name="path"/> and tokenizes its lines.</summary>
    public static Token[] TokenizeFromFile(string path) {
        return TokenizeFromMemory(File.ReadAllLines(path));
    }

    /// <summary>
    /// Tokenizes the given source lines. The input array is modified in place:
    /// "//" line comments are stripped from each element before scanning.
    /// </summary>
    public static Token[] TokenizeFromMemory(string[] fileLines) {
        // Strip "//" line comments.
        // NOTE(review): this also truncates string literals that contain "//" —
        // confirm whether the language permits "//" inside strings.
        for(var i = 0; i < fileLines.Length; i++) {
            fileLines[i] = fileLines[i].Split("//")[0];
        }

        List<Token> tokens = new List<Token>();

        string currentWord = "";   // word / literal currently being accumulated
        int lineIdx;
        int charIdx;
        bool isString = false;     // true while inside a "..." literal (may span lines)
        bool skipNext = false;     // true when the next char was consumed by a two-char operator
        string[] boolValues = new string[] {"false", "true"};

        // Numeric parsing uses the invariant culture so '.' is always the decimal
        // separator regardless of the host locale (the original used the current
        // culture, which broke float recognition under e.g. de-DE).
        static bool TryInt(string s, out int value) =>
            int.TryParse(s, System.Globalization.NumberStyles.Integer,
                         System.Globalization.CultureInfo.InvariantCulture, out value);
        static bool TryFloat(string s, out float value) =>
            float.TryParse(s, System.Globalization.NumberStyles.Float,
                           System.Globalization.CultureInfo.InvariantCulture, out value);

        // Bounds-safe one-character lookahead. The original indexed line[charIdx + 1]
        // directly and threw IndexOutOfRangeException when an operator ended a line.
        static bool NextCharIs(string line, int idx, char expected) =>
            idx + 1 < line.Length && line[idx + 1] == expected;

        // Emits `twoChar` (consuming the lookahead char) when the lookahead matched,
        // otherwise emits `oneChar`.
        void AddOneOrTwo(bool lookaheadMatches, Token oneChar, Token twoChar) {
            if(lookaheadMatches) {
                tokens.Add(twoChar);
                skipNext = true;
            } else {
                tokens.Add(oneChar);
            }
        }

        // Flushes currentWord as the appropriate token kind, if anything is pending.
        // charIdx is expected to point at the delimiter that ended the word.
        void HandleCurrentWord() {
            if(isString) {
                // charIdx points at the closing quote; the +/-1 offsets cover the quotes.
                tokens.Add(new TokenString(currentWord, lineIdx, lineIdx, charIdx - currentWord.Length - 1, charIdx + 1));
            } else if(currentWord != "") {
                if(TryInt(currentWord, out int ivalue)) {
                    // Note: the stored literal is the normalized value ("007" -> "7"),
                    // matching the original behavior.
                    tokens.Add(new TokenInt(ivalue.ToString(), lineIdx, lineIdx, charIdx - ivalue.ToString().Length, charIdx));
                } else if(TryFloat(currentWord, out float fvalue)) {
                    tokens.Add(new TokenFloat(fvalue.ToString(), lineIdx, lineIdx, charIdx - fvalue.ToString().Length, charIdx));
                } else if(currentWord.Length > 1 && currentWord[^1] == 'f'
                          && TryFloat(currentWord[..^1], out fvalue)) {
                    // Float literal with an explicit 'f' suffix, e.g. "1.5f".
                    tokens.Add(new TokenFloat(fvalue.ToString(), lineIdx, lineIdx, charIdx - fvalue.ToString().Length - 1, charIdx));
                } else if(boolValues.Contains(currentWord.ToLower())) {
                    tokens.Add(new TokenBoolean((currentWord.ToLower() == "true").ToString().ToLower(), lineIdx, lineIdx, charIdx - currentWord.Length, charIdx));
                } else if(Keywords.Contains(currentWord)) {
                    tokens.Add(new TokenKeyword(currentWord, lineIdx, lineIdx, charIdx - currentWord.Length, charIdx));
                } else {
                    tokens.Add(new TokenIdentifier(currentWord, lineIdx, lineIdx, charIdx - currentWord.Length, charIdx));
                }
            }
            currentWord = "";
        }

        lineIdx = -1;
        foreach(string line in fileLines) {
            lineIdx++;
            charIdx = -1;
            skipNext = false;
            foreach(char c in line) {
                charIdx++;
                if(skipNext) {
                    skipNext = false;
                    continue;
                }

                // Inside a string literal everything except the closing quote is data.
                if(isString) {
                    if(c == '"') {
                        HandleCurrentWord();
                        isString = false;
                        continue;
                    }
                    currentWord += c;
                    continue;
                }

                switch(c) {
                    case '=':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenAssign(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenEquals(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case '"':
                        // NOTE(review): any pending word is not flushed here, so
                        // foo"bar" folds "foo" into the string literal — confirm intent.
                        isString = true;
                        break;
                    case ',':
                        HandleCurrentWord();
                        tokens.Add(new TokenComma(lineIdx, lineIdx, charIdx, charIdx + 1));
                        break;
                    case '[':
                        HandleCurrentWord();
                        tokens.Add(new TokenArrayStart(lineIdx, lineIdx, charIdx, charIdx + 1));
                        break;
                    case ']':
                        HandleCurrentWord();
                        tokens.Add(new TokenArrayEnd(lineIdx, lineIdx, charIdx, charIdx + 1));
                        break;
                    case '(':
                        HandleCurrentWord();
                        tokens.Add(new TokenParenStart(lineIdx, lineIdx, charIdx, charIdx + 1));
                        break;
                    case ')':
                        HandleCurrentWord();
                        tokens.Add(new TokenParenEnd(lineIdx, lineIdx, charIdx, charIdx + 1));
                        break;
                    case '{':
                        HandleCurrentWord();
                        tokens.Add(new TokenBlockStart(lineIdx, lineIdx, charIdx, charIdx + 1));
                        break;
                    case '}':
                        HandleCurrentWord();
                        tokens.Add(new TokenBlockEnd(lineIdx, lineIdx, charIdx, charIdx + 1));
                        break;
                    case ':':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, ':'),
                            new TokenColon(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenDoubleColon(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case ';':
                        HandleCurrentWord();
                        tokens.Add(new TokenEOL(lineIdx, lineIdx, charIdx, charIdx + 1));
                        break;
                    case '+':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenPlus(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenAssignPlus(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case '-':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenMinus(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenAssignMinus(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case '*':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenMultiply(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenAssignMultiply(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case '/':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenDivide(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenAssignDivide(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case '^':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenPower(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenAssignPower(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case '%':
                        HandleCurrentWord();
                        // NOTE(review): "%=" emits TokenAssignPower, which looks like a
                        // copy/paste slip — confirm whether a dedicated modulo-assign
                        // token exists and should be used here. Behavior kept as-is.
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenModulo(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenAssignPower(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case '.':
                        // A '.' following a numeric prefix is part of a float literal
                        // ("3.14"); otherwise it separates namespace segments.
                        if(TryFloat(currentWord, out _))
                            goto default;
                        HandleCurrentWord();
                        tokens.Add(new TokenNamespaceSeparator(lineIdx, lineIdx, charIdx, charIdx + 1));
                        break;
                    case '|':
                        HandleCurrentWord();
                        if(!NextCharIs(line, charIdx, '|'))
                            goto default;   // a lone '|' becomes part of a word, as before
                        tokens.Add(new TokenOr(lineIdx, lineIdx, charIdx, charIdx + 2));
                        skipNext = true;
                        break;
                    case '&':
                        HandleCurrentWord();
                        if(!NextCharIs(line, charIdx, '&'))
                            goto default;   // a lone '&' becomes part of a word, as before
                        tokens.Add(new TokenAnd(lineIdx, lineIdx, charIdx, charIdx + 2));
                        skipNext = true;
                        break;
                    case '!':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenNot(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenNotEquals(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case '>':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenGreater(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenGreaterEquals(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    case '<':
                        HandleCurrentWord();
                        AddOneOrTwo(NextCharIs(line, charIdx, '='),
                            new TokenLesser(lineIdx, lineIdx, charIdx, charIdx + 1),
                            new TokenLesserEquals(lineIdx, lineIdx, charIdx, charIdx + 2));
                        break;
                    default:
                        // Any whitespace ends the current word; the original only
                        // recognized ' ', which leaked tabs into identifiers.
                        if(char.IsWhiteSpace(c)) {
                            HandleCurrentWord();
                        } else {
                            currentWord += c;
                        }
                        break;
                }
            }

            // Flush the word left at the end of the line so identifiers never merge
            // across line breaks and the file's final token is not lost (the original
            // never flushed here). An unterminated string keeps accumulating into the
            // next line, matching the original's multi-line string behavior.
            if(!isString) {
                charIdx++;   // position one past the final character, like a delimiter
                HandleCurrentWord();
            }
        }

        return tokens.ToArray();
    }

    /// <summary>Formats a token for debug output, e.g. [TokenInt: '3'].</summary>
    internal static string GetTokenAsHuman(Token t)
    {
        // String tokens display their value in double quotes, all others in single
        // quotes. `dynamic` is used because `value` lives on the concrete token types.
        char quote = t.GetType() == typeof(TokenString) ? '"' : '\'';
        return $"[{t}: {quote}{((dynamic)t).value}{quote}]";
    }

    /// <summary>Writes the human-readable form of <paramref name="t"/> to stdout.</summary>
    internal static void Print(Token t)
    {
        Console.WriteLine(GetTokenAsHuman(t));
    }
}