🐍🐍🐍
1
2// Courtesy of Claude :)
3
/**
 * At-rule names, written without the leading "@", that the tokenizer
 * recognises. Anything else following an "@" is reported as "unknown".
 */
const atRules = new Set([
  // Top-of-stylesheet rules
  "charset",
  "import",
  "namespace",
  // Conditional / grouping rules
  "media",
  "supports",
  // Descriptor-style rules
  "page",
  "font-face",
  "keyframes",
  "counter-style",
  "font-feature-values",
  "property",
  // Cascade and scoping rules
  "layer",
  "container",
  "scope",
]);
9
/**
 * Pseudo-class names (without the leading ":") that are classified as
 * "pseudo-class" when written with a single colon.
 */
const pseudoClasses = new Set([
  // User-action and link state
  "hover",
  "focus",
  "active",
  "visited",
  "link",
  // Form state
  "disabled",
  "enabled",
  "checked",
  // Tree-structural
  "first-child",
  "last-child",
  "nth-child",
  "nth-last-child",
  "first-of-type",
  "last-of-type",
  "nth-of-type",
  "nth-last-of-type",
  "only-child",
  "only-of-type",
  "empty",
  "root",
  "target",
  // Functional / logical
  "not",
  "is",
  "where",
  "has",
  // Focus refinements
  "focus-within",
  "focus-visible",
]);
17
/**
 * Pseudo-element names (without the leading "::") that are classified as
 * "pseudo-element" when written with a double colon.
 */
const pseudoElements = new Set([
  "before",
  "after",
  "first-line",
  "first-letter",
  "selection",
  "backdrop",
  "placeholder",
  "marker",
  "file-selector-button",
]);
22
/**
 * Identifiers that are reported with the "property" token type instead of
 * the generic "identifier" type. Matching is by exact name.
 */
const properties = new Set([
  // Layout and box position
  "display",
  "position",
  "top",
  "right",
  "bottom",
  "left",
  "width",
  "height",
  // Box model and painting
  "margin",
  "padding",
  "border",
  "background",
  "color",
  "font",
  "text",
  // Flex / grid / motion
  "flex",
  "grid",
  "transform",
  "transition",
  "animation",
  // Visibility, stacking and interaction
  "opacity",
  "visibility",
  "overflow",
  "z-index",
  "content",
  "cursor",
  "pointer-events",
]);
29
/**
 * Unit suffixes that turn a numeric literal into a "number-unit" token.
 * A suffix that is not listed here yields an "unknown" token.
 */
const units = new Set([
  // Font- and viewport-relative lengths, plus percentage
  "px",
  "em",
  "rem",
  "ex",
  "ch",
  "vw",
  "vh",
  "vmin",
  "vmax",
  "%",
  // Physical lengths
  "cm",
  "mm",
  "in",
  "pt",
  "pc",
  // Angles
  "deg",
  "rad",
  "grad",
  "turn",
  // Time, frequency, resolution, grid fractions
  "s",
  "ms",
  "hz",
  "khz",
  "dpi",
  "dpcm",
  "dppx",
  "fr",
]);
35
/**
 * Function names that are reported as "function" when an identifier is
 * immediately followed by "(". Other names followed by "(" are "unknown".
 */
const functions = new Set([
  // Resources and math
  "url",
  "calc",
  "min",
  "max",
  "clamp",
  "var",
  // Colors
  "rgb",
  "rgba",
  "hsl",
  "hsla",
  // Gradients
  "linear-gradient",
  "radial-gradient",
  "conic-gradient",
  "repeating-linear-gradient",
  "repeating-radial-gradient",
  // Transforms
  "translate",
  "rotate",
  "scale",
  "skew",
  "matrix",
  // Easing
  "cubic-bezier",
  "steps",
  // Generated content
  "attr",
  "counter",
  "counters",
]);
42
/**
 * Splits CSS source text into a flat list of tokens.
 *
 * Every character of the input ends up in exactly one token, so joining the
 * `value` of all tokens reproduces the input. Characters that match no rule
 * are emitted one at a time as "unknown" tokens.
 *
 * Rules are tried in a fixed order; the order matters where prefixes overlap:
 *   - "-5" / "-.5" are tried as numbers before "-" can start an identifier;
 *   - "--name" is tried as a CSS variable before "-" can start an identifier;
 *   - ":" only starts a pseudo selector when a name (or a second ":") follows,
 *     otherwise it is the plain ":" operator (e.g. a declaration colon).
 *
 * @param {string} code - CSS source text.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 */
export function tokenize(code) {
  const WHITESPACE = /\s/;
  const DIGIT = /\d/;
  const IDENT_START = /[a-zA-Z_-]/;
  const PSEUDO_NAME_START = /[a-zA-Z-]/;
  const OPERATOR_CHARS = "{}[](),.;:>+~*|^$=";
  const ATTRIBUTE_OPERATORS = ["~=", "|=", "^=", "$=", "*="];

  const tokens = [];
  let i = 0;

  // Records the token produced by a sub-tokenizer and jumps past it.
  const take = ({ token, newIndex }) => {
    tokens.push(token);
    i = newIndex;
  };

  // True when an unsigned numeric literal ("12" or ".5") begins at `at`.
  const startsNumber = (at) =>
    at < code.length &&
    (DIGIT.test(code[at]) ||
      (code[at] === "." && at + 1 < code.length && DIGIT.test(code[at + 1])));

  while (i < code.length) {
    const char = code[i];
    const next = code[i + 1];

    // Whitespace: one token per run.
    if (WHITESPACE.test(char)) {
      const start = i;
      while (i < code.length && WHITESPACE.test(code[i])) i++;
      tokens.push({ type: "whitespace", value: code.slice(start, i) });
      continue;
    }

    // Comments. An unterminated comment runs to the end of the input so that
    // no trailing character leaks out of it.
    if (char === "/" && next === "*") {
      const close = code.indexOf("*/", i + 2);
      const end = close === -1 ? code.length : close + 2;
      tokens.push({ type: "comment", value: code.slice(i, end) });
      i = end;
      continue;
    }

    // Strings
    if (char === '"' || char === "'") {
      take(tokenizeString(code, i, char));
      continue;
    }

    // URL function special case: the argument may be unquoted, so it is
    // consumed as a single token instead of function + punctuation.
    if (code.startsWith("url(", i)) {
      const urlToken = tokenizeUrl(code, i);
      if (urlToken) {
        take(urlToken);
        continue;
      }
    }

    // At-rules
    if (char === "@") {
      take(tokenizeAtRule(code, i));
      continue;
    }

    // Numbers (including units), with an optional leading minus sign.
    if (startsNumber(i) || (char === "-" && startsNumber(i + 1))) {
      take(tokenizeNumber(code, i));
      continue;
    }

    // Hash values (colors, IDs)
    if (char === "#") {
      take(tokenizeHash(code, i));
      continue;
    }

    // CSS variables. Must precede the identifier rule, which also accepts "-".
    if (char === "-" && next === "-") {
      take(tokenizeVariable(code, i));
      continue;
    }

    // Identifiers, properties, values
    if (IDENT_START.test(char)) {
      take(tokenizeIdentifier(code, i));
      continue;
    }

    // Pseudo-classes and pseudo-elements. A colon that is not followed by a
    // name falls through to the operator rule below.
    if (
      char === ":" &&
      next !== undefined &&
      (next === ":" || PSEUDO_NAME_START.test(next))
    ) {
      take(tokenizePseudo(code, i));
      continue;
    }

    // Operators and punctuation
    if (OPERATOR_CHARS.includes(char)) {
      // Attribute-selector operators are two characters long.
      const twoChar = code.slice(i, i + 2);
      if (ATTRIBUTE_OPERATORS.includes(twoChar)) {
        tokens.push({ type: "operator", value: twoChar });
        i += 2;
        continue;
      }

      let type = "operator";
      if ("{}[]()".includes(char)) type = "punctuation";
      else if (",;".includes(char)) type = "delimiter";
      tokens.push({ type, value: char });
      i++;
      continue;
    }

    // Important declaration
    if (char === "!") {
      const importantToken = tokenizeImportant(code, i);
      if (importantToken) {
        take(importantToken);
        continue;
      }
    }

    // Unknown character
    tokens.push({ type: "unknown", value: char });
    i++;
  }

  return tokens;
}
168
/**
 * Reads a quoted string literal beginning at `start`.
 *
 * The token value includes both quotes. A backslash always takes the next
 * character with it, so an escaped quote does not end the string. If the
 * closing quote is missing, the string extends to the end of the input.
 *
 * @param {string} code - Full source text.
 * @param {number} start - Index of the opening quote.
 * @param {string} quote - The quote character that opened the string.
 * @returns {{token: {type: string, value: string}, newIndex: number}}
 */
function tokenizeString(code, start, quote) {
  let cursor = start + 1; // first character after the opening quote

  while (cursor < code.length) {
    const current = code[cursor];
    if (current === quote) {
      cursor += 1; // include the closing quote
      break;
    }
    // An escape consumes the backslash plus whatever follows it.
    cursor += current === "\\" ? 2 : 1;
  }

  // A trailing backslash may have stepped one past the end of the input.
  const end = Math.min(cursor, code.length);
  return {
    token: { type: "string", value: code.slice(start, end) },
    newIndex: end,
  };
}
200
/**
 * Reads a whole `url(...)` construct as one token.
 *
 * Accepts optional whitespace around the argument. The argument is either a
 * quoted string (backslash escapes honoured) or an unquoted run that stops at
 * whitespace or ")". The closing ")" is included when it is present.
 *
 * @param {string} code - Full source text.
 * @param {number} start - Index of the "u" in "url(".
 * @returns {{token: {type: string, value: string}, newIndex: number}}
 */
function tokenizeUrl(code, start) {
  const isSpace = (ch) => /\s/.test(ch);
  const bodyStart = start + 4; // just past "url("
  let pos = bodyStart;

  const skipSpaces = () => {
    while (pos < code.length && isSpace(code[pos])) pos += 1;
  };

  skipSpaces();

  const opener = code[pos];
  if (opener === '"' || opener === "'") {
    pos += 1;
    while (pos < code.length && code[pos] !== opener) {
      // A backslash carries the following character along with it.
      pos += code[pos] === "\\" ? 2 : 1;
    }
    // A trailing backslash may have stepped one past the end of the input.
    pos = Math.min(pos, code.length);
    if (pos < code.length) pos += 1; // closing quote
  } else {
    while (pos < code.length && code[pos] !== ")" && !isSpace(code[pos])) {
      pos += 1;
    }
  }

  skipSpaces();

  if (pos < code.length && code[pos] === ")") pos += 1;

  return {
    token: { type: "url", value: `url(${code.slice(bodyStart, pos)}` },
    newIndex: pos,
  };
}
241
/**
 * Reads an at-keyword such as "@media".
 *
 * The name is the run of ASCII letters and hyphens after the "@". The token
 * type is "at-rule" when the name is listed in `atRules`, else "unknown".
 *
 * @param {string} code - Full source text.
 * @param {number} start - Index of the "@".
 * @returns {{token: {type: string, value: string}, newIndex: number}}
 */
function tokenizeAtRule(code, start) {
  const nameStart = start + 1;
  let end = nameStart;
  while (end < code.length && /[a-zA-Z-]/.test(code[end])) {
    end += 1;
  }

  const name = code.slice(nameStart, end);
  return {
    token: {
      type: atRules.has(name) ? "at-rule" : "unknown",
      value: `@${name}`,
    },
    newIndex: end,
  };
}
255
/**
 * Reads a numeric literal and any unit suffix attached to it.
 *
 * Grammar handled: optional "-", digits with at most one ".", then an
 * optional run of ASCII letters / "%" taken as the unit.
 *
 * Token type:
 *   - "number"       no unit suffix;
 *   - "number-unit"  suffix is listed in `units`;
 *   - "unknown"      suffix is present but not listed.
 *
 * Units are compared case-insensitively, because CSS units are ASCII
 * case-insensitive ("10PX", "44.1kHz"). The token value keeps the source
 * spelling unchanged.
 *
 * @param {string} code - Full source text.
 * @param {number} start - Index of the first character of the number.
 * @returns {{token: {type: string, value: string}, newIndex: number}}
 */
function tokenizeNumber(code, start) {
  let i = start;

  // Optional leading minus sign.
  if (code[i] === "-") i++;

  // Digits with at most one decimal point.
  let hasDecimal = false;
  while (i < code.length && (/\d/.test(code[i]) || (code[i] === "." && !hasDecimal))) {
    if (code[i] === ".") hasDecimal = true;
    i++;
  }

  // Optional unit suffix.
  const unitStart = i;
  while (i < code.length && /[a-zA-Z%]/.test(code[i])) i++;

  const value = code.slice(start, i);
  if (i === unitStart) {
    return { token: { type: "number", value }, newIndex: i };
  }

  // Lower-case only for the lookup; `value` preserves the original casing.
  const unit = code.slice(unitStart, i).toLowerCase();
  const type = units.has(unit) ? "number-unit" : "unknown";
  return { token: { type, value }, newIndex: i };
}
287
/**
 * Reads a "#"-prefixed token: either a hex color or an ID selector.
 *
 * The whole name after "#" (letters, digits, "_" and "-") is consumed, so an
 * ID such as "#header" or "#add-btn" stays in one token instead of being cut
 * at the first non-hex character.
 *
 * The token type is "color" when the name consists solely of 3, 4, 6 or 8
 * hex digits (#rgb, #rgba, #rrggbb, #rrggbbaa) and "hash" otherwise. A name
 * like "#abc" is therefore reported as a color even when used as an ID; the
 * tokenizer has no context to tell the two apart.
 *
 * @param {string} code - Full source text.
 * @param {number} start - Index of the "#".
 * @returns {{token: {type: string, value: string}, newIndex: number}}
 */
function tokenizeHash(code, start) {
  const nameStart = start + 1;
  let end = nameStart;
  while (end < code.length && /[a-zA-Z0-9_-]/.test(code[end])) {
    end++;
  }

  const name = code.slice(nameStart, end);
  const isColor = /^(?:[a-fA-F0-9]{3,4}|[a-fA-F0-9]{6}|[a-fA-F0-9]{8})$/.test(name);

  return {
    token: { type: isColor ? "color" : "hash", value: `#${name}` },
    newIndex: end,
  };
}
302
/**
 * Reads a word made of letters, digits, "_" and "-" and classifies it.
 *
 * Classification, first match wins:
 *   1. word directly followed by "(": "function" if listed in `functions`,
 *      otherwise "unknown" (the "(" itself is not consumed);
 *   2. word listed in `properties`: "property";
 *   3. the word "important": "important";
 *   4. anything else: "identifier".
 *
 * @param {string} code - Full source text.
 * @param {number} start - Index of the first character of the word.
 * @returns {{token: {type: string, value: string}, newIndex: number}}
 */
function tokenizeIdentifier(code, start) {
  let end = start;
  while (end < code.length && /[a-zA-Z0-9_-]/.test(code[end])) {
    end += 1;
  }

  const word = code.slice(start, end);
  const result = (type) => ({ token: { type, value: word }, newIndex: end });

  if (code[end] === "(") {
    return result(functions.has(word) ? "function" : "unknown");
  }
  if (properties.has(word)) {
    return result("property");
  }
  return result(word === "important" ? "important" : "identifier");
}
324
/**
 * Reads a custom-property name such as "--main-color".
 *
 * The two leading dashes are taken for granted; the name then continues over
 * letters, digits, "_" and "-".
 *
 * @param {string} code - Full source text.
 * @param {number} start - Index of the first "-".
 * @returns {{token: {type: string, value: string}, newIndex: number}}
 */
function tokenizeVariable(code, start) {
  const nameStart = start + 2; // just past "--"
  let end = nameStart;
  while (end < code.length && /[a-zA-Z0-9_-]/.test(code[end])) {
    end += 1;
  }

  return {
    token: { type: "variable", value: `--${code.slice(nameStart, end)}` },
    newIndex: end,
  };
}
335
/**
 * Reads a pseudo-class (":hover") or pseudo-element ("::before") selector.
 *
 * Only the colon(s) and the name (ASCII letters and hyphens) are consumed;
 * any "(...)" argument list is left for the caller.
 *
 * Token type:
 *   - "pseudo-element"  "::name" with name in `pseudoElements`, or the
 *                       legacy single-colon forms ":before", ":after",
 *                       ":first-line", ":first-letter";
 *   - "pseudo-class"    ":name" with name in `pseudoClasses`;
 *   - "unknown"         everything else.
 *
 * Names are compared case-insensitively; the token value keeps the source
 * spelling unchanged.
 *
 * @param {string} code - Full source text.
 * @param {number} start - Index of the first ":".
 * @returns {{token: {type: string, value: string}, newIndex: number}}
 */
function tokenizePseudo(code, start) {
  // Pseudo-elements from CSS 1/2 that may still be written with one colon.
  const legacySingleColonElements = ["before", "after", "first-line", "first-letter"];

  let i = start + 1; // Skip first :

  const isDoubleColon = i < code.length && code[i] === ":";
  if (isDoubleColon) i++;

  const nameStart = i;
  while (i < code.length && /[a-zA-Z-]/.test(code[i])) i++;

  const rawName = code.slice(nameStart, i);
  const value = `${isDoubleColon ? "::" : ":"}${rawName}`;
  const name = rawName.toLowerCase();

  let type = "unknown";
  if (isDoubleColon) {
    if (pseudoElements.has(name)) type = "pseudo-element";
  } else if (pseudoClasses.has(name)) {
    type = "pseudo-class";
  } else if (legacySingleColonElements.includes(name)) {
    type = "pseudo-element";
  }

  return { token: { type, value }, newIndex: i };
}
363
/**
 * Reads an "!important" flag beginning at `start`.
 *
 * Whitespace between "!" and the keyword is allowed. The keyword is matched
 * case-insensitively and must be a whole word, so "!IMPORTANT" is accepted
 * while "!importantly" is not. The token value is the exact source text from
 * "!" to the end of the keyword.
 *
 * @param {string} code - Full source text.
 * @param {number} start - Index of the "!".
 * @returns {{token: {type: string, value: string}, newIndex: number} | null}
 *   The token, or null when "!" is not followed by the keyword.
 */
function tokenizeImportant(code, start) {
  const keyword = "important";

  let i = start + 1; // Skip !
  while (i < code.length && /\s/.test(code[i])) i++;

  const end = i + keyword.length;
  if (code.slice(i, end).toLowerCase() !== keyword) {
    return null;
  }

  // Reject longer words that merely begin with the keyword.
  if (end < code.length && /[a-zA-Z0-9_-]/.test(code[end])) {
    return null;
  }

  return {
    token: { type: "important", value: code.slice(start, end) },
    newIndex: end,
  };
}
379