VSCodium/VS Code extension to support Chez Scheme: highlighting, autocompletion, documentation on hover and syntax checks.
1/* eslint-disable max-statements */
2/*
3 * SPDX-FileCopyrightText: Copyright 2023 Roland Csaszar
4 * SPDX-License-Identifier: MIT
5 *
6 * Project: vscode-scheme-repl
7 * File: sexps.ts
8 * Date: 15.May.2023
9 *
10 * ==============================================================================
11 * S-expression parsing and related functions.
12 */
13
14import * as h from "./helpers";
15
/**
 * The kind of delimiter enclosing the sexp that is currently being parsed.
 *
 * - `"Paren"` - the sexp is enclosed in `()`.
 * - `"Bracket"` - the sexp is enclosed in `[]`.
 * - `"Brace"` - the sexp is enclosed in `{}`.
 * - `"Quote"` - the parser is inside of a double quoted string, `"`.
 * - `"Any"` - any one of the delimiters of a sexp, that is, anything matched
 *   by the regexp `[\s,"()\[\]{}]`.
 */
type Delimiter =
  | "Paren"
  | "Bracket"
  | "Brace"
  | "Quote"
  | "Any";
26
/**
 * Regex matching the run of non-delimiter characters at the very end of a
 * string, optionally followed by trailing whitespace.
 * The run itself is captured in the named group `sexp`, which is also the
 * first capture group. A delimiter is whitespace or one of `(`, `)`, `[`, `]`,
 * `{`, `}`, `,` and `"`.
 * The `d` flag makes the start index of the group available in the match's
 * `indices`, the `s` flag lets the leading `.*?` skip over line breaks.
 */
const leftUntilDelimiter = /.*?(?<sexp>[^\s()[\]{},"]+)\s*$/dsu;
32
/**
 * Regex matching everything between the last double quote `"` and the end of
 * a string, that is, the part of a string literal to the left of the end.
 * The text is captured in the named group `sexp`, which is also the first
 * capture group. The `d` flag makes the start index of the group available in
 * the match's `indices`.
 */
const leftUntilQuote = /.*?(?<sexp>[^"]+)\s*$/du;
37
/**
 * Regex matching the opening of a vector at the end of a string.
 * The first capture group holds the vector prefix, everything to the left of
 * the opening `(`: an optional `'` or `` ` ``, the `#`, an optional length and
 * an optional `vfx` or `vu8` marker. Examples: `#`, `'#3`, `#vu8`.
 */
const vectorRegex = /([`']?#\d*(?:vfx|vu8)?)\($/u;
43
/**
 * Return the s-expression that ends at the end of `text`, together with the
 * position where it starts.
 * Trailing whitespace of `text` is ignored when looking for the s-expression.
 * The start position is looked up by `h.getStartPosition` in the untrimmed
 * `text`.
 * @param text The text to parse.
 * @returns The s-expression that ends at the end of `text` and the position
 * of the start of the s-expression in `text`, as `startLine` and `startCol`.
 */
export function getSexpToLeft(text: string): {
  sexp: string;
  startLine: number;
  startCol: number;
} {
  // Start at nesting level 0 with no enclosing delimiter ("Any").
  const sexp = parseSexpToLeft(["Any"], text.trimEnd(), 0);
  const { startCol, startLine } = h.getStartPosition(text, sexp);
  return { sexp, startCol, startLine };
}
61
/**
 * Parse the sexp at the end of the string `s`, from the right to the left.
 * This is the hub of a group of mutually recursive functions. It tries, in
 * this order, to
 * 1. start a new nested sexp or string (`startOfSexp`),
 * 2. consume a list separator (`listSeparators`),
 * 3. close the current sexp or string (`endOfSexp`),
 * 4. consume the text up to the next delimiter (`parseBetweenDelimiters`).
 *
 * The first step yielding a non-empty string wins.
 * @param delimStack The stack of delimiters of the enclosing sexps, the last
 * element is the delimiter of the current sexp. The stack is changed in place
 * by the called functions.
 * @param s The string to parse from the end to the next sexp delimiter.
 * @param level The nesting depth of the current sexp, 0 is the outermost
 * level.
 * @returns The sexp at the right end of `s`.
 */
function parseSexpToLeft(
  delimStack: Delimiter[],
  s: string,
  level: number
): string {
  const state = { s, delim: h.last(delimStack), delimStack, level };

  // `||` only evaluates the next parser if the previous one returned
  // `undefined` (or an empty string), so at most one of them consumes input.
  return (
    startOfSexp(state) ||
    listSeparators(state) ||
    endOfSexp(state) ||
    parseBetweenDelimiters(state)
  );
}
94
/**
 * Begin parsing a nested sexp if `s` ends with a closing delimiter.
 * Because the parsing direction is right to left, a sexp "starts" at its
 * closing delimiter, which is one of `)`, `]`, `}` or `"`.
 * Inside of a string (`delim` is `"Quote"`) none of these starts a new sexp.
 * @param data The needed data: the string `s` to parse, the delimiter `delim`
 * of the current sexp, the stack of delimiters `delimStack` and the nesting
 * `level`.
 * @returns The sexp ending at the right end of `s`, including its closing
 * delimiter, or `undefined` if `s` does not end with a closing delimiter.
 */
function startOfSexp(data: {
  s: string;
  delim: Delimiter | undefined;
  delimStack: Delimiter[];
  level: number;
}): string | undefined {
  if (data.delim === "Quote") {
    return undefined;
  }

  const closers: [string, Delimiter][] = [
    [")", "Paren"],
    ["]", "Bracket"],
    ["}", "Brace"],
  ];
  const closer = closers.find(([closeChar]) => data.s.endsWith(closeChar));
  if (closer) {
    const [delimString, currSexpDelim] = closer;
    return parseInSexp({ ...data, currSexpDelim, delimString });
  }

  if (data.s.endsWith('"')) {
    // The closing quote of a string: everything to the left is string content.
    data.delimStack.push("Quote");
    const content = parseSexpToLeft(
      data.delimStack,
      data.s.slice(0, -1),
      data.level + 1
    );
    return content + '"';
  }

  return undefined;
}
149
/**
 * Consume a separator at the right end of `s` and continue parsing to the
 * left of it.
 * A separator is one of `,`, a space, `\r\n`, `\n` or a tab. It is kept
 * unchanged in the result.
 * @param data The needed data: the string `s` to parse, the delimiter `delim`
 * of the current sexp, the stack of delimiters `delimStack` and the nesting
 * `level`.
 * @returns The result of parsing the string to the left of the separator,
 * followed by the separator, or `undefined` if `s` does not end with a
 * separator.
 */
function listSeparators(data: {
  s: string;
  delim: Delimiter | undefined;
  delimStack: Delimiter[];
  level: number;
}): string | undefined {
  // The order matters: `\r\n` has to be tested before `\n`.
  const separators = [",", " ", "\r\n", "\n", "\t"];
  const delimString = separators.find((sep) => data.s.endsWith(sep));
  if (delimString === undefined) {
    return undefined;
  }
  return addDelimAndContinue({
    ...data,
    delimString,
    numChars: delimString.length,
  });
}
214
/**
 * Consume the text between two sexp delimiters at the right end of `s` and
 * continue parsing to the left of it.
 * Which text is consumed depends on the current delimiter:
 * - in a list (`"Paren"`, `"Bracket"`, `"Brace"`) everything up to the next
 *   sexp delimiter,
 * - in a string (`"Quote"`) everything up to the next double quote,
 * - outside of any sexp (`"Any"`) everything up to the next sexp delimiter,
 *   without parsing any further if the `level` is 0.
 * @param data The needed data: the string `s` to parse, the delimiter `delim`
 * of the current sexp, the stack of delimiters `delimStack` and the nesting
 * `level`.
 * @returns The parsed string, or the empty string `""` if there is no current
 * delimiter or no such text at the end of `s`.
 */
function parseBetweenDelimiters(data: {
  delimStack: Delimiter[];
  delim: Delimiter | undefined;
  s: string;
  level: number;
}): string {
  const { s, delimStack, level, delim } = data;

  if (delim === "Any") {
    return getNextLeftReturnLevel0({
      s,
      delimStack,
      level,
      regex: leftUntilDelimiter,
    });
  }
  if (delim === "Quote") {
    return getNextLeft({ s, delimStack, level, regex: leftUntilQuote });
  }
  if (delim === "Paren" || delim === "Bracket" || delim === "Brace") {
    return getNextLeft({ s, delimStack, level, regex: leftUntilDelimiter });
  }
  return "";
}
257
/**
 * The opening delimiters that end a sexp when parsing from the right to the
 * left, by kind of sexp. Each list is ordered so that a longer delimiter comes
 * before any of its suffixes, e.g. `'#{` before `#{` before `{`.
 */
const openingDelimiters: Record<"Paren" | "Bracket" | "Brace", string[]> = {
  Paren: ["'(", "`(", "("],
  Bracket: ["`[", "'[", "["],
  Brace: ["'#{", "`#{", "#{", "`{", "'{", "{"],
};

/**
 * Return the opening delimiter of a sexp of kind `delim` at the right end of
 * `s`, including a quote, quasiquote or vector prefix.
 * @param s The string to check.
 * @param delim The delimiter of the current sexp.
 * @returns The opening delimiter at the end of `s` or `undefined` if there is
 * none that matches `delim`.
 */
function openingDelimiterAtEnd(
  s: string,
  delim: Delimiter | undefined
): string | undefined {
  if (delim !== "Paren" && delim !== "Bracket" && delim !== "Brace") {
    return undefined;
  }
  const literal = openingDelimiters[delim].find((open) => s.endsWith(open));
  // Only a plain `(` can be the opening of a vector like `#(` or `'#3vu8(`,
  // so the regex is not run in any other case.
  if (delim === "Paren" && literal === "(") {
    const vecMatch = vectorRegex.exec(s);
    if (vecMatch) {
      return `${vecMatch[1]}(`;
    }
  }
  return literal;
}

/**
 * Handle a double quote `"` at the right end of `s` while parsing a string.
 * If the quote is preceded by an odd number of backslashes, it is escaped and
 * part of the string, so parsing continues inside of the string. Else it is
 * the opening quote of the string, which ends the string.
 * @param data The needed data, `s` must end with a double quote.
 * @returns The parsed string to the left of the quote, the backslashes
 * preceding the quote and the quote. Only the quote if this is the opening
 * quote of the outermost sexp.
 */
function endOfString(data: {
  s: string;
  delimStack: Delimiter[];
  level: number;
}): string {
  const beforeQuote = data.s.slice(0, -1);
  let numBackSlash = 0;
  while (beforeQuote.charAt(beforeQuote.length - 1 - numBackSlash) === "\\") {
    numBackSlash += 1;
  }
  const backSlashes = "\\".repeat(numBackSlash);
  const rest = data.s.slice(0, -1 - numBackSlash);

  // eslint-disable-next-line no-magic-numbers
  if (numBackSlash % 2 === 1) {
    return (
      parseSexpToLeft(data.delimStack, rest, data.level) + backSlashes + '"'
    );
  }

  data.delimStack.pop();
  const newLevel = data.level - 1;
  if (newLevel === 0) {
    return '"';
  }
  return parseSexpToLeft(data.delimStack, rest, newLevel) + backSlashes + '"';
}

/**
 * Return the left (opening) delimiter at the end of the string `s` or
 * `undefined` if the end is not the opening delimiter of the current sexp.
 * The opening delimiter includes a quote (`'`), quasiquote (`` ` ``), `#` or
 * vector prefix (like `#3vfx`). It has to match the kind of the current sexp,
 * `delim`.
 * If this is not the end of the whole sexp, that is, the `level` is greater
 * than 0 after closing the current sexp, parsing continues to the left.
 * @param data The needed data: the string `s` to parse, the delimiter `delim`
 * of the current sexp, the stack of delimiters `delimStack` and the nesting
 * `level`.
 * @returns The opening delimiter at the end of the string `s`, preceded by
 * the result of parsing further to the left if the `level` requires it, or
 * `undefined` if the end is not the opening delimiter of the current sexp.
 */
function endOfSexp(data: {
  s: string;
  delim: Delimiter | undefined;
  delimStack: Delimiter[];
  level: number;
}): string | undefined {
  if (data.delim === "Quote") {
    return data.s.endsWith('"') ? endOfString(data) : undefined;
  }

  const delimString = openingDelimiterAtEnd(data.s, data.delim);
  if (delimString === undefined) {
    return undefined;
  }
  return endOfSubSexp({
    s: data.s,
    length: delimString.length,
    level: data.level,
    delimStack: data.delimStack,
    delimString,
  });
}
418
/**
 * Enter a nested sexp whose closing delimiter is the last character of `s`.
 * Pushes the kind of the new sexp onto the delimiter stack and parses the
 * string to the left of the closing delimiter one level deeper.
 * @param data The needed data. `currSexpDelim` is the kind of the sexp that is
 * entered, `delimString` its closing delimiter. `delim` is not used.
 * @returns The parsed sexp, including its closing delimiter `delimString`.
 */
function parseInSexp(data: {
  s: string;
  delim: Delimiter | undefined;
  delimStack: Delimiter[];
  level: number;
  currSexpDelim: Delimiter;
  delimString: string;
}): string {
  const { s, delimStack, level, currSexpDelim, delimString } = data;
  delimStack.push(currSexpDelim);
  const inner = parseSexpToLeft(delimStack, s.slice(0, -1), level + 1);
  return inner + delimString;
}
438
/**
 * Drop the last `numChars` characters of `s`, parse the remaining string at
 * the same nesting level and append `delimString` to the result.
 * Used to step over a list or quasiquotation separator at the end of `s`.
 * @param data The needed data. `delimString` is the separator at the end of
 * `s`, `numChars` the number of characters of the separator. `delim` is not
 * used.
 * @returns The result of parsing to the left of the separator, followed by
 * the separator.
 */
function addDelimAndContinue(data: {
  s: string;
  delim: Delimiter | undefined;
  delimStack: Delimiter[];
  level: number;
  delimString: string;
  numChars: number;
}): string {
  const { s, delimStack, level, delimString, numChars } = data;
  const left = parseSexpToLeft(delimStack, s.slice(0, -numChars), level);
  return left + delimString;
}
462
/**
 * Leave the current sexp at its opening delimiter `delimString`.
 * Pops the delimiter of the current sexp off the stack. If the nesting level
 * drops to 0, the whole sexp has been parsed and only the opening delimiter is
 * returned. Else parsing continues to the left of the opening delimiter, in
 * the enclosing sexp.
 * @param data The needed data. `delimString` is the opening delimiter at the
 * end of `s`, `length` the number of characters to drop from the end of `s`.
 * @returns The opening delimiter, preceded by the result of parsing further to
 * the left if the new level is not 0.
 */
function endOfSubSexp(data: {
  s: string;
  length: number;
  level: number;
  delimStack: Delimiter[];
  delimString: string;
}): string {
  const { s, length, level, delimStack, delimString } = data;
  delimStack.pop();

  const outerLevel = level - 1;
  if (outerLevel === 0) {
    return delimString;
  }
  const left = parseSexpToLeft(delimStack, s.slice(0, -length), outerLevel);
  return left + delimString;
}
491
/**
 * Return the next part of a sexp from the right to the left of the string `s`.
 * The part is what the named group `sexp` of `regex` matches. Parsing then
 * continues with the string to the left of this match, at the same nesting
 * level.
 * @param data The needed data. `regex` has to capture the part to consume in
 * the named group `sexp`, which has to be the first capture group, and should
 * have the `d` flag set, so that the start index of the group is available.
 * @returns The result of parsing to the left of the match, followed by the
 * match. The empty string `""` if `regex` does not match `s`, which is the
 * case if there is a delimiter at the end of the string. Only the match itself
 * if the start index of the group is not available.
 */
function getNextLeft(data: {
  s: string;
  delimStack: Delimiter[];
  level: number;
  regex: RegExp;
}): string {
  const found = data.regex.exec(data.s);
  if (!found) {
    return "";
  }

  const foundVal = found.groups?.sexp ?? "";
  const start = found.indices?.[1]?.[0];
  if (start === undefined) {
    // Without match indices the start of the group is unknown. Stop here
    // rather than guess an offset: a fixed offset would make the recursion
    // parse the same prefix again and again, without ever terminating.
    return foundVal;
  }

  return (
    parseSexpToLeft(data.delimStack, data.s.slice(0, start), data.level) +
    foundVal
  );
}
519
/**
 * Return the next part of a sexp from the right to the left of the string `s`.
 * If the `level` of the sexp is 0, that means we are parsing an atom, so the
 * match of the named group `sexp` of `regex` is returned without any further
 * parsing. At any other level this is the same as `getNextLeft`.
 * @param data The needed data. `regex` has to capture the part to consume in
 * the named group `sexp`.
 * @returns The next part of a sexp from the right to the left of the string
 * `s`. The empty string `""` if `regex` does not match `s`, which is the case
 * if there is a delimiter at the end of the string.
 */
function getNextLeftReturnLevel0(data: {
  s: string;
  delimStack: Delimiter[];
  level: number;
  regex: RegExp;
}): string {
  if (data.level !== 0) {
    return getNextLeft(data);
  }

  const atom = data.regex.exec(data.s);
  if (!atom) {
    return "";
  }
  return atom.groups ? atom.groups.sexp : "";
}