VSCodium/VS Code extension to support Chez Scheme: highlighting, autocompletion, documentation on hover and syntax checks.
1/*
2 * SPDX-FileCopyrightText: Copyright 2023 Roland Csaszar
3 * SPDX-License-Identifier: MIT
4 *
5 * Project: vscode-scheme-repl
6 * File: generate_function_documentation.ts
7 * Date: 19.May.2023
8 *
9 * ==============================================================================
10 * Parse the HTML documentation from the Chez Scheme page
11 * https://cisco.github.io/ChezScheme/csug10.0/summary.html
12 * and generate a list of objects holding the parsed data in `outFilename`.
13 * Run with `yarn --ignore-engines ts-node generate_function_documentation.ts`.
14 *
15 * The Chez Scheme user's guide is licensed under the Apache License Version 2:
16 * https://cisco.github.io/ChezScheme/csug10.0/canned/copyright.html.
17 */
18
19/* eslint-disable operator-linebreak */
20/* eslint-disable indent */
21
22import * as https from "https";
23import {
24 createWriteStream,
25 existsSync,
26 readFile,
27 unlinkSync,
28 writeFile,
29} from "fs";
30import { JSDOM } from "jsdom";
31import { basename } from "path";
32
/**
 * The type of an identifier.
 * `stringToIdentifierType` produces these values from the type text found in
 * the documentation; `"Error: unknown"` is its fallback for text it does not
 * recognize.
 * Warning: copied from `./src/identifierDocumentation.ts` because of problems
 * with module imports.
 */
type IdentifierType =
    | "syntax"
    | "module"
    | "procedure"
    | "thread parameter"
    | "global parameter"
    | "Error: unknown";
45
/**
 * Convert the raw type text found in the documentation into an
 * `IdentifierType`.
 * The abbreviations `"thread param"` and `"global param"` are expanded to
 * `"thread parameter"` and `"global parameter"`; the comparison is exact, so
 * anything not listed below maps to `"Error: unknown"`.
 * @param s The string to convert to an `IdentifierType`.
 * @returns The matching `IdentifierType`, or `"Error: unknown"` if `s` isn't
 * recognized.
 */
function stringToIdentifierType(s: string): IdentifierType {
    // Pairs of [text in the documentation, resulting identifier type].
    const knownTypes: ReadonlyArray<readonly [string, IdentifierType]> = [
        ["syntax", "syntax"],
        ["procedure", "procedure"],
        ["module", "module"],
        ["thread param", "thread parameter"],
        ["global param", "global parameter"],
    ];
    const hit = knownTypes.find(([raw]) => raw === s);

    return hit ? hit[1] : "Error: unknown";
}
69
/**
 * The object to save the data of a function documentation to.
 * `parseTR` creates one object per row of the summary table; `addDescription`
 * and `addLibraries` fill in `description` and `moduleNames` afterwards.
 * Warning: copied from `./src/identifierDocumentation.ts` because of problems
 * with module imports.
 */
type FunctionDoc = {
    /** The identifier's name as parsed from the row; a leading `(` is removed. */
    name: string;
    /** `true` if the documented form starts with `(`. */
    startParen: boolean;
    /** `true` if the documented form ends with `)`. */
    endParen: boolean;
    /** The parameter names collected from the documented form. */
    params: string[];
    /** The kind of identifier, see `IdentifierType`. */
    type: IdentifierType;
    /** The libraries listed in the description's `libraries` stanza, if any. */
    moduleNames: string[];
    /** The URL of the description; its hash is the anchor of the entry. */
    url: URL;
    /** The sanitized description text. */
    description: string;
};
85
/**
 * The `RegExp` to match an example in a description.
 * It is applied by `sanitizeDescription` after backticks have been escaped, so
 * every code span starts with a backslash followed by a backtick. It matches a
 * run of lines at the very end of the text, each consisting of such a code
 * span, optionally followed by `=>` or `*`, and terminated by `<br>`.
 * The first group matches the whole example code.
 */
const exampleRegex = /\n((?:\\`.*?`\s*(?:=>|\*)?\s*<br>\n)+)(?:<br>)?\n*$/u;

/**
 * The `RegExp` to match a single line of a multi-line code example with many
 * individual backticks.
 * A line has to start and end (before the `<br>`) with an escaped backtick.
 * The `g` and `m` flags make it match every such line of the example.
 * The first group contains the actual data.
 */
const lineFormatRegex = /^\\`(.*)\\`\s*<br>\s*$/gmu;

/**
 * The `RegExp` to match a `libraries` stanza in a description.
 * The stanza is a single line starting with `**libraries:**`, followed by one
 * or more comma separated, parenthesized library names in escaped backticks.
 * The first group contains the libraries names, still including the escaped
 * backticks and the separating commas.
 */
const librariesRegex =
    /^\s*\*\*libraries:\*\*\s*(\\`\(.*?\)\\`(?:,\s*\\`\(.*?\)\\`)*)\s*<br>\s*$/mu;
105
/**
 * The base part of the Chez Scheme documentation URL.
 * `parseTR` prepends it to links that start with `./`.
 */
const baseURL = "https://cisco.github.io/ChezScheme/csug10.0/";

/**
 * The URL of the Chez Scheme documentation website.
 * This is the summary page that `main` downloads and parses first.
 */
const docURL = baseURL + "summary.html";

/**
 * The name of the file to save the parsed data to.
 */
const outFilename = "./src/functionDocumentation.ts";

/**
 * The list of downloaded files to delete when exiting the program.
 * `downloadAndRead` appends the local file name on every call, so the list may
 * contain duplicates; `main` deduplicates it before deleting.
 */
const filesToDelete: string[] = [];
125
/**
 * Main entry point.
 * Downloads and parses the summary page, writes the generated TypeScript file
 * and deletes the downloaded files afterwards.
 * If anything fails, the error is logged, the downloaded files registered so
 * far are removed on a best-effort basis and the process exits with status 1.
 */
async function main(): Promise<void> {
    try {
        const htmlText = await downloadAndRead(docURL);
        const tsText = await processHTML(htmlText);
        await writeFunctionDocumentation(tsText);
        // A file is registered once per read, so deduplicate before deleting.
        new Set(filesToDelete).forEach((file) => unlinkSync(file));
    } catch (error) {
        console.error(
            `Caught "${error}" trying to process the HTML and saving it.`
        );
        // `downloadAndRead` reuses a file that already exists, so a leftover
        // download would be read again by the next run instead of being
        // fetched anew. Remove what we know about before exiting.
        new Set(filesToDelete).forEach((file) => {
            try {
                unlinkSync(file);
            } catch {
                // Already deleted or not removable; nothing more we can do.
            }
        });
        process.exit(1);
    }
}
143
/**
 * Return the data of the Chez function documentation as Typescript objects in a
 * text file.
 * Every table row with exactly three cells that is not a header row is parsed
 * into a `FunctionDoc`. All pages the rows link to are downloaded once, then
 * the description and the libraries of each identifier are added.
 * @param text The HTML documentation file to parse.
 * @returns The data of the Chez function documentation as Typescript objects in a
 * text file.
 */
async function processHTML(text: string): Promise<string> {
    const htmlDoc = new JSDOM(text).window.document;
    const trs = Array.from(htmlDoc.querySelectorAll("tr")).filter(
        // eslint-disable-next-line no-magic-numbers
        (e) => e.childElementCount === 3 && e.children[0].nodeName !== "TH"
    );
    const ids: FunctionDoc[] = trs.map((tr) => parseTR(tr));
    // The page URL without the anchor. `origin` yields a well-formed
    // `scheme://host[:port]` prefix, whereas `protocol + hostname` would give
    // `https:host` (no `//`) and silently drop a port.
    const allUrl = Array.from(
        new Set(ids.map((id) => id.url.origin + id.url.pathname))
    );
    await Promise.all(
        allUrl.map((url) => {
            const pageURL = new URL(url);
            return download(pageURL, fileNameFromURL(pageURL));
        })
    );
    await Promise.all(ids.map((id) => addDescription(id)));

    ids.forEach((id) => addLibraries(id));

    return idsDocToTSFile(ids);
}
174
/**
 * Render the given `FunctionDoc`s as the content of a TypeScript source file.
 * The file consists of a header comment containing today's date and the
 * documentation URL, an import of `FunctionDoc` and the exported array
 * `functionDocs` with one object literal per element of `ids`.
 * @param ids The list of `FunctionDoc`s to convert.
 * @returns A TS file content with the list of `FunctionDoc`s.
 */
function idsDocToTSFile(ids: FunctionDoc[]): string {
    const now = new Date();
    // Day, month (1-based) and year, joined as `d.m.yyyy`.
    const stamp = [now.getDate(), now.getMonth() + 1, now.getFullYear()].join(
        "."
    );
    // The source text of a single object literal.
    const renderEntry = (id: FunctionDoc): string =>
        ` {
 name: "${id.name}",
 startParen: ${id.startParen},
 endParen: ${id.endParen},
 params: ["${id.params.join('", "')}"],
 type: "${id.type}",
 moduleNames: ["${id.moduleNames.join('", "')}"],
 url: new URL("${id.url}"),
 description: \`${id.description}\`
 },`;
    const entries = ids.map(renderEntry).join("\n");

    return `/*
 * SPDX-FileCopyrightText: Copyright 2023 Roland Csaszar
 * SPDX-License-Identifier: MIT
 *
 * Project: vscode-scheme-repl
 * File: functionDocumentation.ts
 * Date: ${stamp}
 *
 * ==============================================================================
 * The Chez Scheme user's guide is licensed under the Apache License Version 2:
 * https://cisco.github.io/ChezScheme/csug10.0/canned/copyright.html.
 * Autogenerated by the script \`../generate_function_documentation.ts\`, from
 * ${docURL}
 * DO NOT EDIT!
 */

/* eslint-disable max-lines */

import { FunctionDoc } from "./identifierDocumentation";

export const functionDocs: FunctionDoc[] = [
${entries}
]
`;
}
225
/**
 * Parse the given `tr` element and return its data as a `FunctionDoc`.
 * The first cell holds the documented form (name and parameters), the second
 * the identifier type and the third the link to the description. The fields
 * `moduleNames` and `description` are left empty.
 * @param tr The `tr` element to parse.
 * @returns The filled `FunctionDoc` object.
 */
// eslint-disable-next-line max-statements, max-lines-per-function
function parseTR(tr: HTMLTableRowElement): FunctionDoc {
    // Use the element children only: `childNodes` also contains any text
    // nodes between the cells, which would shift the indices used below.
    const tds = Array.from(tr.children) as HTMLTableCellElement[];
    const idType = stringToIdentifierType(tds[1].innerHTML);
    let name = "";
    // Filled in place by `parseParamsAndName`.
    const params: string[] = [];
    let startParen = false;
    let endParen = false;
    const nameElems = tds[0].childNodes[0].childNodes;
    if (idType === "global parameter" || idType === "thread parameter") {
        // Parameters: only the first node is used as the name.
        const tmpName = stringOrEmpty(nameElems[0].textContent);
        startParen = tmpName.startsWith("(");
        name = startParen ? tmpName.slice(1) : tmpName;
    } else if (nameElems.length > 1) {
        // A form with several nodes: a name followed by parameters.
        ({ startParen, name, endParen } = parseParamsAndName({
            nameElems,
            startParen,
            name,
            params,
            endParen,
        }));
    } else {
        // A single text node: strip the surrounding parentheses, if any.
        const tmpName = stringOrEmpty(nameElems[0].textContent).trim();
        startParen = tmpName.startsWith("(");
        endParen = tmpName.endsWith(")");
        name = tmpName.slice(startParen ? 1 : 0, endParen ? -1 : undefined);
    }
    // eslint-disable-next-line no-magic-numbers
    const { href } = tds[2].childNodes[0] as HTMLAnchorElement;
    // Links starting with `./` are relative to the documentation's base URL.
    const url = new URL(href.startsWith("./") ? baseURL + href : href);
    url.protocol = "https";
    return {
        name,
        startParen,
        endParen,
        type: idType,
        moduleNames: [],
        params,
        url,
        description: "",
    };
}
285
/**
 * Parse the nodes of a documented form into its name, its parameters and the
 * information whether it starts and ends with a parenthesis.
 * The first node holds the name, every `I` node between the first and the last
 * node is a parameter and the last node holds the closing parenthesis,
 * possibly preceded by a last parameter. The parameters are appended to
 * `data.params`; the other fields of `data` are updated too.
 * @param data The data needed for this function.
 * @returns The parsed data in the object
 * `{ startParen: boolean; name: string; endParen: boolean }`.
 */
// eslint-disable-next-line max-statements
function parseParamsAndName(data: {
    nameElems: NodeListOf<ChildNode>;
    startParen: boolean;
    name: string;
    params: string[];
    endParen: boolean;
}): { startParen: boolean; name: string; endParen: boolean } {
    const nodes = Array.from(data.nameElems);
    const head = stringOrEmpty(nodes[0].textContent).trimStart();
    data.startParen = head.startsWith("(");
    data.name = data.startParen ? head.slice(1) : head;

    // Everything between the first and the last node.
    for (const node of nodes.slice(1, -1)) {
        if (node.nodeName !== "I") {
            continue;
        }
        data.params.push(stringOrEmpty(node.textContent));
    }

    const tail = stringOrEmpty(nodes[nodes.length - 1].textContent).trim();
    if (tail.endsWith(")")) {
        // Text before the closing parenthesis is one more parameter.
        const lastParam = tail.slice(0, -1);
        if (lastParam !== "") {
            data.params.push(lastParam.trim());
        }
        data.endParen = true;
    }

    const { startParen, name, endParen } = data;
    return { startParen, name, endParen };
}
328
/**
 * Read the page the `FunctionDoc`'s URL points to, extract the identifier's
 * description from it and store it in `id.description`.
 * The description starts at the paragraph containing the anchor named like the
 * URL's hash and extends over the following sibling elements until one of
 * them contains an anchor with a different name.
 * @param id The `FunctionDoc` to process.
 */
// eslint-disable-next-line max-statements
async function addDescription(id: FunctionDoc): Promise<void> {
    const page = new JSDOM(await downloadAndRead(id.url.toString())).window
        .document;
    const anchor = id.url.hash.slice(1);
    // Matches any named anchor except the one of this identifier.
    const foreignAnchor = `a[name]:not(a[name="${anchor}"])`;
    const start = page.querySelector(`a[name="${anchor}"]`)?.closest("p");
    const parts = [""];
    for (
        let node = start;
        node;
        node = node.nextElementSibling as HTMLParagraphElement
    ) {
        // The next identifier's documentation begins here.
        if (node !== start && node.querySelector(foreignAnchor) !== null) {
            break;
        }
        node.childNodes.forEach((child) => parseChildNode(child, parts));
        parts.push("<br>\n");
    }
    id.description = sanitizeDescription(parts.join(""));
}
354
/**
 * Search the `FunctionDoc`'s description for a `libraries` stanza and store
 * the library names found there in `id.moduleNames`.
 * Without such a stanza `id.moduleNames` is set to the empty list.
 * @param id The `FunctionDoc` object to process.
 */
function addLibraries(id: FunctionDoc) {
    const stanza = librariesRegex.exec(id.description);
    id.moduleNames =
        stanza === null
            ? []
            : // Drop the escaped backticks, then split at the commas.
              stanza[1].replace(/\\`/gu, "").split(/,\s*/gu);
}
371
/**
 * Convert a single HTML node of the description to text and append the result
 * to `text`.
 * Line breaks become `<br>` plus a newline, bold text is wrapped in `**` and
 * the children of a `SPAN` are converted recursively. Within a `TT` node, text
 * is wrapped in backticks, italic text additionally in `*` and an image whose
 * source ends in `0.gif` becomes `=>`. All other nodes are ignored.
 * @param c The HTML node to parse.
 * @param text The description's text to append to.
 */
// eslint-disable-next-line max-lines-per-function
function parseChildNode(c: ChildNode, text: string[]) {
    const lineBreak = "<br>\n";
    // The node's text with every newline replaced by a space.
    const flat = (node: ChildNode): string | undefined =>
        node.textContent?.replace(/\n/gu, " ");
    const bold = (node: ChildNode): string => `**${flat(node)?.trim()}** `;
    // The text for a child of a `TT` node, `undefined` if there is none.
    const codePiece = (node: ChildNode): string | undefined => {
        const kind = node.nodeName;
        if (kind === "BR") {
            return lineBreak;
        }
        if (kind === "B") {
            return bold(node);
        }
        if (kind === "I") {
            return `*\`${flat(node)?.trim()}\`*`;
        }
        if (kind === "IMG") {
            return (node as HTMLImageElement).src.endsWith("0.gif")
                ? "=>"
                : undefined;
        }
        if (kind === "#text") {
            return `\`${flat(node)}\``;
        }
        return undefined;
    };

    const kind = c.nodeName;
    if (kind === "BR") {
        text.push(lineBreak);
    } else if (kind === "B") {
        text.push(bold(c));
    } else if (kind === "#text") {
        text.push(`${flat(c)}`);
    } else if (kind === "SPAN") {
        c.childNodes.forEach((child) => parseChildNode(child, text));
    } else if (kind === "TT") {
        c.childNodes.forEach((child) => {
            const piece = codePiece(child);
            if (piece !== undefined) {
                text.push(piece);
            }
        });
    }
}
433
/**
 * Return a sanitized version of the given text.
 * That is, without excessive whitespace and with escaped backticks and
 * backslashes. Also puts examples at the end into one big code block instead of
 * many individual backticks.
 * @param text The description text to sanitize.
 * @returns The sanitized description.
 */
function sanitizeDescription(text: string): string {
    const sanitized = text
        .replace(/[ ]+/gu, " ")
        .replace(/^ /gmu, "")
        .replace(/[ ]+\n/gu, "\n")
        .replace(/\n[\n]+$/u, "\n")
        .replace(/\n\n[\n]+/gu, "\n\n")
        // Non-breaking-space.
        .replace(/\u00A0/gu, " ")
        .replace(/\\/gu, "\\\\")
        .replace(/`/gu, "\\`");
    const match = sanitized.match(exampleRegex);
    if (!match || match.index === undefined) {
        return sanitized;
    }
    // eslint-disable-next-line prefer-destructuring
    const example = match[1];
    const exampleNoBackticks = example
        .replace(lineFormatRegex, "$1")
        .replace(/\\`/gu, "")
        .replace(/^ /gmu, "");
    // The match starts with a single newline; the example follows directly.
    const exampleStart = match.index + 1;
    // Splice the code block in by position. `String.replace` with a string
    // replacement would interpret `$$`, `$&` and the like inside the example
    // code and would replace the first textual occurrence of the example,
    // which need not be the one matched at the end of the description.
    return (
        sanitized.slice(0, exampleStart) +
        "**Examples:**\n\n\\`\\`\\`scheme\n" +
        exampleNoBackticks +
        "\n\\`\\`\\`\n" +
        sanitized.slice(exampleStart + example.length)
    );
}
470
/**
 * Turn a possibly missing string into a string, with all non breaking spaces
 * (`\u00A0`) changed to "normal" spaces.
 * @param s The `string` or `undefined` value to "convert".
 * @returns The empty string `""` if `s` is `undefined`, `null` or empty, the
 * string `s` with its non breaking spaces replaced else.
 */
function stringOrEmpty(s: string | undefined | null): string {
    if (!s) {
        return "";
    }

    return s.split("\u00A0").join(" ");
}
481
/**
 * Build the name of the local file a download from the given URL is saved to.
 * The name is the URL's host name directly followed by the last component of
 * its path; query and hash are not part of it.
 * @param url The URL to generate the filename from.
 * @returns The filename to use for the downloaded file from the given URL.
 */
function fileNameFromURL(url: URL) {
    const { hostname, pathname } = url;

    return `${hostname}${basename(pathname)}`;
}
490
/**
 * Return the content of the website at the given URL.
 * The website is downloaded to a local file first, unless that file already
 * exists. Exits the program if the download fails. The local file is not
 * deleted here, it is registered in `filesToDelete` instead.
 * @param url The URL of the website to download.
 * @returns The content of the downloaded file.
 */
async function downloadAndRead(url: string) {
    const source = new URL(url);
    const localFile = fileNameFromURL(source);
    const alreadyDownloaded = existsSync(localFile);
    if (!alreadyDownloaded) {
        try {
            await download(source, localFile);
        } catch (exp) {
            console.error(`Caught "${exp}" trying to download from ${url}`);
            process.exit(1);
        }
    }
    filesToDelete.push(localFile);

    return new Promise<string>((resolve, reject) => {
        readFile(localFile, { encoding: "utf8" }, (error, content) => {
            if (error) {
                reject(error);
            } else {
                resolve(content);
            }
        });
    });
}
519
/**
 * Download a file to the given path `fileName`.
 * The returned promise is rejected if the request fails, if the response
 * stream fails or if the file can't be written. The file is only created once
 * a response arrives and is removed again (best effort) if writing it fails,
 * so a failed download does not leave a file behind that a later
 * `existsSync` check would take for a finished download.
 * A response with a status outside of 2xx is still saved, but a warning is
 * logged.
 * @param url The URL to download.
 * @param fileName The path to save the downloaded file to.
 * @returns Nothing.
 */
async function download(url: URL, fileName: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        const request = https.get(url, (res) => {
            const status = res.statusCode ?? 0;
            // eslint-disable-next-line no-magic-numbers
            if (status < 200 || status >= 300) {
                console.warn(
                    `Warning: got HTTP status ${status} downloading ${url}`
                );
            }
            const fileStream = createWriteStream(fileName);
            // Stop writing, drop the incomplete file and report the error.
            const fail = (error: Error): void => {
                fileStream.destroy();
                try {
                    unlinkSync(fileName);
                } catch {
                    // The file doesn't exist or can't be removed.
                }
                reject(error);
            };
            res.on("error", fail);
            fileStream.on("error", fail);
            fileStream.on("finish", () =>
                fileStream.close((err) => {
                    if (err) {
                        fail(err);
                        return;
                    }
                    resolve();
                })
            );
            res.pipe(fileStream);
        });
        // Connection problems (DNS, refused, reset) are reported on the
        // request object, not on the response.
        request.on("error", reject);
    });
}
543
/**
 * Save the generated TypeScript source to the file `outFilename`, encoded as
 * UTF-8.
 * The returned promise is rejected with the error of `writeFile` if the file
 * can't be written.
 * @param text The text to save.
 */
async function writeFunctionDocumentation(text: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        const onWritten = (error: NodeJS.ErrnoException | null): void => {
            if (error) {
                reject(error);
            } else {
                resolve();
            }
        };
        writeFile(outFilename, text, { encoding: "utf8" }, onWritten);
    });
}
558
// Run the generator as soon as the script is loaded. `main` handles its own
// errors, so the returned promise is not awaited here.
main();