VSCodium/VS Code extension to support Chez Scheme: highlighting, autocompletion, documentation on hover and syntax checks.
at main 559 lines 18 kB view raw
1/* 2 * SPDX-FileCopyrightText: Copyright 2023 Roland Csaszar 3 * SPDX-License-Identifier: MIT 4 * 5 * Project: vscode-scheme-repl 6 * File: generate_function_documentation.ts 7 * Date: 19.May.2023 8 * 9 * ============================================================================== 10 * Parse the HTML documentation from the Chez Scheme page 11 * https://cisco.github.io/ChezScheme/csug10.0/summary.html 12 * and generate a list of objects holding the parsed data in `outFilename`. 13 * Run with `yarn --ignore-engines ts-node generate_function_documentation.ts`. 14 * 15 * The Chez Scheme user's guide is licensed under the Apache License Version 2: 16 * https://cisco.github.io/ChezScheme/csug10.0/canned/copyright.html. 17 */ 18 19/* eslint-disable operator-linebreak */ 20/* eslint-disable indent */ 21 22import * as https from "https"; 23import { 24 createWriteStream, 25 existsSync, 26 readFile, 27 unlinkSync, 28 writeFile, 29} from "fs"; 30import { JSDOM } from "jsdom"; 31import { basename } from "path"; 32 33/** 34 * The type of an identifier; `"Error: unknown"` marks a type that was not recognized. 35 * Warning: copied from `./src/identifierDocumentation.ts` because of problems 36 * with module imports. 37 */ 38type IdentifierType = 39 | "syntax" 40 | "module" 41 | "procedure" 42 | "thread parameter" 43 | "global parameter" 44 | "Error: unknown"; 45 46/** 47 * Return the string `s` converted to an `IdentifierType`. 48 * Return `"Error: unknown"` if the string isn't recognized. 49 * @param s The string to convert to an `IdentifierType`. 50 * @returns The string `s` converted to an `IdentifierType`. 51 * Return `"Error: unknown"` if the string isn't recognized. 
/**
 * Return the string `s` converted to an `IdentifierType`.
 * Surrounding whitespace is ignored, so a table cell like `" syntax\n"` is
 * still recognized. The abbreviations `"thread param"` and `"global param"`
 * are expanded to `"thread parameter"` and `"global parameter"`.
 * @param s The string to convert to an `IdentifierType`.
 * @returns The string `s` converted to an `IdentifierType`, or
 * `"Error: unknown"` if the string isn't recognized.
 */
function stringToIdentifierType(s: string): IdentifierType {
    switch (s.trim()) {
        case "syntax":
            return "syntax";
        case "procedure":
            return "procedure";
        case "module":
            return "module";
        case "thread param":
            return "thread parameter";
        case "global param":
            return "global parameter";
        default:
            return "Error: unknown";
    }
}

/**
 * The object to save the data of a function documentation to.
 * Warning: copied from `./src/identifierDocumentation.ts` because of problems
 * with module imports.
 */
type FunctionDoc = {
    /** The identifier's name, without the surrounding parentheses. */
    name: string;
    /** Whether the documented form starts with an opening parenthesis. */
    startParen: boolean;
    /** Whether the documented form ends with a closing parenthesis. */
    endParen: boolean;
    /** The names of the parameters of the documented form. */
    params: string[];
    /** The kind of identifier. */
    type: IdentifierType;
    /** The libraries named in the `libraries` stanza of the description. */
    moduleNames: string[];
    /** The URL (including the anchor) of the identifier's documentation. */
    url: URL;
    /** The description text, ready to be put in a template literal. */
    description: string;
};

/**
 * The `RegExp` to match an example in a description.
 * The first group matches the whole example code.
 */
const exampleRegex = /\n((?:\\`.*?`\s*(?:=>|\*)?\s*<br>\n)+)(?:<br>)?\n*$/u;

/**
 * The `RegExp` to match a single line of a multi-line code example with many
 * individual backticks.
 * The first group contains the actual data.
 */
const lineFormatRegex = /^\\`(.*)\\`\s*<br>\s*$/gmu;

/**
 * The `RegExp` to match a `libraries` stanza in a description.
 * The first group contains the libraries names.
 */
const librariesRegex =
    /^\s*\*\*libraries:\*\*\s*(\\`\(.*?\)\\`(?:,\s*\\`\(.*?\)\\`)*)\s*<br>\s*$/mu;

/**
 * The base part of the Chez Scheme documentation URL.
 */
const baseURL = "https://cisco.github.io/ChezScheme/csug10.0/";

/**
 * The URL of the Chez Scheme documentation website.
 */
const docURL = baseURL + "summary.html";

/**
 * The name of the file to save the parsed data to.
 */
const outFilename = "./src/functionDocumentation.ts";

/**
 * The list of downloaded files to delete when exiting the program.
 * May contain the same file more than once.
 */
const filesToDelete: string[] = [];
/**
 * Main entry point.
 * Downloads the summary page, converts it to the Typescript source text,
 * writes that to `outFilename` and finally removes the downloaded files.
 * Logs the error and exits the process with status 1 if any of the first three
 * steps fails; the downloaded files are left in place in that case.
 */
async function main(): Promise<void> {
    try {
        const htmlText = await downloadAndRead(docURL);
        const tsText = await processHTML(htmlText);
        await writeFunctionDocumentation(tsText);
    } catch (error) {
        console.error(
            `Caught "${error}" trying to process the HTML and saving it.`
        );
        process.exit(1);
    }
    removeDownloadedFiles();
}

/**
 * Delete every file registered in `filesToDelete` once.
 * A file that is already gone is skipped, and a failing deletion is only
 * logged: the generated output has been written at this point, so clean-up
 * problems must not turn the run into a failure.
 */
function removeDownloadedFiles(): void {
    new Set(filesToDelete).forEach((file) => {
        try {
            if (existsSync(file)) {
                unlinkSync(file);
            }
        } catch (error) {
            console.error(`Caught "${error}" trying to delete ${file}`);
        }
    });
}

/**
 * Return the data of the Chez function documentation as Typescript objects in a
 * text file.
 * Every table row with exactly three cells that is not a header row is parsed
 * as one identifier. All pages the identifiers link to are downloaded, then
 * the description and the libraries of each identifier are filled in.
 * @param text The HTML documentation file to parse.
 * @returns The data of the Chez function documentation as Typescript objects in a
 * text file.
 */
async function processHTML(text: string): Promise<string> {
    const summaryColumns = 3;
    const htmlDoc = new JSDOM(text).window.document;
    const rows = Array.from(htmlDoc.querySelectorAll("tr")).filter(
        (row) =>
            row.childElementCount === summaryColumns &&
            row.children[0].nodeName !== "TH"
    );
    const ids: FunctionDoc[] = rows.map((row) => parseTR(row));

    // `origin` already contains "scheme://host[:port]"; the anchor is dropped
    // so that every page is downloaded only once.
    const pages = new Set(ids.map((id) => id.url.origin + id.url.pathname));
    await Promise.all(
        Array.from(pages, (page) => {
            const pageURL = new URL(page);
            return download(pageURL, fileNameFromURL(pageURL));
        })
    );
    await Promise.all(ids.map((id) => addDescription(id)));

    ids.forEach((id) => addLibraries(id));

    return idsDocToTSFile(ids);
}
/**
 * Return a TS file content with the list of `FunctionDoc`s.
 * The file consists of a license header carrying today's date and `docURL`,
 * the import of the `FunctionDoc` type and the exported array `functionDocs`.
 * @param ids The list of `FunctionDoc`s to convert.
 * @returns A TS file content with the list of `FunctionDoc`s.
 */
function idsDocToTSFile(ids: FunctionDoc[]): string {
    const today = new Date();
    const date = `${today.getDate()}.${
        today.getMonth() + 1
    }.${today.getFullYear()}`;
    const entries = ids.map((id) => functionDocToTS(id)).join("\n");

    return `/*
 * SPDX-FileCopyrightText: Copyright 2023 Roland Csaszar
 * SPDX-License-Identifier: MIT
 *
 * Project: vscode-scheme-repl
 * File: functionDocumentation.ts
 * Date: ${date}
 *
 * ==============================================================================
 * The Chez Scheme user's guide is licensed under the Apache License Version 2:
 * https://cisco.github.io/ChezScheme/csug10.0/canned/copyright.html.
 * Autogenerated by the script \`../generate_function_documentation.ts\`, from
 * ${docURL}
 * DO NOT EDIT!
 */

/* eslint-disable max-lines */

import { FunctionDoc } from "./identifierDocumentation";

export const functionDocs: FunctionDoc[] = [
${entries}
]
`;
}

/**
 * Return the source text of an array literal holding the given strings.
 * An empty list yields `[]`, and every item is written as an escaped,
 * double-quoted string literal.
 * @param items The strings to put into the array literal.
 * @returns The source text of the array literal.
 */
function stringListToTS(items: string[]): string {
    return `[${items.map((item) => JSON.stringify(item)).join(", ")}]`;
}

/**
 * Return the source text of the object literal for a single `FunctionDoc`,
 * including the trailing comma.
 * @param id The `FunctionDoc` to convert.
 * @returns The source text of the object literal.
 */
function functionDocToTS(id: FunctionDoc): string {
    // The description is emitted inside a template literal, so a `${` in it
    // would be evaluated by the generated file instead of being shown.
    const description = id.description.replace(/\$\{/gu, "\\${");

    return `    {
        name: ${JSON.stringify(id.name)},
        startParen: ${id.startParen},
        endParen: ${id.endParen},
        params: ${stringListToTS(id.params)},
        type: "${id.type}",
        moduleNames: ${stringListToTS(id.moduleNames)},
        url: new URL("${id.url}"),
        description: \`${description}\`
    },`;
}
/**
 * Parse the given `tr` element into a `FunctionDoc`.
 * The first cell holds the documented form, the second the identifier type
 * and the third the link to the documentation. `description` and
 * `moduleNames` are left empty.
 * @param tr The `tr` element to parse.
 * @returns The filled `FunctionDoc` object.
 */
// eslint-disable-next-line max-statements, max-lines-per-function
function parseTR(tr: HTMLTableRowElement): FunctionDoc {
    // Use the element children: whitespace text nodes between the cells must
    // not shift the cell indices.
    const [formCell, typeCell, linkCell] = Array.from(
        tr.children
    ) as HTMLTableCellElement[];
    const idType = stringToIdentifierType(typeCell.innerHTML);
    const params: string[] = [];
    let name = "";
    let startParen = false;
    let endParen = false;
    const nameElems = formCell.childNodes[0].childNodes;
    if (idType === "global parameter" || idType === "thread parameter") {
        // Parameters: only the first node is used and only a leading
        // parenthesis is removed.
        const tmpName = stringOrEmpty(nameElems[0]?.textContent);
        startParen = tmpName.startsWith("(");
        name = startParen ? tmpName.slice(1) : tmpName;
    } else if (nameElems.length > 1) {
        // `parseParamsAndName` appends the parameters to `params`.
        ({ startParen, name, endParen } = parseParamsAndName({
            nameElems,
            startParen,
            name,
            params,
            endParen,
        }));
    } else {
        const tmpName = stringOrEmpty(nameElems[0]?.textContent).trim();
        startParen = tmpName.startsWith("(");
        endParen = tmpName.endsWith(")");
        name = tmpName.slice(startParen ? 1 : 0, endParen ? -1 : undefined);
    }
    const link = linkCell.childNodes[0] as HTMLAnchorElement;
    // Relative links are resolved against `baseURL`, absolute ones are taken
    // as they are.
    const url = new URL(link.href, baseURL);
    url.protocol = "https";
    return {
        name,
        startParen,
        endParen,
        type: idType,
        moduleNames: [],
        params,
        url,
        description: "",
    };
}
/**
 * Split the nodes of a documented form into name, parameters and parentheses.
 * The name is the text of the first node without leading whitespace and
 * without a leading `(`. Every `I` node between the first and the last node
 * is a parameter. If the last node's text ends with `)`, the text before the
 * parenthesis (if any) is the last parameter.
 * The parameters are appended to `data.params`; the other results are stored
 * in `data` and also returned.
 * @param data The data needed for this function.
 * @returns The parsed data in the object
 * `{ startParen: boolean; name: string; endParen: boolean }`.
 */
// eslint-disable-next-line max-statements
function parseParamsAndName(data: {
    nameElems: NodeListOf<ChildNode>;
    startParen: boolean;
    name: string;
    params: string[];
    endParen: boolean;
}): { startParen: boolean; name: string; endParen: boolean } {
    const nodes = Array.from(data.nameElems);

    const head = stringOrEmpty(nodes[0].textContent).trimStart();
    const opensWithParen = head.startsWith("(");
    data.startParen = opensWithParen;
    data.name = opensWithParen ? head.slice(1) : head;

    nodes
        .slice(1, -1)
        .filter((node) => node.nodeName === "I")
        .forEach((node) => data.params.push(stringOrEmpty(node.textContent)));

    const tail = stringOrEmpty(nodes[nodes.length - 1].textContent).trim();
    if (tail.endsWith(")")) {
        const lastParam = tail.slice(0, -1).trim();
        // `tail` is trimmed, so this is empty only for a lone ")".
        if (lastParam !== "") {
            data.params.push(lastParam);
        }
        data.endParen = true;
    }

    const { startParen, name, endParen } = data;
    return { startParen, name, endParen };
}
/**
 * The already parsed documentation pages, keyed by the page URL without the
 * anchor. Promises are stored so that concurrent callers share one parse.
 */
const parsedPages = new Map<string, Promise<Document>>();

/**
 * Read and parse the HTML page with the given URL.
 * @param pageURL The URL of the page, without an anchor.
 * @returns The parsed document of the page.
 */
async function parsePage(pageURL: string): Promise<Document> {
    const htmlString = await downloadAndRead(pageURL);
    return new JSDOM(htmlString).window.document;
}

/**
 * Return the parsed document of the page the given URL points to.
 * Each page is read and parsed only once, however many identifiers link to it.
 * @param url The URL of the page; its anchor is ignored.
 * @returns The parsed document of the page.
 */
function getParsedPage(url: URL): Promise<Document> {
    const pageURL = new URL(url.toString());
    pageURL.hash = "";
    const key = pageURL.toString();
    let page = parsedPages.get(key);
    if (page === undefined) {
        page = parsePage(key);
        parsedPages.set(key, page);
    }
    return page;
}

/**
 * Download the identifier's description from the URL in the `FunctionDoc`,
 * parse it and save it into the field `id.description`.
 * The description starts at the paragraph containing the anchor named by the
 * URL's hash and covers the following sibling elements up to (excluding) the
 * first one that contains a differently named anchor. The description is
 * empty if the anchor isn't found inside a paragraph.
 * @param id The `FunctionDoc` to process.
 */
// eslint-disable-next-line max-statements
async function addDescription(id: FunctionDoc) {
    const htmlDoc = await getParsedPage(id.url);
    const anchor = id.url.hash.slice(1);
    let currP = htmlDoc.querySelector(`a[name="${anchor}"]`)?.closest("p");
    const first = currP;
    const text = [""];
    while (
        currP &&
        (currP === first ||
            // eslint-disable-next-line no-eq-null, eqeqeq
            currP.querySelector(`a[name]:not(a[name="${anchor}"])`) == null)
    ) {
        currP.childNodes.forEach((c) => parseChildNode(c, text));
        text.push("<br>\n");
        currP = currP.nextElementSibling as HTMLParagraphElement;
    }
    id.description = sanitizeDescription(text.join(""));
}

/**
 * Parse the `FunctionDoc` description and add the needed libraries as
 * `moduleNames` to the object.
 * `moduleNames` is set to the empty list if the description doesn't contain a
 * `libraries` stanza.
 * @param id The `FunctionDoc` object to process.
 */
function addLibraries(id: FunctionDoc) {
    const match = id.description.match(librariesRegex);
    if (match === null) {
        id.moduleNames = [];
        return;
    }
    // The first group holds the comma separated, backtick quoted names.
    id.moduleNames = match[1].replace(/\\`/gu, "").split(/,\s*/gu);
}
/**
 * Return the text content of the node with all newlines changed to spaces.
 * @param node The node to get the text of.
 * @returns The text of the node on a single line.
 */
function singleLineText(node: ChildNode): string {
    return (node.textContent ?? "").replace(/\n/gu, " ");
}

/**
 * Parse a single HTML node of the description.
 * Handles line breaks, bold text, code (`TT`), plain text and `SPAN`s (whose
 * children are parsed recursively); every other node is ignored.
 * @param c The HTML node to parse.
 * @param text The description's text to append to.
 */
function parseChildNode(c: ChildNode, text: string[]) {
    switch (c.nodeName) {
        case "BR":
            text.push("<br>\n");
            break;
        case "B":
            text.push(`**${singleLineText(c).trim()}** `);
            break;
        case "TT":
            c.childNodes.forEach((cN) => parseCodeChildNode(cN, text));
            break;
        case "#text":
            text.push(singleLineText(c));
            break;
        case "SPAN":
            c.childNodes.forEach((cN) => parseChildNode(cN, text));
            break;
        default:
            break;
    }
}

/**
 * Parse a single child node of a `TT` (code) element of the description.
 * Text is wrapped in backticks, italic text additionally in asterisks, and an
 * image whose source ends with `0.gif` is rendered as `=>`.
 * @param cN The child node of the `TT` element to parse.
 * @param text The description's text to append to.
 */
function parseCodeChildNode(cN: ChildNode, text: string[]) {
    switch (cN.nodeName) {
        case "BR":
        case "B":
            // Same output as outside of code.
            parseChildNode(cN, text);
            break;
        case "I":
            text.push("*`" + singleLineText(cN).trim() + "`*");
            break;
        case "IMG":
            if ((cN as HTMLImageElement).src.endsWith("0.gif")) {
                text.push("=>");
            }
            break;
        case "#text":
            text.push("`" + singleLineText(cN) + "`");
            break;
        default:
            break;
    }
}

/**
 * Return a sanitized version of the given text.
 * That is, without excessive whitespace and with escaped backticks and
 * backslashes. Also puts examples at the end into one big code block instead of
 * many individual backticks.
 * @param text The description text to sanitize.
 * @returns The sanitized description.
 */
function sanitizeDescription(text: string): string {
    const sanitized = text
        .replace(/[ ]+/gu, " ")
        .replace(/^ /gmu, "")
        .replace(/[ ]+\n/gu, "\n")
        .replace(/\n[\n]+$/u, "\n")
        .replace(/\n\n[\n]+/gu, "\n\n")
        // Non-breaking-space.
        .replace(/\u00A0/gu, " ")
        .replace(/\\/gu, "\\\\")
        .replace(/`/gu, "\\`");
    const match = exampleRegex.exec(sanitized);
    if (match === null) {
        return sanitized;
    }
    // eslint-disable-next-line prefer-destructuring
    const example = match[1];
    const exampleNoBackticks = example
        .replace(lineFormatRegex, "$1")
        .replace(/\\`/gu, "")
        .replace(/^ /gmu, "");
    // The match starts with a newline, directly followed by the example.
    // Splice by position: the example code may contain `$` sequences that
    // `String.replace` would interpret, and the same text may occur earlier.
    const exampleStart = match.index + 1;
    return (
        sanitized.slice(0, exampleStart) +
        "**Examples:**\n\n\\`\\`\\`scheme\n" +
        exampleNoBackticks +
        "\n\\`\\`\\`\n" +
        sanitized.slice(exampleStart + example.length)
    );
}

/**
 * Return the string `s` if it isn't `undefined` or `null`, the empty string
 * `""` else. Changes all non breaking spaces (`\u00A0`) to "normal" spaces.
 * @param s The `string` or `undefined` value to "convert".
 * @returns The string `s` if it isn't `undefined` or `null`, the empty string
 * `""` else.
 */
function stringOrEmpty(s: string | undefined | null): string {
    return s ? s.replace(/\u00A0/gu, " ") : "";
}

/**
 * Return the filename to use for the downloaded file from the given URL.
 * The name is the host name directly followed by the last part of the path;
 * the anchor and the query are not part of it.
 * @param url The URL to generate the filename from.
 * @returns The filename to use for the downloaded file from the given URL.
 */
function fileNameFromURL(url: URL) {
    return url.hostname + basename(url.pathname);
}
/**
 * Download the given URL and return the content of the file.
 * The download is skipped if the target file already exists. Exits the
 * program if the download fails. The file is not deleted here, it is only
 * registered in `filesToDelete`.
 * @param url The URL of the website to download.
 * @returns The content of the downloaded file.
 */
async function downloadAndRead(url: string): Promise<string> {
    const urlUrl = new URL(url);
    const downloadTo = fileNameFromURL(urlUrl);
    if (!existsSync(downloadTo)) {
        try {
            await download(urlUrl, downloadTo);
        } catch (exp) {
            console.error(`Caught "${exp}" trying to download from ${url}`);
            process.exit(1);
        }
    }
    filesToDelete.push(downloadTo);
    return new Promise<string>((resolve, reject) => {
        readFile(downloadTo, { encoding: "utf8" }, (error, content) => {
            if (error) {
                reject(error);
                return;
            }
            resolve(content);
        });
    });
}

/**
 * Delete the given file if it exists; a failure is only logged.
 * Used to remove incomplete downloads, which would otherwise be taken for
 * complete ones by the `existsSync` check in `downloadAndRead`.
 * @param fileName The path of the file to delete.
 */
function discardPartialDownload(fileName: string): void {
    try {
        if (existsSync(fileName)) {
            unlinkSync(fileName);
        }
    } catch (error) {
        console.error(`Caught "${error}" trying to delete ${fileName}`);
    }
}

/**
 * Download a file to the given path `fileName`.
 * The returned promise is rejected if the request fails, if the server
 * doesn't answer with a 2xx status or if writing the file fails. The file is
 * only created once a successful response has arrived, and an incomplete
 * file is removed again.
 * @param url The URL to download.
 * @param fileName The path to save the downloaded file to.
 * @returns Nothing.
 */
async function download(url: URL, fileName: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        const request = https.get(url, (res) => {
            const status = res.statusCode ?? 0;
            // eslint-disable-next-line no-magic-numbers
            if (status < 200 || status >= 300) {
                // Drain the response so that the socket is released.
                res.resume();
                reject(
                    new Error(`Got HTTP status ${status} downloading ${url}`)
                );
                return;
            }
            const fileStream = createWriteStream(fileName);
            const fail = (error: Error) => {
                fileStream.destroy();
                discardPartialDownload(fileName);
                reject(error);
            };
            res.on("error", fail);
            fileStream.on("error", fail);
            fileStream.on("finish", () =>
                fileStream.close((err) => {
                    if (err) {
                        fail(err);
                        return;
                    }
                    resolve();
                })
            );
            res.pipe(fileStream);
        });
        // Errors before a response arrives (DNS, refused connection, ...).
        request.on("error", (error) => reject(error));
    });
}

/**
 * Write the parsed data to the file `outFilename`.
 * @param text The text to save.
 */
async function writeFunctionDocumentation(text: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        writeFile(outFilename, text, { encoding: "utf8" }, (error) => {
            if (error) {
                reject(error);
                return;
            }
            resolve();
        });
    });
}

main();