An Erlang lexer and syntax highlighter in Gleam
import gleam/int
import gleam/list
import gleam/option.{type Option, None, Some}
import gleam/string
import gleam_community/ansi
import houdini
import splitter.{type Splitter}

pub opaque type Lexer {
  Lexer(
    source: String,
    ignore_comments: Bool,
    ignore_whitespace: Bool,
    errors: List(Error),
    splitters: Splitters,
  )
}

type Splitters {
  Splitters(
    until_end_of_line: Splitter,
    string: Splitter,
    quoted_atom: Splitter,
    brace_escape_sequence: Splitter,
    sigil: Splitter,
    sigil_verbatim: Splitter,
    triple_quoted_string: Splitter,
  )
}

pub type Error {
  UnknownCharacter(character: String)
  UnterminatedStringLiteral
  UnterminatedQuotedAtom
  InvalidRadix(radix: String)
  NumericSeparatorNotAllowed
  ExpectedExponent
  NumberCannotEndAfterRadix
  UnterminatedCharacter
  UnterminatedEscapeSequence
  ExpectedSigilDelimiter
  ExpectedWhitespaceAfterTripleQuote
  InvalidTripleQuotedStringIndentation(
    expected_indentation: String,
    line: String,
  )
}

pub fn stringify_error(error: Error) -> String {
  case error {
    UnterminatedQuotedAtom -> "Unterminated quoted atom"
    UnterminatedStringLiteral -> "Unterminated string literal"
    ExpectedExponent -> "Expected an exponent"
    ExpectedSigilDelimiter -> "Expected a valid sigil delimiter after `~`"
    ExpectedWhitespaceAfterTripleQuote ->
      "Expected whitespace after a triple quote"
    InvalidRadix(radix:) -> "Invalid numeric radix: " <> radix
    InvalidTripleQuotedStringIndentation(expected_indentation:, line:) ->
      "Invalid triple-quoted string: Expected the indentation `"
      <> expected_indentation
      <> "` preceding the line `"
      <> line
      <> "`"
    NumberCannotEndAfterRadix ->
      "Number cannot end directly after radix specification"
    NumericSeparatorNotAllowed -> "Numeric separator is not allowed here"
    UnknownCharacter(character:) ->
      "Unexpected character: `" <> character <> "`"
    UnterminatedCharacter -> "Unterminated character literal"
    UnterminatedEscapeSequence -> "Unterminated escape sequence"
  }
}

pub type Token {
  // Whitespace and comments
  Whitespace(String)
  Comment(String)
  DocComment(String)
  ModuleComment(String)
  EndOfFile

  Character(String)
  Integer(String)
  Float(String)
  Atom(name: String, quoted: Bool)
  String(String)
  TripleQuotedString(
    sigil: option.Option(String),
    number_of_quotes: Int,
    beginning_whitespace: String,
    lines: List(String),
    end_indentation: String,
  )
  Sigil(sigil: String, delimiter: SigilDelimiter, contents: String)
  Variable(String)

  // Keywords
  After
  Begin
  Case
  Catch
  Cond
  Else
  End
  Fun
  If
  Let
  Maybe
  Of
  Receive
  Try
  When

  // Grouping
  LeftParen
  RightParen
  LeftBrace
  RightBrace
  LeftSquare
  RightSquare

  // Punctuation
  Comma
  Semicolon
  Colon
  Dot
  MinusGreater
  DoubleLess
  DoubleGreater
  Hash
  DoubleColon
  DoubleDot
  TripleDot
  DoublePipe
  EqualGreater
  ColonEqual
  LessMinus
  LessEqual

  // Operators
  Pipe
  DoubleEqual
  SlashEqual
  EqualLess
  Less
  GreaterEqual
  Greater
  EqualColonEqual
  EqualSlashEqual
  Plus
  Minus
  Star
  Slash
  Bnot
  Div
  Rem
  Band
  Bor
  Bxor
  Bsl
  Bsr
  Not
  And
  Or
  Xor
  Andalso
  Orelse
  DoublePlus
  DoubleMinus
  QuestionEqual
  Question
  Bang
  Equal

  // Invalid tokens
  Unknown(String)
  UnterminatedString(String)
  UnterminatedSigil(sigil: String, delimiter: SigilDelimiter, contents: String)
  UnterminatedAtom(String)
  InvalidTripleQuotedString(contents: String)
}

/// Convert a token back to its source code representation
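///
/// For example (illustrative):
///
/// ```gleam
/// pearl.token_to_source(pearl.Atom(name: "ok", quoted: False))
/// // -> "ok"
/// ```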
pub fn token_to_source(token: Token) -> String {
  case token {
    // Whitespace and comments
    Whitespace(space) -> space
    Comment(contents) -> "%" <> contents
    DocComment(contents) -> "%%" <> contents
    ModuleComment(contents) -> "%%%" <> contents
    EndOfFile -> ""

    Character(char) -> "$" <> char
    Integer(int) -> int
    Float(float) -> float
    Atom(name:, quoted: True) -> "'" <> name <> "'"
    Atom(name:, quoted: False) -> name
    String(contents) -> "\"" <> contents <> "\""
    TripleQuotedString(
      sigil:,
      number_of_quotes:,
      beginning_whitespace:,
      lines:,
      end_indentation:,
    ) ->
      case sigil {
        option.None -> ""
        option.Some(sigil) -> "~" <> sigil
      }
      <> string.repeat("\"", number_of_quotes)
      <> beginning_whitespace
      <> string.join(
        list.map(lines, fn(line) { end_indentation <> line }),
        "\n",
      )
      <> "\n"
      <> end_indentation
      <> string.repeat("\"", number_of_quotes)
    Sigil(sigil:, delimiter:, contents:) -> {
      let #(opening, closing) = sigil_delimiters(delimiter)
      "~" <> sigil <> opening <> contents <> closing
    }
    Variable(name) -> name

    // Keywords
    After -> "after"
    Begin -> "begin"
    Case -> "case"
    Catch -> "catch"
    Cond -> "cond"
    Else -> "else"
    End -> "end"
    Fun -> "fun"
    If -> "if"
    Let -> "let"
    Maybe -> "maybe"
    Of -> "of"
    Receive -> "receive"
    Try -> "try"
    When -> "when"

    // Grouping
    LeftParen -> "("
    RightParen -> ")"
    LeftBrace -> "{"
    RightBrace -> "}"
    LeftSquare -> "["
    RightSquare -> "]"

    // Punctuation
    Comma -> ","
    Semicolon -> ";"
    Colon -> ":"
    Dot -> "."
    MinusGreater -> "->"
    DoubleLess -> "<<"
    DoubleGreater -> ">>"
    Hash -> "#"
    DoubleColon -> "::"
    DoubleDot -> ".."
    TripleDot -> "..."
    DoublePipe -> "||"
    EqualGreater -> "=>"
    ColonEqual -> ":="
    LessMinus -> "<-"
    LessEqual -> "<="

    // Operators
    Pipe -> "|"
    DoubleEqual -> "=="
    SlashEqual -> "/="
    EqualLess -> "=<"
    Less -> "<"
    GreaterEqual -> ">="
    Greater -> ">"
    EqualColonEqual -> "=:="
    EqualSlashEqual -> "=/="
    Plus -> "+"
    Minus -> "-"
    Star -> "*"
    Slash -> "/"
    Bnot -> "bnot"
    Div -> "div"
    Rem -> "rem"
    Band -> "band"
    Bor -> "bor"
    Bxor -> "bxor"
    Bsl -> "bsl"
    Bsr -> "bsr"
    Not -> "not"
    And -> "and"
    Or -> "or"
    Xor -> "xor"
    Andalso -> "andalso"
    Orelse -> "orelse"
    DoublePlus -> "++"
    DoubleMinus -> "--"
    QuestionEqual -> "?="
    Question -> "?"
    Bang -> "!"
    Equal -> "="

    // Invalid tokens
    Unknown(char) -> char
    UnterminatedString(contents) -> "\"" <> contents
    UnterminatedSigil(sigil:, contents:, delimiter:) -> {
      let #(opening, _closing) = sigil_delimiters(delimiter)
      "~" <> sigil <> opening <> contents
    }
    UnterminatedAtom(contents) -> "'" <> contents
    InvalidTripleQuotedString(contents) -> "\"\"\"" <> contents <> "\"\"\""
  }
}

/// Convert a list of tokens back to their original source code
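///
/// Tokenising and then converting back round-trips the source; an
/// illustrative example:
///
/// ```gleam
/// let #(tokens, _errors) = pearl.tokenise(pearl.new("foo() -> ok."))
/// pearl.to_source(tokens)
/// // -> "foo() -> ok."
/// ```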
pub fn to_source(tokens: List(Token)) -> String {
  list.fold(tokens, "", fn(code, token) { code <> token_to_source(token) })
}

pub type SigilDelimiter {
  SigilNone
  SigilParen
  SigilSquare
  SigilBrace
  SigilAngle
  SigilSlash
  SigilPipe
  SigilSingleQuote
  SigilDoubleQuote
  SigilBacktick
  SigilHash
}

/// Get the beginning and ending characters for a sigil
pub fn sigil_delimiters(delimiter: SigilDelimiter) -> #(String, String) {
  case delimiter {
    SigilNone -> #("", "")
    SigilAngle -> #("<", ">")
    SigilBacktick -> #("`", "`")
    SigilBrace -> #("{", "}")
    SigilDoubleQuote -> #("\"", "\"")
    SigilHash -> #("#", "#")
    SigilParen -> #("(", ")")
    SigilPipe -> #("|", "|")
    SigilSingleQuote -> #("'", "'")
    SigilSlash -> #("/", "/")
    SigilSquare -> #("[", "]")
  }
}

pub fn new(source: String) -> Lexer {
  Lexer(
    source:,
    ignore_comments: False,
    ignore_whitespace: False,
    errors: [],
    splitters: make_splitters(),
  )
}

fn make_splitters() -> Splitters {
  Splitters(
    until_end_of_line: splitter.new(["\n", "\r\n"]),
    string: splitter.new(["\"", "\\"]),
    quoted_atom: splitter.new(["'", "\\"]),
    brace_escape_sequence: splitter.new(["}", "\n", "\r\n"]),
    sigil: splitter.new([
      ")", "]", "}", ">", "/", "|", "'", "\"", "`", "#", "\\",
    ]),
    sigil_verbatim: splitter.new([
      ")", "]", "}", ">", "/", "|", "'", "\"", "`", "#",
    ]),
    triple_quoted_string: splitter.new(["\n", "\r\n", "\"\"\""]),
  )
}

pub fn ignore_comments(lexer: Lexer) -> Lexer {
  Lexer(..lexer, ignore_comments: True)
}

pub fn ignore_whitespace(lexer: Lexer) -> Lexer {
  Lexer(..lexer, ignore_whitespace: True)
}
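/// Run the lexer, returning the tokens found in the source code along with
/// any errors encountered along the way.
///
/// An illustrative example:
///
/// ```gleam
/// let #(tokens, errors) =
///   pearl.new("foo() -> ok.")
///   |> pearl.ignore_whitespace
///   |> pearl.tokenise
/// ```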
pub fn tokenise(lexer: Lexer) -> #(List(Token), List(Error)) {
  do_tokenise(lexer, [])
}

fn do_tokenise(lexer: Lexer, tokens: List(Token)) -> #(List(Token), List(Error)) {
  case next(lexer) {
    #(lexer, EndOfFile) -> #(
      list.reverse([EndOfFile, ..tokens]),
      list.reverse(lexer.errors),
    )
    #(lexer, token) -> do_tokenise(lexer, [token, ..tokens])
  }
}

fn next(lexer: Lexer) -> #(Lexer, Token) {
  case lexer.source {
    "" -> #(lexer, EndOfFile)

    " " as space <> source
    | "\n" as space <> source
    | "\r" as space <> source
    | "\t" as space <> source
    | "\f" as space <> source -> lex_whitespace(advance(lexer, source), space)

    "%%%" <> source -> {
      let #(lexer, contents) = lex_until_end_of_line(advance(lexer, source))
      maybe_token(lexer, ModuleComment(contents), !lexer.ignore_comments)
    }
    "%%" <> source -> {
      let #(lexer, contents) = lex_until_end_of_line(advance(lexer, source))
      maybe_token(lexer, DocComment(contents), !lexer.ignore_comments)
    }
    "%" <> source -> {
      let #(lexer, contents) = lex_until_end_of_line(advance(lexer, source))
      maybe_token(lexer, Comment(contents), !lexer.ignore_comments)
    }

    "::" <> source -> #(advance(lexer, source), DoubleColon)
    ":=" <> source -> #(advance(lexer, source), ColonEqual)
    ":" <> source -> #(advance(lexer, source), Colon)
    "..." <> source -> #(advance(lexer, source), TripleDot)
    ".." <> source -> #(advance(lexer, source), DoubleDot)

    "(" <> source -> #(advance(lexer, source), LeftParen)
    ")" <> source -> #(advance(lexer, source), RightParen)
    "{" <> source -> #(advance(lexer, source), LeftBrace)
    "}" <> source -> #(advance(lexer, source), RightBrace)
    "[" <> source -> #(advance(lexer, source), LeftSquare)
    "]" <> source -> #(advance(lexer, source), RightSquare)

    "," <> source -> #(advance(lexer, source), Comma)
    ";" <> source -> #(advance(lexer, source), Semicolon)
    "." <> source -> #(advance(lexer, source), Dot)
    "->" <> source -> #(advance(lexer, source), MinusGreater)
    "<<" <> source -> #(advance(lexer, source), DoubleLess)
    ">>" <> source -> #(advance(lexer, source), DoubleGreater)
    "#" <> source -> #(advance(lexer, source), Hash)
    "||" <> source -> #(advance(lexer, source), DoublePipe)
    "=>" <> source -> #(advance(lexer, source), EqualGreater)
    "<-" <> source -> #(advance(lexer, source), LessMinus)
    "<=" <> source -> #(advance(lexer, source), LessEqual)
    "|" <> source -> #(advance(lexer, source), Pipe)

    "++" <> source -> #(advance(lexer, source), DoublePlus)
    "--" <> source -> #(advance(lexer, source), DoubleMinus)
    "==" <> source -> #(advance(lexer, source), DoubleEqual)
    "/=" <> source -> #(advance(lexer, source), SlashEqual)
    "=<" <> source -> #(advance(lexer, source), EqualLess)
    "<" <> source -> #(advance(lexer, source), Less)
    ">=" <> source -> #(advance(lexer, source), GreaterEqual)
    ">" <> source -> #(advance(lexer, source), Greater)
    "=:=" <> source -> #(advance(lexer, source), EqualColonEqual)
    "=/=" <> source -> #(advance(lexer, source), EqualSlashEqual)
    "+" <> source -> #(advance(lexer, source), Plus)
    "-" <> source -> #(advance(lexer, source), Minus)
    "*" <> source -> #(advance(lexer, source), Star)
    "/" <> source -> #(advance(lexer, source), Slash)
    "?=" <> source -> #(advance(lexer, source), QuestionEqual)
    "?" <> source -> #(advance(lexer, source), Question)
    "!" <> source -> #(advance(lexer, source), Bang)
    "=" <> source -> #(advance(lexer, source), Equal)
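
    // A lowercase letter begins an atom or a keyword, an uppercase letter
    // or an underscore begins a variable, and a digit begins a number.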
    "a" as char <> source
    | "b" as char <> source
    | "c" as char <> source
    | "d" as char <> source
    | "e" as char <> source
    | "f" as char <> source
    | "g" as char <> source
    | "h" as char <> source
    | "i" as char <> source
    | "j" as char <> source
    | "k" as char <> source
    | "l" as char <> source
    | "m" as char <> source
    | "n" as char <> source
    | "o" as char <> source
    | "p" as char <> source
    | "q" as char <> source
    | "r" as char <> source
    | "s" as char <> source
    | "t" as char <> source
    | "u" as char <> source
    | "v" as char <> source
    | "w" as char <> source
    | "x" as char <> source
    | "y" as char <> source
    | "z" as char <> source -> lex_atom(advance(lexer, source), char)

    "A" as char <> source
    | "B" as char <> source
    | "C" as char <> source
    | "D" as char <> source
    | "E" as char <> source
    | "F" as char <> source
    | "G" as char <> source
    | "H" as char <> source
    | "I" as char <> source
    | "J" as char <> source
    | "K" as char <> source
    | "L" as char <> source
    | "M" as char <> source
    | "N" as char <> source
    | "O" as char <> source
    | "P" as char <> source
    | "Q" as char <> source
    | "R" as char <> source
    | "S" as char <> source
    | "T" as char <> source
    | "U" as char <> source
    | "V" as char <> source
    | "W" as char <> source
    | "X" as char <> source
    | "Y" as char <> source
    | "Z" as char <> source
    | "_" as char <> source -> lex_variable(advance(lexer, source), char)

    "0" as char <> source
    | "1" as char <> source
    | "2" as char <> source
    | "3" as char <> source
    | "4" as char <> source
    | "5" as char <> source
    | "6" as char <> source
    | "7" as char <> source
    | "8" as char <> source
    | "9" as char <> source ->
      lex_number(advance(lexer, source), char, Initial, AfterNumber)

    "\"\"\"" <> source -> lex_triple_quoted_string(advance(lexer, source), None)

    "\"" <> source -> lex_string(advance(lexer, source), "")
    "'" <> source -> lex_quoted_atom(advance(lexer, source), "")

    "$" <> source -> lex_character(advance(lexer, source))

    "~" <> source -> lex_sigil(advance(lexer, source))

    _ ->
      case string.pop_grapheme(lexer.source) {
        Error(_) -> #(lexer, EndOfFile)
        Ok(#(char, source)) -> #(
          advance(error(lexer, UnknownCharacter(char)), source),
          Unknown(char),
        )
      }
  }
}

fn lex_character(lexer: Lexer) -> #(Lexer, Token) {
  case lexer.source {
    "\\" <> source -> {
      let #(lexer, escape_sequence) =
        lex_escape_sequence(advance(lexer, source))
      #(lexer, Character("\\" <> escape_sequence))
    }
    _ ->
      case string.pop_grapheme(lexer.source) {
        Ok(#(char, source)) -> #(advance(lexer, source), Character(char))
        Error(_) -> #(error(lexer, UnterminatedCharacter), Character(""))
      }
  }
}
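// An escape sequence following a `\` is one of: a caret control sequence
// such as `^G`, a `x{...}` sequence of hex digits, a two-digit hex escape
// such as `x2A`, an octal escape of up to three digits, or any other single
// character standing for itself.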
fn lex_escape_sequence(lexer: Lexer) -> #(Lexer, String) {
  case lexer.source {
    "^a" as sequence <> source
    | "^b" as sequence <> source
    | "^c" as sequence <> source
    | "^d" as sequence <> source
    | "^e" as sequence <> source
    | "^f" as sequence <> source
    | "^g" as sequence <> source
    | "^h" as sequence <> source
    | "^i" as sequence <> source
    | "^j" as sequence <> source
    | "^k" as sequence <> source
    | "^l" as sequence <> source
    | "^m" as sequence <> source
    | "^n" as sequence <> source
    | "^o" as sequence <> source
    | "^p" as sequence <> source
    | "^q" as sequence <> source
    | "^r" as sequence <> source
    | "^s" as sequence <> source
    | "^t" as sequence <> source
    | "^u" as sequence <> source
    | "^v" as sequence <> source
    | "^w" as sequence <> source
    | "^x" as sequence <> source
    | "^y" as sequence <> source
    | "^z" as sequence <> source
    | "^A" as sequence <> source
    | "^B" as sequence <> source
    | "^C" as sequence <> source
    | "^D" as sequence <> source
    | "^E" as sequence <> source
    | "^F" as sequence <> source
    | "^G" as sequence <> source
    | "^H" as sequence <> source
    | "^I" as sequence <> source
    | "^J" as sequence <> source
    | "^K" as sequence <> source
    | "^L" as sequence <> source
    | "^M" as sequence <> source
    | "^N" as sequence <> source
    | "^O" as sequence <> source
    | "^P" as sequence <> source
    | "^Q" as sequence <> source
    | "^R" as sequence <> source
    | "^S" as sequence <> source
    | "^T" as sequence <> source
    | "^U" as sequence <> source
    | "^V" as sequence <> source
    | "^W" as sequence <> source
    | "^X" as sequence <> source
    | "^Y" as sequence <> source
    | "^Z" as sequence <> source
    | "^@" as sequence <> source
    | "^[" as sequence <> source
    | "^\\" as sequence <> source
    | "^]" as sequence <> source
    | "^^" as sequence <> source
    | "^_" as sequence <> source
    | "^?" as sequence <> source -> #(advance(lexer, source), sequence)

    "x{" <> _source -> lex_brace_escape_sequence(lexer)
    "x" <> source -> lex_hex_escape_sequence(advance(lexer, source))

    "0" as char <> source
    | "1" as char <> source
    | "2" as char <> source
    | "3" as char <> source
    | "4" as char <> source
    | "5" as char <> source
    | "6" as char <> source
    | "7" as char <> source ->
      lex_octal_escape_sequence(advance(lexer, source), char)

    _ ->
      case string.pop_grapheme(lexer.source) {
        Error(_) -> #(error(lexer, UnterminatedEscapeSequence), "")
        Ok(#(char, source)) -> #(advance(lexer, source), char)
      }
  }
}

fn lex_octal_escape_sequence(lexer: Lexer, first: String) -> #(Lexer, String) {
  case extract_octal_digit(lexer) {
    Error(_) -> #(lexer, first)
    Ok(#(lexer, second)) ->
      case extract_octal_digit(lexer) {
        Error(_) -> #(lexer, first <> second)
        Ok(#(lexer, third)) -> #(lexer, first <> second <> third)
      }
  }
}

fn extract_octal_digit(lexer: Lexer) -> Result(#(Lexer, String), Nil) {
  case lexer.source {
    "0" as char <> source
    | "1" as char <> source
    | "2" as char <> source
    | "3" as char <> source
    | "4" as char <> source
    | "5" as char <> source
    | "6" as char <> source
    | "7" as char <> source -> Ok(#(advance(lexer, source), char))
    _ -> Error(Nil)
  }
}

fn lex_hex_escape_sequence(lexer: Lexer) -> #(Lexer, String) {
  case extract_hex_digit(lexer) {
    Error(_) -> #(error(lexer, UnterminatedEscapeSequence), "x")
    Ok(#(lexer, first)) ->
      case extract_hex_digit(lexer) {
        Error(_) -> #(error(lexer, UnterminatedEscapeSequence), "x" <> first)
        Ok(#(lexer, second)) -> #(lexer, "x" <> first <> second)
      }
  }
}

fn extract_hex_digit(lexer: Lexer) -> Result(#(Lexer, String), Nil) {
  case lexer.source {
    "0" as char <> source
    | "1" as char <> source
    | "2" as char <> source
    | "3" as char <> source
    | "4" as char <> source
    | "5" as char <> source
    | "6" as char <> source
    | "7" as char <> source
    | "8" as char <> source
    | "9" as char <> source
    | "a" as char <> source
    | "b" as char <> source
    | "c" as char <> source
    | "d" as char <> source
    | "e" as char <> source
    | "f" as char <> source
    | "A" as char <> source
    | "B" as char <> source
    | "C" as char <> source
    | "D" as char <> source
    | "E" as char <> source
    | "F" as char <> source -> Ok(#(advance(lexer, source), char))
    _ -> Error(Nil)
  }
}

fn lex_brace_escape_sequence(lexer: Lexer) -> #(Lexer, String) {
  case
    splitter.split_after(lexer.splitters.brace_escape_sequence, lexer.source)
  {
    #(before, "") -> #(error(lexer, UnterminatedEscapeSequence), before)
    #(before, after) -> #(advance(lexer, after), before)
  }
}

type LexNumberMode {
  Initial
  Radix(Int)
  Decimal
  Exponent
}

type DelimitedPosition {
  AfterDecimal
  AfterNumber
  AfterSeparator
  AfterExponent
  AfterRadix
}

fn lex_number(
  lexer: Lexer,
  lexed: String,
  mode: LexNumberMode,
  position: DelimitedPosition,
) -> #(Lexer, Token) {
  let radix = case mode {
    Radix(r) -> r
    Initial | Decimal | Exponent -> 10
  }

  case lexer.source {
    "0" as char <> source | "1" as char <> source ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "2" as char <> source if radix >= 3 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "3" as char <> source if radix >= 4 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "4" as char <> source if radix >= 5 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "5" as char <> source if radix >= 6 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "6" as char <> source if radix >= 7 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "7" as char <> source if radix >= 8 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "8" as char <> source if radix >= 9 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "9" as char <> source if radix >= 10 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "a" as char <> source | "A" as char <> source if radix >= 11 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "b" as char <> source | "B" as char <> source if radix >= 12 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "c" as char <> source | "C" as char <> source if radix >= 13 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "d" as char <> source | "D" as char <> source if radix >= 14 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "e" as char <> source | "E" as char <> source if radix >= 15 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "f" as char <> source | "F" as char <> source if radix >= 16 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "g" as char <> source | "G" as char <> source if radix >= 17 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "h" as char <> source | "H" as char <> source if radix >= 18 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "i" as char <> source | "I" as char <> source if radix >= 19 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "j" as char <> source | "J" as char <> source if radix >= 20 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "k" as char <> source | "K" as char <> source if radix >= 21 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "l" as char <> source | "L" as char <> source if radix >= 22 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "m" as char <> source | "M" as char <> source if radix >= 23 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "n" as char <> source | "N" as char <> source if radix >= 24 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "o" as char <> source | "O" as char <> source if radix >= 25 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "p" as char <> source | "P" as char <> source if radix >= 26 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "q" as char <> source | "Q" as char <> source if radix >= 27 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "r" as char <> source | "R" as char <> source if radix >= 28 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "s" as char <> source | "S" as char <> source if radix >= 29 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "t" as char <> source | "T" as char <> source if radix >= 30 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "u" as char <> source | "U" as char <> source if radix >= 31 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "v" as char <> source | "V" as char <> source if radix >= 32 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "w" as char <> source | "W" as char <> source if radix >= 33 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "x" as char <> source | "X" as char <> source if radix >= 34 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "y" as char <> source | "Y" as char <> source if radix >= 35 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
    "z" as char <> source | "Z" as char <> source if radix >= 36 ->
      lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)

    // A `#` after a leading base-10 number sets the radix for the digits
    // that follow it, as in `16#FF` or `2#1010`. Radixes outside 2-36 are
    // invalid.
    "#" <> source if mode == Initial && position == AfterNumber ->
      case int.parse(string.replace(in: lexed, each: "_", with: "")) {
        Error(_) -> #(
          error(advance(lexer, source), InvalidRadix(lexed)),
          Integer(lexed),
        )
        Ok(radix) if radix < 2 || radix > 36 -> #(
          error(advance(lexer, source), InvalidRadix(lexed)),
          Integer(lexed),
        )
        Ok(radix) ->
          lex_number(
            advance(lexer, source),
            lexed <> "#",
            Radix(radix),
            AfterRadix,
          )
      }

    "_" <> source if position == AfterNumber ->
      lex_number(advance(lexer, source), lexed <> "_", mode, AfterSeparator)

    "_" <> _ -> #(error(lexer, NumericSeparatorNotAllowed), Integer(lexed))

    "." <> source if mode == Initial && position == AfterNumber ->
      lex_number(advance(lexer, source), lexed <> ".", Decimal, AfterDecimal)

    "e-" as prefix <> source
    | "e" as prefix <> source
    | "E-" as prefix <> source
    | "E" as prefix <> source
      if mode == Decimal && position == AfterNumber
    ->
      lex_number(
        advance(lexer, source),
        lexed <> prefix,
        Exponent,
        AfterExponent,
      )

    _ -> {
      let token = case mode {
        Decimal | Exponent -> Float(lexed)
        Initial | Radix(_) -> Integer(lexed)
      }
      case position {
        // If we have some code that looks like `15.`, that is valid syntax,
        // but it's an integer followed by a dot, not a float.
        AfterDecimal -> #(
          advance(lexer, "." <> lexer.source),
          Integer(string.drop_end(lexed, 1)),
        )
        AfterExponent -> #(error(lexer, ExpectedExponent), token)
        AfterRadix -> #(error(lexer, NumberCannotEndAfterRadix), token)
        AfterNumber -> #(lexer, token)
        AfterSeparator -> #(error(lexer, NumericSeparatorNotAllowed), token)
      }
    }
  }
}
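// Sigils: the contents of `~s` and `~b` sigils may contain escape sequences,
// while the verbatim `~S` and `~B` forms treat a backslash literally; a `~`
// with no letter is lexed like the escaped form. The contents run from the
// opening delimiter to the matching closing delimiter, or form a
// triple-quoted string when delimited by `"""`.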
fn lex_sigil(lexer: Lexer) -> #(Lexer, Token) {
  let #(lexer, sigil, verbatim) = case lexer.source {
    "b" as sigil <> source | "s" as sigil <> source -> #(
      advance(lexer, source),
      sigil,
      False,
    )

    "B" as sigil <> source | "S" as sigil <> source -> #(
      advance(lexer, source),
      sigil,
      True,
    )
    _ -> #(lexer, "", False)
  }

  case lexer.source {
    "\"\"\"" <> source ->
      lex_triple_quoted_string(advance(lexer, source), Some(sigil))
    _ -> {
      let #(lexer, delimiter, closing_char) = case lexer.source {
        "(" <> source -> #(advance(lexer, source), SigilParen, ")")
        "[" <> source -> #(advance(lexer, source), SigilSquare, "]")
        "{" <> source -> #(advance(lexer, source), SigilBrace, "}")
        "<" <> source -> #(advance(lexer, source), SigilAngle, ">")

        "/" <> source -> #(advance(lexer, source), SigilSlash, "/")
        "|" <> source -> #(advance(lexer, source), SigilPipe, "|")
        "'" <> source -> #(advance(lexer, source), SigilSingleQuote, "'")
        "\"" <> source -> #(advance(lexer, source), SigilDoubleQuote, "\"")
        "`" <> source -> #(advance(lexer, source), SigilBacktick, "`")
        "#" <> source -> #(advance(lexer, source), SigilHash, "#")

        _ -> #(error(lexer, ExpectedSigilDelimiter), SigilNone, "")
      }

      case delimiter {
        SigilNone -> #(
          lexer,
          UnterminatedSigil(sigil:, delimiter:, contents: ""),
        )
        _ -> {
          let splitter = case verbatim {
            False -> lexer.splitters.sigil
            True -> lexer.splitters.sigil_verbatim
          }

          do_lex_sigil(lexer, sigil, delimiter, closing_char, splitter, "")
        }
      }
    }
  }
}

fn do_lex_sigil(
  lexer: Lexer,
  sigil: String,
  delimiter: SigilDelimiter,
  closing_char: String,
  splitter: Splitter,
  contents: String,
) -> #(Lexer, Token) {
  let #(before, split, after) = splitter.split(splitter, lexer.source)
  case split {
    "" -> #(
      error(advance(lexer, after), UnterminatedStringLiteral),
      UnterminatedSigil(sigil:, delimiter:, contents: contents <> before),
    )

    "\\" ->
      case string.pop_grapheme(after) {
        Error(_) -> #(
          error(advance(lexer, after), UnterminatedStringLiteral),
          UnterminatedSigil(
            sigil:,
            delimiter:,
            contents: contents <> before <> "\\",
          ),
        )
        Ok(#(character, source)) ->
          do_lex_sigil(
            advance(lexer, source),
            sigil,
            delimiter,
            closing_char,
            splitter,
            contents <> before <> "\\" <> character,
          )
      }

    _ if split == closing_char -> #(
      advance(lexer, after),
      Sigil(sigil:, delimiter:, contents: contents <> before),
    )

    // Here, we've split on a delimiter which doesn't match the current sigil.
    // In this case, we must continue lexing until we find a delimiter of the
    // correct kind.
    _ ->
      do_lex_sigil(
        advance(lexer, after),
        sigil,
        delimiter,
        closing_char,
        splitter,
        contents <> before <> split,
      )
  }
}

fn lex_string(lexer: Lexer, contents: String) -> #(Lexer, Token) {
  let #(before, split, after) =
    splitter.split(lexer.splitters.string, lexer.source)
  case split {
    "" -> #(
      error(advance(lexer, after), UnterminatedStringLiteral),
      UnterminatedString(contents <> before),
    )

    "\\" -> {
      let #(lexer, escape) = lex_escape_sequence(advance(lexer, after))
      lex_string(lexer, contents <> before <> "\\" <> escape)
    }

    _ -> #(advance(lexer, after), String(contents <> before))
  }
}
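// Triple-quoted strings: only whitespace may follow the opening quotes on
// their line, and every content line must start with the same indentation as
// the closing quotes. That shared indentation is stored separately so the
// token can be converted back to source verbatim. Opening with more than
// three quotes is also valid, and the string then ends at a matching run of
// closing quotes.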
fn lex_triple_quoted_string(
  lexer: Lexer,
  sigil: Option(String),
) -> #(Lexer, Token) {
  let #(lexer, extra_quotes) = count_extra_quotes(lexer, 0)

  let #(lexer, beginning_whitespace) = case
    splitter.split(lexer.splitters.until_end_of_line, lexer.source)
  {
    #(_, "", _) -> #(error(lexer, ExpectedWhitespaceAfterTripleQuote), "")
    #(before, newline, after) ->
      case is_whitespace(before) {
        True -> #(advance(lexer, after), before <> newline)
        False -> #(error(lexer, ExpectedWhitespaceAfterTripleQuote), "")
      }
  }

  let #(lexer, lines, end_indentation) =
    lex_triple_quoted_string_contents(lexer, [], "", extra_quotes)

  case strip_line_prefixes(lines, end_indentation, []) {
    Error(line) -> {
      let contents =
        beginning_whitespace
        <> string.join(list.reverse(lines), "\n")
        <> "\n"
        <> end_indentation
      #(
        error(
          lexer,
          InvalidTripleQuotedStringIndentation(
            expected_indentation: end_indentation,
            line:,
          ),
        ),
        InvalidTripleQuotedString(contents),
      )
    }
    Ok(lines) -> #(
      lexer,
      TripleQuotedString(
        sigil:,
        number_of_quotes: extra_quotes + 3,
        beginning_whitespace:,
        lines:,
        end_indentation:,
      ),
    )
  }
}

fn count_extra_quotes(lexer: Lexer, extra: Int) -> #(Lexer, Int) {
  case lexer.source {
    "\"" <> source -> count_extra_quotes(advance(lexer, source), extra + 1)
    _ -> #(lexer, extra)
  }
}

fn is_whitespace(string: String) -> Bool {
  case string {
    "" -> True
    " " <> string
    | "\n" <> string
    | "\r" <> string
    | "\t" <> string
    | "\f" <> string -> is_whitespace(string)
    _ -> False
  }
}

fn strip_line_prefixes(
  lines: List(String),
  end_indentation: String,
  acc: List(String),
) -> Result(List(String), String) {
  case lines {
    [] -> Ok(acc)
    [line, ..lines] ->
      case strip_prefix(line, end_indentation) {
        Ok(line) -> strip_line_prefixes(lines, end_indentation, [line, ..acc])
        Error(_) -> Error(line)
      }
  }
}
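// Remove `prefix` from the start of `string`, returning an error if the
// string does not begin with that prefix. Implemented natively on each
// target.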
@external(erlang, "pearl_ffi", "strip_prefix")
@external(javascript, "./pearl_ffi.mjs", "strip_prefix")
fn strip_prefix(string: String, prefix: String) -> Result(String, Nil)

fn lex_triple_quoted_string_contents(
  lexer: Lexer,
  lines: List(String),
  current_line: String,
  extra_quotes: Int,
) -> #(Lexer, List(String), String) {
  let #(before, split, after) =
    splitter.split(lexer.splitters.triple_quoted_string, lexer.source)

  let before = current_line <> before

  case split {
    "\"\"\"" -> {
      let lexer = advance(lexer, after)
      case is_whitespace(before) {
        False ->
          lex_triple_quoted_string_contents(
            lexer,
            lines,
            before <> "\"\"\"",
            extra_quotes,
          )
        True if extra_quotes == 0 -> #(lexer, lines, before)
        True ->
          case consume_extra_quotes(lexer, extra_quotes) {
            Ok(lexer) -> #(lexer, lines, before)
            Error(Nil) ->
              lex_triple_quoted_string_contents(
                lexer,
                lines,
                before <> "\"\"\"",
                extra_quotes,
              )
          }
      }
    }

    "\n" | "\r\n" ->
      lex_triple_quoted_string_contents(
        advance(lexer, after),
        [before, ..lines],
        "",
        extra_quotes,
      )

    _ -> #(error(lexer, UnterminatedStringLiteral), [before, ..lines], "")
  }
}

fn consume_extra_quotes(lexer: Lexer, extra_quotes: Int) -> Result(Lexer, Nil) {
  case extra_quotes, lexer.source {
    0, _ -> Ok(lexer)
    _, "\"" <> source ->
      consume_extra_quotes(advance(lexer, source), extra_quotes - 1)
    _, _ -> Error(Nil)
  }
}

fn lex_quoted_atom(lexer: Lexer, contents: String) -> #(Lexer, Token) {
  let #(before, split, after) =
    splitter.split(lexer.splitters.quoted_atom, lexer.source)
  case split {
    "" -> #(
      error(advance(lexer, after), UnterminatedQuotedAtom),
      UnterminatedAtom(contents <> before),
    )

    "\\" ->
      case string.pop_grapheme(after) {
        Error(_) -> #(
          error(advance(lexer, after), UnterminatedQuotedAtom),
          UnterminatedAtom(contents <> before <> "\\"),
        )
        Ok(#(character, source)) ->
          lex_quoted_atom(
            advance(lexer, source),
            contents <> before <> "\\" <> character,
          )
      }

    _ -> #(advance(lexer, after), Atom(contents <> before, True))
  }
}

// After its first character, a variable or unquoted atom may contain ASCII
// letters, digits, underscores, and `@`.
fn lex_variable_or_atom(lexer: Lexer, lexed: String) -> #(Lexer, String) {
  case lexer.source {
    "a" as char <> source
    | "b" as char <> source
    | "c" as char <> source
    | "d" as char <> source
    | "e" as char <> source
    | "f" as char <> source
    | "g" as char <> source
    | "h" as char <> source
    | "i" as char <> source
    | "j" as char <> source
    | "k" as char <> source
    | "l" as char <> source
    | "m" as char <> source
    | "n" as char <> source
    | "o" as char <> source
    | "p" as char <> source
    | "q" as char <> source
    | "r" as char <> source
    | "s" as char <> source
    | "t" as char <> source
    | "u" as char <> source
    | "v" as char <> source
    | "w" as char <> source
    | "x" as char <> source
    | "y" as char <> source
    | "z" as char <> source
    | "A" as char <> source
    | "B" as char <> source
    | "C" as char <> source
    | "D" as char <> source
    | "E" as char <> source
    | "F" as char <> source
    | "G" as char <> source
    | "H" as char <> source
    | "I" as char <> source
    | "J" as char <> source
    | "K" as char <> source
    | "L" as char <> source
    | "M" as char <> source
    | "N" as char <> source
    | "O" as char <> source
    | "P" as char <> source
    | "Q" as char <> source
    | "R" as char <> source
    | "S" as char <> source
    | "T" as char <> source
    | "U" as char <> source
    | "V" as char <> source
    | "W" as char <> source
    | "X" as char <> source
    | "Y" as char <> source
    | "Z" as char <> source
    | "0" as char <> source
    | "1" as char <> source
    | "2" as char <> source
    | "3" as char <> source
    | "4" as char <> source
    | "5" as char <> source
    | "6" as char <> source
    | "7" as char <> source
    | "8" as char <> source
    | "9" as char <> source
    | "_" as char <> source
    | "@" as char <> source ->
      lex_variable_or_atom(advance(lexer, source), lexed <> char)

    _ -> #(lexer, lexed)
  }
}

fn lex_variable(lexer: Lexer, char: String) -> #(Lexer, Token) {
  let #(lexer, name) = lex_variable_or_atom(lexer, char)
  #(lexer, Variable(name))
}

fn lex_atom(lexer: Lexer, char: String) -> #(Lexer, Token) {
  let #(lexer, name) = lex_variable_or_atom(lexer, char)

  let token = case name {
    "after" -> After
    "begin" -> Begin
    "case" -> Case
    "catch" -> Catch
    "cond" -> Cond
    "else" -> Else
    "end" -> End
    "fun" -> Fun
    "if" -> If
    "let" -> Let
    "maybe" -> Maybe
    "of" -> Of
    "receive" -> Receive
    "try" -> Try
    "when" -> When
    "bnot" -> Bnot
    "div" -> Div
    "rem" -> Rem
    "band" -> Band
    "bor" -> Bor
    "bxor" -> Bxor
    "bsl" -> Bsl
    "bsr" -> Bsr
    "not" -> Not
    "and" -> And
    "or" -> Or
    "xor" -> Xor
    "andalso" -> Andalso
    "orelse" -> Orelse

    _ -> Atom(name, False)
  }
  #(lexer, token)
}

fn lex_until_end_of_line(lexer: Lexer) -> #(Lexer, String) {
  let #(before, after) =
    splitter.split_after(lexer.splitters.until_end_of_line, lexer.source)
  #(advance(lexer, after), before)
}

fn lex_whitespace(lexer: Lexer, lexed: String) -> #(Lexer, Token) {
  case lexer.source {
    " " as space <> source
    | "\n" as space <> source
    | "\r" as space <> source
    | "\t" as space <> source
    | "\f" as space <> source ->
      lex_whitespace(advance(lexer, source), lexed <> space)
    _ -> maybe_token(lexer, Whitespace(lexed), !lexer.ignore_whitespace)
  }
}

fn maybe_token(lexer: Lexer, token: Token, condition: Bool) -> #(Lexer, Token) {
  case condition {
    True -> #(lexer, token)
    False -> next(lexer)
  }
}

fn advance(lexer: Lexer, source: String) -> Lexer {
  Lexer(..lexer, source:)
}

fn error(lexer: Lexer, error: Error) -> Lexer {
  Lexer(..lexer, errors: [error, ..lexer.errors])
}

/// A highlighting token, containing information about the kind of syntax
/// being used. Many similar tokens (e.g. all keywords) are grouped together
/// to simplify highlighting.
///
/// For syntax tokens, see [`Token`](#Token).
///
pub type HighlightToken {
  HighlightWhitespace(String)
  HighlightKeyword(String)
  HighlightVariable(String)
  HighlightString(String)
  HighlightAtom(String)
  HighlightNumber(String)
  HighlightModule(String)
  HighlightFunction(String)
  HighlightOperator(String)
  HighlightComment(String)
  HighlightPunctuation(String)
  HighlightOther(String)
}

/// Convert a string of Erlang source code into ansi highlighting.
1356/// 1357/// Colours taken from [`contour`](https://hexdocs.pm/contour): 1358/// | Token | Colour | 1359/// | ---------------------- | ----------- | 1360/// | Keyword | Yellow | 1361/// | Module | Cyan | 1362/// | Function | Blue | 1363/// | Operator | Magenta | 1364/// | Comment | Italic grey | 1365/// | String, Number, Atom | Green | 1366/// | Whitespace, Variable | No colour | 1367/// 1368/// If you wish to use other colours or another format, use `to_tokens`. 1369/// 1370pub fn highlight_ansi(code: String) -> String { 1371 highlight_tokens(code) 1372 |> list.fold("", fn(code, token) { 1373 code 1374 <> case token { 1375 HighlightWhitespace(s) -> ansi.reset(s) 1376 HighlightKeyword(s) -> ansi.yellow(s) 1377 HighlightVariable(s) -> ansi.reset(s) 1378 HighlightString(s) -> ansi.green(s) 1379 HighlightAtom(s) -> ansi.green(s) 1380 HighlightNumber(s) -> ansi.green(s) 1381 HighlightModule(s) -> ansi.cyan(s) 1382 HighlightFunction(s) -> ansi.blue(s) 1383 HighlightOperator(s) -> ansi.magenta(s) 1384 HighlightComment(s) -> ansi.italic(ansi.gray(s)) 1385 HighlightPunctuation(s) -> ansi.reset(s) 1386 HighlightOther(s) -> ansi.reset(s) 1387 } 1388 }) 1389} 1390 1391/// Convert a string of Erlang source code into an HTML string. 1392/// Each token is wrapped in a `<span>` with a class indicating the type of 1393/// 1394/// Class names taken from [`contour`](https://hexdocs.pm/contour): 1395/// | Token | CSS class | 1396/// | ----------- | -------------- | 1397/// | Keyword | hl-keyword | 1398/// | Variable | hl-variable | 1399/// | Module | hl-module | 1400/// | Function | hl-function | 1401/// | Operator | hl-operator | 1402/// | Punctuation | hl-punctuation | 1403/// | Comment | hl-comment | 1404/// | String | hl-string | 1405/// | Atom | hl-atom | 1406/// | Number | hl-number | 1407/// | Whitespace | no class | 1408/// 1409/// Place the output within a `<pre><code>...</code></pre>` and add styling for 1410/// these CSS classes to get highlighting on your website. Here's some CSS you 1411/// could use: 1412/// 1413/// ```css 1414/// pre code .hl-comment { color: #d4d4d4; font-style: italic } 1415/// pre code .hl-function { color: #9ce7ff } 1416/// pre code .hl-keyword { color: #ffd596 } 1417/// pre code .hl-operator { color: #ffaff3 } 1418/// pre code .hl-string { color: #c8ffa7 } 1419/// pre code .hl-number { color: #c8ffa7 } 1420/// pre code .hl-atom { color: #c8ffa7 } 1421/// pre code .hl-module { color: #ffddfa } 1422/// ``` 1423/// 1424/// If you wish to use another format see `to_ansi` or `to_tokens`. 
1425/// 1426pub fn highlight_html(code: String) -> String { 1427 highlight_tokens(code) 1428 |> list.fold("", fn(acc, token) { 1429 case token { 1430 HighlightWhitespace(s) -> acc <> s 1431 HighlightKeyword(s) -> 1432 acc <> "<span class=hl-keyword>" <> houdini.escape(s) <> "</span>" 1433 HighlightVariable(s) -> 1434 acc <> "<span class=hl-variable>" <> houdini.escape(s) <> "</span>" 1435 HighlightString(s) -> 1436 acc <> "<span class=hl-string>" <> houdini.escape(s) <> "</span>" 1437 HighlightAtom(s) -> 1438 acc <> "<span class=hl-atom>" <> houdini.escape(s) <> "</span>" 1439 HighlightNumber(s) -> 1440 acc <> "<span class=hl-number>" <> houdini.escape(s) <> "</span>" 1441 HighlightModule(s) -> 1442 acc <> "<span class=hl-module>" <> houdini.escape(s) <> "</span>" 1443 HighlightFunction(s) -> 1444 acc <> "<span class=hl-function>" <> houdini.escape(s) <> "</span>" 1445 HighlightOperator(s) -> 1446 acc <> "<span class=hl-operator>" <> houdini.escape(s) <> "</span>" 1447 HighlightComment(s) -> 1448 acc <> "<span class=hl-comment>" <> houdini.escape(s) <> "</span>" 1449 HighlightPunctuation(s) -> 1450 acc <> "<span class=hl-punctuation>" <> houdini.escape(s) <> "</span>" 1451 HighlightOther(s) -> acc <> s 1452 } 1453 }) 1454} 1455 1456/// Convert a string of Erlang source code into highlighting tokens. 1457/// Highlighting tokens only contain information about the kind of syntax 1458/// being used, grouping similar tokens (e.g. all keywords) into one category. 1459/// 1460/// To convert code into syntax tokens, see `pearl.tokenise`. 1461/// 1462pub fn highlight_tokens(code: String) -> List(HighlightToken) { 1463 let #(tokens, _errors) = tokenise(new(code)) 1464 do_highlight_tokens(tokens, []) 1465} 1466 1467fn do_highlight_tokens( 1468 in: List(Token), 1469 out: List(HighlightToken), 1470) -> List(HighlightToken) { 1471 case in { 1472 [] -> list.reverse(out) 1473 1474 // Specific constructs 1475 [Atom(value, quoted: False), LeftParen, ..in] -> 1476 do_highlight_tokens(in, [ 1477 HighlightPunctuation("("), 1478 HighlightFunction(value), 1479 ..out 1480 ]) 1481 [Atom(function, quoted: False), Slash, Integer(arity), ..in] -> 1482 do_highlight_tokens(in, [ 1483 HighlightNumber(arity), 1484 HighlightPunctuation("/"), 1485 HighlightFunction(function), 1486 ..out 1487 ]) 1488 [ 1489 Atom(module, quoted: False), 1490 Colon, 1491 Atom(function, quoted: False), 1492 Slash, 1493 Integer(arity), 1494 ..in 1495 ] -> 1496 do_highlight_tokens(in, [ 1497 HighlightNumber(arity), 1498 HighlightPunctuation("/"), 1499 HighlightFunction(function), 1500 HighlightPunctuation(":"), 1501 HighlightModule(module), 1502 ..out 1503 ]) 1504 [Atom(module, quoted: False), Colon, Atom(function, quoted: False), ..in] -> 1505 do_highlight_tokens(in, [ 1506 HighlightFunction(function), 1507 HighlightPunctuation(":"), 1508 HighlightModule(module), 1509 ..out 1510 ]) 1511 [Question, Variable(macro_name), ..in] -> 1512 do_highlight_tokens(in, [ 1513 HighlightFunction(macro_name), 1514 HighlightPunctuation("?"), 1515 ..out 1516 ]) 1517 1518 // Whitespace and comments 1519 [Whitespace(space), ..in] -> 1520 do_highlight_tokens(in, [HighlightWhitespace(space), ..out]) 1521 [Comment(contents), ..in] -> 1522 do_highlight_tokens(in, [HighlightComment("%" <> contents), ..out]) 1523 [DocComment(contents), ..in] -> 1524 do_highlight_tokens(in, [HighlightComment("%%" <> contents), ..out]) 1525 [ModuleComment(contents), ..in] -> 1526 do_highlight_tokens(in, [HighlightComment("%%%" <> contents), ..out]) 1527 [EndOfFile, ..in] -> 
pub fn highlight_tokens(code: String) -> List(HighlightToken) {
  let #(tokens, _errors) = tokenise(new(code))
  do_highlight_tokens(tokens, [])
}

fn do_highlight_tokens(
  in: List(Token),
  out: List(HighlightToken),
) -> List(HighlightToken) {
  case in {
    [] -> list.reverse(out)

    // Specific constructs
    [Atom(value, quoted: False), LeftParen, ..in] ->
      do_highlight_tokens(in, [
        HighlightPunctuation("("),
        HighlightFunction(value),
        ..out
      ])
    [Atom(function, quoted: False), Slash, Integer(arity), ..in] ->
      do_highlight_tokens(in, [
        HighlightNumber(arity),
        HighlightPunctuation("/"),
        HighlightFunction(function),
        ..out
      ])
    [
      Atom(module, quoted: False),
      Colon,
      Atom(function, quoted: False),
      Slash,
      Integer(arity),
      ..in
    ] ->
      do_highlight_tokens(in, [
        HighlightNumber(arity),
        HighlightPunctuation("/"),
        HighlightFunction(function),
        HighlightPunctuation(":"),
        HighlightModule(module),
        ..out
      ])
    [Atom(module, quoted: False), Colon, Atom(function, quoted: False), ..in] ->
      do_highlight_tokens(in, [
        HighlightFunction(function),
        HighlightPunctuation(":"),
        HighlightModule(module),
        ..out
      ])
    [Question, Variable(macro_name), ..in] ->
      do_highlight_tokens(in, [
        HighlightFunction(macro_name),
        HighlightPunctuation("?"),
        ..out
      ])

    // Whitespace and comments
    [Whitespace(space), ..in] ->
      do_highlight_tokens(in, [HighlightWhitespace(space), ..out])
    [Comment(contents), ..in] ->
      do_highlight_tokens(in, [HighlightComment("%" <> contents), ..out])
    [DocComment(contents), ..in] ->
      do_highlight_tokens(in, [HighlightComment("%%" <> contents), ..out])
    [ModuleComment(contents), ..in] ->
      do_highlight_tokens(in, [HighlightComment("%%%" <> contents), ..out])
    [EndOfFile, ..in] -> do_highlight_tokens(in, out)

    // Literals
    [Character(char), ..in] ->
      do_highlight_tokens(in, [HighlightString("$" <> char), ..out])
    [Integer(int), ..in] ->
      do_highlight_tokens(in, [HighlightNumber(int), ..out])
    [Float(float), ..in] ->
      do_highlight_tokens(in, [HighlightNumber(float), ..out])
    [Atom(name:, quoted: True), ..in] ->
      do_highlight_tokens(in, [HighlightAtom("'" <> name <> "'"), ..out])
    [Atom(name:, quoted: False), ..in] ->
      do_highlight_tokens(in, [HighlightAtom(name), ..out])
    [String(contents), ..in] ->
      do_highlight_tokens(in, [HighlightString("\"" <> contents <> "\""), ..out])
    [
      TripleQuotedString(
        sigil:,
        number_of_quotes:,
        beginning_whitespace:,
        lines:,
        end_indentation:,
      ),
      ..in
    ] ->
      do_highlight_tokens(in, [
        HighlightString(
          case sigil {
            option.None -> ""
            option.Some(sigil) -> "~" <> sigil
          }
          <> string.repeat("\"", number_of_quotes)
          <> beginning_whitespace
          <> string.join(
            list.map(lines, fn(line) { end_indentation <> line }),
            "\n",
          )
          <> "\n"
          <> end_indentation
          <> string.repeat("\"", number_of_quotes),
        ),
        ..out
      ])
    [Sigil(sigil:, delimiter:, contents:), ..in] ->
      do_highlight_tokens(in, [
        HighlightString({
          let #(opening, closing) = sigil_delimiters(delimiter)
          "~" <> sigil <> opening <> contents <> closing
        }),
        ..out
      ])
    [Variable(name), ..in] ->
      do_highlight_tokens(in, [HighlightVariable(name), ..out])

    // Keywords
    [After, ..in] -> do_highlight_tokens(in, [HighlightKeyword("after"), ..out])
    [Begin, ..in] -> do_highlight_tokens(in, [HighlightKeyword("begin"), ..out])
    [Case, ..in] -> do_highlight_tokens(in, [HighlightKeyword("case"), ..out])
    [Catch, ..in] -> do_highlight_tokens(in, [HighlightKeyword("catch"), ..out])
    [Cond, ..in] -> do_highlight_tokens(in, [HighlightKeyword("cond"), ..out])
    [Else, ..in] -> do_highlight_tokens(in, [HighlightKeyword("else"), ..out])
    [End, ..in] -> do_highlight_tokens(in, [HighlightKeyword("end"), ..out])
    [Fun, ..in] -> do_highlight_tokens(in, [HighlightKeyword("fun"), ..out])
    [If, ..in] -> do_highlight_tokens(in, [HighlightKeyword("if"), ..out])
    [Let, ..in] -> do_highlight_tokens(in, [HighlightKeyword("let"), ..out])
    [Maybe, ..in] -> do_highlight_tokens(in, [HighlightKeyword("maybe"), ..out])
    [Of, ..in] -> do_highlight_tokens(in, [HighlightKeyword("of"), ..out])
    [Receive, ..in] ->
      do_highlight_tokens(in, [HighlightKeyword("receive"), ..out])
    [Try, ..in] -> do_highlight_tokens(in, [HighlightKeyword("try"), ..out])
    [When, ..in] -> do_highlight_tokens(in, [HighlightKeyword("when"), ..out])

    // Punctuation
    [LeftParen, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("("), ..out])
    [RightParen, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation(")"), ..out])
    [LeftBrace, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("{"), ..out])
    [RightBrace, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("}"), ..out])
    [LeftSquare, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("["), ..out])
    [RightSquare, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("]"), ..out])
    [Comma, ..in] -> do_highlight_tokens(in, [HighlightPunctuation(","), ..out])
    [Semicolon, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation(";"), ..out])
    [Colon, ..in] -> do_highlight_tokens(in, [HighlightPunctuation(":"), ..out])
    [Dot, ..in] -> do_highlight_tokens(in, [HighlightPunctuation("."), ..out])
    [MinusGreater, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("->"), ..out])
    [DoubleLess, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("<<"), ..out])
    [DoubleGreater, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation(">>"), ..out])
    [Hash, ..in] -> do_highlight_tokens(in, [HighlightPunctuation("#"), ..out])
    [DoubleColon, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("::"), ..out])
    [DoubleDot, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation(".."), ..out])
    [TripleDot, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("..."), ..out])
    [Question, ..in] ->
      do_highlight_tokens(in, [HighlightPunctuation("?"), ..out])

    // Operators
    [DoublePipe, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("||"), ..out])
    [EqualGreater, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("=>"), ..out])
    [ColonEqual, ..in] ->
      do_highlight_tokens(in, [HighlightOperator(":="), ..out])
    [LessMinus, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("<-"), ..out])
    [LessEqual, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("<="), ..out])
    [Pipe, ..in] -> do_highlight_tokens(in, [HighlightOperator("|"), ..out])
    [DoubleEqual, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("=="), ..out])
    [SlashEqual, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("/="), ..out])
    [EqualLess, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("=<"), ..out])
    [Less, ..in] -> do_highlight_tokens(in, [HighlightOperator("<"), ..out])
    [GreaterEqual, ..in] ->
      do_highlight_tokens(in, [HighlightOperator(">="), ..out])
    [Greater, ..in] -> do_highlight_tokens(in, [HighlightOperator(">"), ..out])
    [EqualColonEqual, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("=:="), ..out])
    [EqualSlashEqual, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("=/="), ..out])
    [Plus, ..in] -> do_highlight_tokens(in, [HighlightOperator("+"), ..out])
    [Minus, ..in] -> do_highlight_tokens(in, [HighlightOperator("-"), ..out])
    [Star, ..in] -> do_highlight_tokens(in, [HighlightOperator("*"), ..out])
    [Slash, ..in] -> do_highlight_tokens(in, [HighlightOperator("/"), ..out])
    [Bnot, ..in] -> do_highlight_tokens(in, [HighlightOperator("bnot"), ..out])
    [Div, ..in] -> do_highlight_tokens(in, [HighlightOperator("div"), ..out])
    [Rem, ..in] -> do_highlight_tokens(in, [HighlightOperator("rem"), ..out])
    [Band, ..in] -> do_highlight_tokens(in, [HighlightOperator("band"), ..out])
    [Bor, ..in] -> do_highlight_tokens(in, [HighlightOperator("bor"), ..out])
    [Bxor, ..in] -> do_highlight_tokens(in, [HighlightOperator("bxor"), ..out])
    [Bsl, ..in] -> do_highlight_tokens(in, [HighlightOperator("bsl"), ..out])
    [Bsr, ..in] -> do_highlight_tokens(in, [HighlightOperator("bsr"), ..out])
    [Not, ..in] -> do_highlight_tokens(in, [HighlightOperator("not"), ..out])
    [And, ..in] -> do_highlight_tokens(in, [HighlightOperator("and"), ..out])
    [Or, ..in] -> do_highlight_tokens(in, [HighlightOperator("or"), ..out])
    [Xor, ..in] -> do_highlight_tokens(in, [HighlightOperator("xor"), ..out])
    [Andalso, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("andalso"), ..out])
    [Orelse, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("orelse"), ..out])
    [DoublePlus, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("++"), ..out])
    [DoubleMinus, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("--"), ..out])
    [QuestionEqual, ..in] ->
      do_highlight_tokens(in, [HighlightOperator("?="), ..out])
    [Bang, ..in] -> do_highlight_tokens(in, [HighlightOperator("!"), ..out])
    [Equal, ..in] -> do_highlight_tokens(in, [HighlightOperator("="), ..out])

    // Invalid tokens
    [Unknown(char), ..in] ->
      do_highlight_tokens(in, [HighlightOther(char), ..out])
    [UnterminatedString(contents), ..in] ->
      do_highlight_tokens(in, [HighlightString("\"" <> contents), ..out])
    [UnterminatedSigil(sigil:, contents:, delimiter:), ..in] ->
      do_highlight_tokens(in, [
        HighlightString({
          let #(opening, _closing) = sigil_delimiters(delimiter)
          "~" <> sigil <> opening <> contents
        }),
        ..out
      ])
    [UnterminatedAtom(contents), ..in] ->
      do_highlight_tokens(in, [HighlightAtom("'" <> contents), ..out])
    [InvalidTripleQuotedString(contents), ..in] ->
      do_highlight_tokens(in, [
        HighlightString("\"\"\"" <> contents <> "\"\"\""),
        ..out
      ])
  }
}