An Erlang lexer and syntax highlighter in Gleam

Add support for triple-quoted sigils

Changed files
+80 -29
src
test
+46 -27
src/pearl.gleam
··· 1 1 import gleam/int 2 2 import gleam/list 3 + import gleam/option.{type Option, None, Some} 3 4 import gleam/string 4 5 import pearl/token.{type Token} 5 6 import splitter.{type Splitter} ··· 231 232 | "9" as char <> source -> 232 233 lex_number(advance(lexer, source), char, Initial, AfterNumber) 233 234 234 - "\"\"\"" <> source -> lex_triple_quoted_string(advance(lexer, source)) 235 + "\"\"\"" <> source -> lex_triple_quoted_string(advance(lexer, source), None) 235 236 236 237 "\"" <> source -> lex_string(advance(lexer, source), "") 237 238 "'" <> source -> lex_quoted_atom(advance(lexer, source), "") ··· 601 602 _ -> #(lexer, "", False) 602 603 } 603 604 604 - let #(lexer, delimiter, closing_char) = case lexer.source { 605 - "(" <> source -> #(advance(lexer, source), token.SigilParen, ")") 606 - "[" <> source -> #(advance(lexer, source), token.SigilSquare, "]") 607 - "{" <> source -> #(advance(lexer, source), token.SigilBrace, "}") 608 - "<" <> source -> #(advance(lexer, source), token.SigilAngle, ">") 605 + case lexer.source { 606 + "\"\"\"" <> source -> 607 + lex_triple_quoted_string(advance(lexer, source), Some(sigil)) 608 + _ -> { 609 + let #(lexer, delimiter, closing_char) = case lexer.source { 610 + "(" <> source -> #(advance(lexer, source), token.SigilParen, ")") 611 + "[" <> source -> #(advance(lexer, source), token.SigilSquare, "]") 612 + "{" <> source -> #(advance(lexer, source), token.SigilBrace, "}") 613 + "<" <> source -> #(advance(lexer, source), token.SigilAngle, ">") 609 614 610 - "/" <> source -> #(advance(lexer, source), token.SigilSlash, "/") 611 - "|" <> source -> #(advance(lexer, source), token.SigilPipe, "|") 612 - "'" <> source -> #(advance(lexer, source), token.SigilSingleQuote, "'") 613 - "\"" <> source -> #(advance(lexer, source), token.SigilDoubleQuote, "\"") 614 - "`" <> source -> #(advance(lexer, source), token.SigilBacktick, "`") 615 - "#" <> source -> #(advance(lexer, source), token.SigilHash, "#") 615 + "/" <> source -> #(advance(lexer, source), token.SigilSlash, "/") 616 + "|" <> source -> #(advance(lexer, source), token.SigilPipe, "|") 617 + "'" <> source -> #(advance(lexer, source), token.SigilSingleQuote, "'") 618 + "\"" <> source -> #( 619 + advance(lexer, source), 620 + token.SigilDoubleQuote, 621 + "\"", 622 + ) 623 + "`" <> source -> #(advance(lexer, source), token.SigilBacktick, "`") 624 + "#" <> source -> #(advance(lexer, source), token.SigilHash, "#") 616 625 617 - _ -> #(error(lexer, ExpectedSigilDelimiter), token.SigilNone, "") 618 - } 626 + _ -> #(error(lexer, ExpectedSigilDelimiter), token.SigilNone, "") 627 + } 628 + 629 + case delimiter { 630 + token.SigilNone -> #( 631 + lexer, 632 + token.UnterminatedSigil(sigil:, delimiter:, contents: ""), 633 + ) 634 + _ -> { 635 + let splitter = case verbatim { 636 + False -> lexer.splitters.sigil 637 + True -> lexer.splitters.sigil_verbatim 638 + } 619 639 620 - case delimiter { 621 - token.SigilNone -> #( 622 - lexer, 623 - token.UnterminatedSigil(sigil:, delimiter:, contents: ""), 624 - ) 625 - _ -> { 626 - let splitter = case verbatim { 627 - False -> lexer.splitters.sigil 628 - True -> lexer.splitters.sigil_verbatim 640 + do_lex_sigil(lexer, sigil, delimiter, closing_char, splitter, "") 641 + } 629 642 } 630 - 631 - do_lex_sigil(lexer, sigil, delimiter, closing_char, splitter, "") 632 643 } 633 644 } 634 645 } ··· 707 718 } 708 719 } 709 720 710 - fn lex_triple_quoted_string(lexer: Lexer) -> #(Lexer, Token) { 721 + fn lex_triple_quoted_string( 722 + lexer: Lexer, 723 + sigil: Option(String), 724 + ) -> #(Lexer, Token) { 711 725 let #(lexer, beginning_whitespace) = case 712 726 splitter.split(lexer.splitters.until_end_of_line, lexer.source) 713 727 { ··· 742 756 } 743 757 Ok(lines) -> #( 744 758 lexer, 745 - token.TripleQuotedString(beginning_whitespace:, lines:, end_indentation:), 759 + token.TripleQuotedString( 760 + sigil:, 761 + beginning_whitespace:, 762 + lines:, 763 + end_indentation:, 764 + ), 746 765 ) 747 766 } 748 767 }
+8 -2
src/pearl/token.gleam
··· 1 1 import gleam/list 2 + import gleam/option 2 3 import gleam/string 3 4 4 5 pub type Token { ··· 15 16 Atom(name: String, quoted: Bool) 16 17 String(String) 17 18 TripleQuotedString( 19 + sigil: option.Option(String), 18 20 beginning_whitespace: String, 19 21 lines: List(String), 20 22 end_indentation: String, ··· 123 125 Atom(name:, quoted: True) -> "'" <> name <> "'" 124 126 Atom(name:, quoted: False) -> name 125 127 String(contents) -> "\"" <> contents <> "\"" 126 - TripleQuotedString(beginning_whitespace:, lines:, end_indentation:) -> 127 - "\"\"\"" 128 + TripleQuotedString(sigil:, beginning_whitespace:, lines:, end_indentation:) -> 129 + case sigil { 130 + option.None -> "" 131 + option.Some(sigil) -> "~" <> sigil 132 + } 133 + <> "\"\"\"" 128 134 <> beginning_whitespace 129 135 <> string.join( 130 136 list.map(lines, fn(line) { end_indentation <> line }),
+21
test/pearl_test.gleam
··· 1 1 import gleam/list 2 + import gleam/option.{None, Some} 2 3 import gleeunit 3 4 import pearl 4 5 import pearl/token ··· 107 108 108 109 assert_tokens(src, [ 109 110 token.TripleQuotedString( 111 + sigil: None, 110 112 beginning_whitespace: " \n", 111 113 lines: [ 112 114 "Hello, this is triple-quoted!", ··· 114 116 "Quotes are allowed: \"\", even three: \"\"\"", 115 117 ], 116 118 end_indentation: "\t ", 119 + ), 120 + ]) 121 + } 122 + 123 + pub fn triple_quoted_string_sigil_test() { 124 + let src = 125 + " 126 + ~b\"\"\" 127 + Hello 128 + This is a triple-quoted sigil 129 + \"\"\" 130 + " 131 + 132 + assert_tokens(src, [ 133 + token.TripleQuotedString( 134 + sigil: Some("b"), 135 + beginning_whitespace: "\n", 136 + lines: ["Hello", "This is a triple-quoted sigil"], 137 + end_indentation: " ", 117 138 ), 118 139 ]) 119 140 }
+5
test/tokens.txt
··· 20 20 Three quotes allowed here: """ and here: """ 21 21 """ 22 22 23 + ~B""" 24 + Triple quote sigil! 25 + bye 26 + """ 27 + 23 28 after 24 29 begin 25 30 case