An Erlang lexer and syntax highlighter in Gleam

Add basic support for triple-quoted strings

+115 -2
src/pearl.gleam
··· 22 22 brace_escape_sequence: Splitter, 23 23 sigil: Splitter, 24 24 sigil_verbatim: Splitter, 25 + triple_quoted_string: Splitter, 25 26 ) 26 27 } 27 28 ··· 36 37 UnterminatedCharacter 37 38 UnterminatedEscapeSequence 38 39 ExpectedSigilDelimiter 40 + ExpectedWhitespaceAfterTripleQuote 41 + InvalidTripleQuotedStringIndentation( 42 + expected_indentation: String, 43 + line: String, 44 + ) 39 45 } 40 46 41 47 pub fn new(source: String) -> Lexer { ··· 50 56 51 57 fn make_splitters() -> Splitters { 52 58 Splitters( 53 - until_end_of_line: splitter.new(["\n", "\r"]), 59 + until_end_of_line: splitter.new(["\n", "\r\n"]), 54 60 string: splitter.new(["\"", "\\"]), 55 61 quoted_atom: splitter.new(["'", "\\"]), 56 - brace_escape_sequence: splitter.new(["}", "\n", "\r"]), 62 + brace_escape_sequence: splitter.new(["}", "\n", "\r\n"]), 57 63 sigil: splitter.new([ 58 64 ")", "]", "}", ">", "/", "|", "'", "\"", "`", "#", "\\", 59 65 ]), 60 66 sigil_verbatim: splitter.new([ 61 67 ")", "]", "}", ">", "/", "|", "'", "\"", "`", "#", 62 68 ]), 69 + triple_quoted_string: splitter.new(["\n", "\r\n", "\"\"\""]), 63 70 ) 64 71 } 65 72 ··· 223 230 | "8" as char <> source 224 231 | "9" as char <> source -> 225 232 lex_number(advance(lexer, source), char, Initial, AfterNumber) 233 + 234 + "\"\"\"" <> source -> lex_triple_quoted_string(advance(lexer, source)) 226 235 227 236 "\"" <> source -> lex_string(advance(lexer, source), "") 228 237 "'" <> source -> lex_quoted_atom(advance(lexer, source), "") ··· 695 704 } 696 705 697 706 _ -> #(advance(lexer, after), token.String(contents <> before)) 707 + } 708 + } 709 + 710 + fn lex_triple_quoted_string(lexer: Lexer) -> #(Lexer, Token) { 711 + let #(lexer, beginning_whitespace) = case 712 + splitter.split(lexer.splitters.until_end_of_line, lexer.source) 713 + { 714 + #(_, "", _) -> #(error(lexer, ExpectedWhitespaceAfterTripleQuote), "") 715 + #(before, newline, after) -> 716 + case is_whitespace(before) { 717 + True -> #(advance(lexer, after), 
before <> newline) 718 + False -> #(error(lexer, ExpectedWhitespaceAfterTripleQuote), "") 719 + } 720 + } 721 + 722 + let #(lexer, lines, end_indentation) = 723 + lex_triple_quoted_string_contents(lexer, [], "") 724 + 725 + case strip_line_prefixes(lines, end_indentation, []) { 726 + Error(line) -> { 727 + let contents = 728 + beginning_whitespace 729 + <> string.join(list.reverse(lines), "\n") 730 + <> "\n" 731 + <> end_indentation 732 + #( 733 + error( 734 + lexer, 735 + InvalidTripleQuotedStringIndentation( 736 + expected_indentation: end_indentation, 737 + line:, 738 + ), 739 + ), 740 + token.InvalidTripleQuotedString(contents), 741 + ) 742 + } 743 + Ok(lines) -> #( 744 + lexer, 745 + token.TripleQuotedString(beginning_whitespace:, lines:, end_indentation:), 746 + ) 747 + } 748 + } 749 + 750 + fn is_whitespace(string: String) -> Bool { 751 + case string { 752 + "" -> True 753 + " " <> string 754 + | "\n" <> string 755 + | "\r" <> string 756 + | "\t" <> string 757 + | "\f" <> string -> is_whitespace(string) 758 + _ -> False 759 + } 760 + } 761 + 762 + fn strip_line_prefixes( 763 + lines: List(String), 764 + end_indentation: String, 765 + acc: List(String), 766 + ) -> Result(List(String), String) { 767 + case lines { 768 + [] -> Ok(acc) 769 + [line, ..lines] -> 770 + case strip_prefix(line, end_indentation) { 771 + Ok(line) -> strip_line_prefixes(lines, end_indentation, [line, ..acc]) 772 + Error(_) -> Error(line) 773 + } 774 + } 775 + } 776 + 777 + @external(erlang, "pearl_ffi", "strip_prefix") 778 + @external(javascript, "./pearl_ffi.mjs", "strip_prefix") 779 + fn strip_prefix(string: String, prefix: String) -> Result(String, Nil) 780 + 781 + fn lex_triple_quoted_string_contents( 782 + lexer: Lexer, 783 + lines: List(String), 784 + current_line: String, 785 + ) -> #(Lexer, List(String), String) { 786 + let #(before, split, after) = 787 + splitter.split(lexer.splitters.triple_quoted_string, lexer.source) 788 + 789 + let before = current_line <> before 790 + 791 
+ case split { 792 + "\"\"\"" -> 793 + case is_whitespace(before) { 794 + False -> 795 + lex_triple_quoted_string_contents( 796 + advance(lexer, after), 797 + lines, 798 + before <> "\"\"\"", 799 + ) 800 + True -> #(advance(lexer, after), lines, before) 801 + } 802 + 803 + "\n" | "\r\n" -> 804 + lex_triple_quoted_string_contents( 805 + advance(lexer, after), 806 + [before, ..lines], 807 + "", 808 + ) 809 + 810 + _ -> #(error(advance(lexer, after), UnterminatedString), [before, ..lines], "") 698 811 } 699 812 } 700 813
+20 -3
src/pearl/token.gleam
··· 1 + import gleam/list 2 + import gleam/string 3 + 1 4 pub type Token { 2 5 // Whitespace and comments 3 6 Whitespace(String) ··· 11 14 Float(String) 12 15 Atom(name: String, quoted: Bool) 13 16 String(String) 14 - TripleQuotedString(contents: String, end_indentation: String) 17 + TripleQuotedString( 18 + beginning_whitespace: String, 19 + lines: List(String), 20 + end_indentation: String, 21 + ) 15 22 Sigil(sigil: String, delimiter: SigilDelimiter, contents: String) 16 23 Variable(String) 17 24 ··· 98 105 UnterminatedString(String) 99 106 UnterminatedSigil(sigil: String, delimiter: SigilDelimiter, contents: String) 100 107 UnterminatedAtom(String) 108 + InvalidTripleQuotedString(contents: String) 101 109 } 102 110 103 111 pub fn to_source(token: Token) -> String { ··· 115 123 Atom(name:, quoted: True) -> "'" <> name <> "'" 116 124 Atom(name:, quoted: False) -> name 117 125 String(contents) -> "\"" <> contents <> "\"" 118 - TripleQuotedString(contents:, end_indentation:) -> 119 - "\"\"\"\n" <> contents <> "\n" <> end_indentation <> "\"\"\"" 126 + TripleQuotedString(beginning_whitespace:, lines:, end_indentation:) -> 127 + "\"\"\"" 128 + <> beginning_whitespace 129 + <> string.join( 130 + list.map(lines, fn(line) { end_indentation <> line }), 131 + "\n", 132 + ) 133 + <> "\n" 134 + <> end_indentation 135 + <> "\"\"\"" 120 136 Sigil(sigil:, delimiter:, contents:) -> { 121 137 let #(opening, closing) = sigil_delimiters(delimiter) 122 138 "~" <> sigil <> opening <> contents <> closing ··· 209 225 "~" <> sigil <> opening <> contents 210 226 } 211 227 UnterminatedAtom(contents) -> "'" <> contents 228 + InvalidTripleQuotedString(contents) -> "\"\"\"" <> contents <> "\"\"\"" 212 229 } 213 230 } 214 231
+14
src/pearl_ffi.erl
-module(pearl_ffi).

-export([strip_prefix/2]).

%% @doc Remove `Prefix' from the start of the binary `String'.
%%
%% Returns `{ok, Rest}' when `String' begins with `Prefix', where `Rest' is
%% everything after the prefix, and `{error, nil}' otherwise.
%%
%% The match is done with a binary pattern rather than `binary_part/3', so a
%% `String' that is shorter than `Prefix' (for example an empty line checked
%% against a non-empty indentation) yields `{error, nil}' instead of raising
%% `badarg'.
strip_prefix(String, Prefix) ->
    Prefix_size = byte_size(Prefix),

    case String of
        %% `Prefix' is already bound, so this only matches when the first
        %% `Prefix_size' bytes of `String' are equal to it.
        <<Prefix:Prefix_size/binary, Rest/binary>> ->
            {ok, Rest};
        _ ->
            {error, nil}
    end.
+9
src/pearl_ffi.mjs
import { Ok, Error } from "./gleam.mjs";

/**
 * Remove `prefix` from the start of `string`.
 *
 * @param {string} string - The text to strip.
 * @param {string} prefix - The leading text that must be present.
 * @returns An `Ok` wrapping the remainder of `string` when it starts with
 *   `prefix`, otherwise an `Error` wrapping `undefined`.
 */
export function strip_prefix(string, prefix) {
  const hasPrefix = string.startsWith(prefix);
  if (!hasPrefix) {
    return new Error(undefined);
  }

  const remainder = string.slice(prefix.length);
  return new Ok(remainder);
}
+44
test/pearl_test.gleam
··· 95 95 ]) 96 96 } 97 97 98 + pub fn triple_quoted_string_test() { 99 + let src = 100 + " 101 + \"\"\" 102 + \t Hello, this is triple-quoted! 103 + \t This line starts with a space 104 + \t Quotes are allowed: \"\", even three: \"\"\" 105 + \t \"\"\" 106 + " 107 + 108 + assert_tokens(src, [ 109 + token.TripleQuotedString( 110 + beginning_whitespace: " \n", 111 + lines: [ 112 + "Hello, this is triple-quoted!", 113 + " This line starts with a space", 114 + "Quotes are allowed: \"\", even three: \"\"\"", 115 + ], 116 + end_indentation: "\t ", 117 + ), 118 + ]) 119 + } 120 + 98 121 pub fn unknown_character_test() { 99 122 let src = "a&b" 100 123 assert_errors(src, [pearl.UnknownCharacter("&")]) ··· 212 235 let src = "1.2e-" 213 236 assert_errors(src, [pearl.ExpectedExponent]) 214 237 } 238 + 239 + pub fn missing_whitespace_after_triple_quote_test() { 240 + let src = 241 + "\"\"\"Hello 242 + \"\"\"" 243 + assert_errors(src, [pearl.ExpectedWhitespaceAfterTripleQuote]) 244 + } 245 + 246 + pub fn invalid_triple_quoted_string_indentation_test() { 247 + let src = 248 + "\"\"\" 249 + Hello 250 + world 251 + \"\"\"" 252 + assert_errors(src, [ 253 + pearl.InvalidTripleQuotedStringIndentation( 254 + expected_indentation: " ", 255 + line: " Hello", 256 + ), 257 + ]) 258 + }
+6
test/tokens.txt
··· 14 14 ~/I need "quotes"/ ~<This is also possible> ~b"Bit array" 15 15 Variable Something_else@5 16 16 17 + """ 18 + Triple quoted! 19 + This line is indented, and has quotes: "" 20 + Three quotes allowed here: """ and here: """ 21 + """ 22 + 17 23 after 18 24 begin 19 25 case