+115
-2
src/pearl.gleam
+115
-2
src/pearl.gleam
···
22
22
brace_escape_sequence: Splitter,
23
23
sigil: Splitter,
24
24
sigil_verbatim: Splitter,
25
+
triple_quoted_string: Splitter,
25
26
)
26
27
}
27
28
···
36
37
UnterminatedCharacter
37
38
UnterminatedEscapeSequence
38
39
ExpectedSigilDelimiter
40
+
ExpectedWhitespaceAfterTripleQuote
41
+
InvalidTripleQuotedStringIndentation(
42
+
expected_indentation: String,
43
+
line: String,
44
+
)
39
45
}
40
46
41
47
pub fn new(source: String) -> Lexer {
···
50
56
51
57
/// Constructs the `Splitters` record, one splitter per lexing context.
fn make_splitters() -> Splitters {
  // Line endings: a bare "\n" or a "\r\n" pair.
  let line_endings = splitter.new(["\n", "\r\n"])

  // Ordinary strings and quoted atoms stop at their closing quote or at a
  // backslash.
  let string_stops = splitter.new(["\"", "\\"])
  let quoted_atom_stops = splitter.new(["'", "\\"])

  // A brace escape sequence stops at the closing brace or at a line ending.
  let brace_escape_stops = splitter.new(["}", "\n", "\r\n"])

  // Sigils stop at any closing delimiter; the non-verbatim variant also
  // stops at a backslash.
  let sigil_stops =
    splitter.new([")", "]", "}", ">", "/", "|", "'", "\"", "`", "#", "\\"])
  let sigil_verbatim_stops =
    splitter.new([")", "]", "}", ">", "/", "|", "'", "\"", "`", "#"])

  // Triple-quoted strings are consumed line by line until `"""` is found.
  let triple_quoted_stops = splitter.new(["\n", "\r\n", "\"\"\""])

  Splitters(
    until_end_of_line: line_endings,
    string: string_stops,
    quoted_atom: quoted_atom_stops,
    brace_escape_sequence: brace_escape_stops,
    sigil: sigil_stops,
    sigil_verbatim: sigil_verbatim_stops,
    triple_quoted_string: triple_quoted_stops,
  )
}
65
72
···
223
230
| "8" as char <> source
224
231
| "9" as char <> source ->
225
232
lex_number(advance(lexer, source), char, Initial, AfterNumber)
233
+
234
+
"\"\"\"" <> source -> lex_triple_quoted_string(advance(lexer, source))
226
235
227
236
"\"" <> source -> lex_string(advance(lexer, source), "")
228
237
"'" <> source -> lex_quoted_atom(advance(lexer, source), "")
···
695
704
}
696
705
697
706
_ -> #(advance(lexer, after), token.String(contents <> before))
707
+
}
708
+
}
709
+
710
+
/// Lexes a triple-quoted string, starting just after the opening `"""`.
///
/// The rest of the opening line must be whitespace followed by a line break;
/// otherwise `ExpectedWhitespaceAfterTripleQuote` is recorded and the contents
/// are lexed from the unchanged position. The content lines then have the
/// closing delimiter's indentation stripped. A line that does not start with
/// that indentation yields `InvalidTripleQuotedStringIndentation` together
/// with an `InvalidTripleQuotedString` token holding the raw text.
fn lex_triple_quoted_string(lexer: Lexer) -> #(Lexer, Token) {
  let #(opening_rest, line_break, remainder) =
    splitter.split(lexer.splitters.until_end_of_line, lexer.source)

  // A line break must exist and only whitespace may come before it.
  let opening_line_ok = line_break != "" && is_whitespace(opening_rest)

  let #(lexer, beginning_whitespace) = case opening_line_ok {
    True -> #(advance(lexer, remainder), opening_rest <> line_break)
    False -> #(error(lexer, ExpectedWhitespaceAfterTripleQuote), "")
  }

  let #(lexer, raw_lines, end_indentation) =
    lex_triple_quoted_string_contents(lexer, [], "")

  case strip_line_prefixes(raw_lines, end_indentation, []) {
    Ok(lines) -> {
      let string_token =
        token.TripleQuotedString(
          beginning_whitespace:,
          lines:,
          end_indentation:,
        )
      #(lexer, string_token)
    }

    Error(line) -> {
      // The raw lines were accumulated last-line-first, so flip them back
      // into source order before rebuilding the original text.
      let source_order = list.reverse(raw_lines)
      let contents =
        beginning_whitespace
        <> string.join(source_order, "\n")
        <> "\n"
        <> end_indentation
      let indentation_error =
        InvalidTripleQuotedStringIndentation(
          expected_indentation: end_indentation,
          line:,
        )
      #(
        error(lexer, indentation_error),
        token.InvalidTripleQuotedString(contents),
      )
    }
  }
}
749
+
750
+
/// Returns `True` when `string` is empty or consists solely of spaces,
/// newlines, carriage returns, tabs and form feeds.
fn is_whitespace(string: String) -> Bool {
  case string {
    // Drop one whitespace character and check what is left.
    " " <> rest -> is_whitespace(rest)
    "\n" <> rest -> is_whitespace(rest)
    "\r" <> rest -> is_whitespace(rest)
    "\t" <> rest -> is_whitespace(rest)
    "\f" <> rest -> is_whitespace(rest)
    // Everything was consumed without meeting a non-whitespace character.
    "" -> True
    _ -> False
  }
}
761
+
762
+
/// Removes `end_indentation` from the front of every entry in `lines`.
///
/// Stripped lines are prepended to `acc`, so the result is in the reverse
/// order of `lines`. Stops at the first entry that `strip_prefix` rejects and
/// returns that entry, unmodified, as the error.
fn strip_line_prefixes(
  lines: List(String),
  end_indentation: String,
  acc: List(String),
) -> Result(List(String), String) {
  case lines {
    [] -> Ok(acc)
    [current, ..remaining] ->
      case strip_prefix(current, end_indentation) {
        Error(Nil) -> Error(current)
        Ok(stripped) ->
          strip_line_prefixes(remaining, end_indentation, [stripped, ..acc])
      }
  }
}
776
+
777
+
/// Removes `prefix` from the start of `string`.
///
/// Returns `Ok(rest)` when `string` begins with `prefix` and `Error(Nil)` when
/// the leading bytes differ. The implementation is supplied per target by
/// `pearl_ffi` (Erlang) and `./pearl_ffi.mjs` (JavaScript).
///
/// NOTE(review): confirm that both implementations also return `Error(Nil)`,
/// rather than raising, when `string` is shorter than `prefix`.
@external(erlang, "pearl_ffi", "strip_prefix")
778
+
@external(javascript, "./pearl_ffi.mjs", "strip_prefix")
779
+
fn strip_prefix(string: String, prefix: String) -> Result(String, Nil)
780
+
781
+
/// Consumes the body of a triple-quoted string, one line at a time, up to and
/// including the closing `"""`.
///
/// - `lines` holds the completed lines seen so far, most recent first.
/// - `current_line` is the part of the current line that has already been
///   consumed (non-empty only after a `"""` that did not close the string).
///
/// Returns the advanced lexer, the collected lines (most recent first) and the
/// whitespace that preceded the closing `"""`. If the input ends before a
/// closing delimiter is found, `UnterminatedString` is recorded and the
/// returned indentation is `""`.
fn lex_triple_quoted_string_contents(
  lexer: Lexer,
  lines: List(String),
  current_line: String,
) -> #(Lexer, List(String), String) {
  let #(before, split, after) =
    splitter.split(lexer.splitters.triple_quoted_string, lexer.source)

  let before = current_line <> before

  case split {
    "\"\"\"" ->
      case is_whitespace(before) {
        // Text precedes the quotes on this line, so they are ordinary
        // content: keep them and carry on with the same line.
        False ->
          lex_triple_quoted_string_contents(
            advance(lexer, after),
            lines,
            before <> "\"\"\"",
          )
        // Only whitespace precedes the quotes: this is the closing delimiter
        // and `before` is its indentation.
        True -> #(advance(lexer, after), lines, before)
      }

    "\n" | "\r\n" ->
      lex_triple_quoted_string_contents(
        advance(lexer, after),
        [before, ..lines],
        "",
      )

    // No delimiter was found, so the input ended inside the string. Advance
    // past the text that was just stored in `lines`; otherwise it would stay
    // in `lexer.source` and be lexed a second time as ordinary tokens.
    _ -> #(
      error(advance(lexer, after), UnterminatedString),
      [before, ..lines],
      "",
    )
  }
}
700
813
+20
-3
src/pearl/token.gleam
+20
-3
src/pearl/token.gleam
···
1
+
import gleam/list
2
+
import gleam/string
3
+
1
4
pub type Token {
2
5
// Whitespace and comments
3
6
Whitespace(String)
···
11
14
Float(String)
12
15
Atom(name: String, quoted: Bool)
13
16
String(String)
14
-
TripleQuotedString(contents: String, end_indentation: String)
17
+
TripleQuotedString(
18
+
beginning_whitespace: String,
19
+
lines: List(String),
20
+
end_indentation: String,
21
+
)
15
22
Sigil(sigil: String, delimiter: SigilDelimiter, contents: String)
16
23
Variable(String)
17
24
···
98
105
UnterminatedString(String)
99
106
UnterminatedSigil(sigil: String, delimiter: SigilDelimiter, contents: String)
100
107
UnterminatedAtom(String)
108
+
InvalidTripleQuotedString(contents: String)
101
109
}
102
110
103
111
pub fn to_source(token: Token) -> String {
···
115
123
Atom(name:, quoted: True) -> "'" <> name <> "'"
116
124
Atom(name:, quoted: False) -> name
117
125
String(contents) -> "\"" <> contents <> "\""
118
-
TripleQuotedString(contents:, end_indentation:) ->
119
-
"\"\"\"\n" <> contents <> "\n" <> end_indentation <> "\"\"\""
126
+
TripleQuotedString(beginning_whitespace:, lines:, end_indentation:) ->
127
+
"\"\"\""
128
+
<> beginning_whitespace
129
+
<> string.join(
130
+
list.map(lines, fn(line) { end_indentation <> line }),
131
+
"\n",
132
+
)
133
+
<> "\n"
134
+
<> end_indentation
135
+
<> "\"\"\""
120
136
Sigil(sigil:, delimiter:, contents:) -> {
121
137
let #(opening, closing) = sigil_delimiters(delimiter)
122
138
"~" <> sigil <> opening <> contents <> closing
···
209
225
"~" <> sigil <> opening <> contents
210
226
}
211
227
UnterminatedAtom(contents) -> "'" <> contents
228
+
InvalidTripleQuotedString(contents) -> "\"\"\"" <> contents <> "\"\"\""
212
229
}
213
230
}
214
231
+14
src/pearl_ffi.erl
+14
src/pearl_ffi.erl
···
1
+
-module(pearl_ffi).
2
+
3
+
-export([strip_prefix/2]).
4
+
5
+
%% Removes Prefix from the start of String.
%%
%% Returns {ok, Rest} when String begins with Prefix, where Rest is the
%% remainder of String, and {error, nil} otherwise. A String shorter than
%% Prefix simply fails to match; the previous binary_part/3 call raised
%% badarg in that case (for example on an empty line inside an indented
%% triple-quoted string).
strip_prefix(String, Prefix) ->
    PrefixSize = byte_size(Prefix),
    case String of
        %% Prefix is already bound, so this segment only matches equal bytes.
        <<Prefix:PrefixSize/binary, Rest/binary>> ->
            {ok, Rest};
        _ ->
            {error, nil}
    end.
+9
src/pearl_ffi.mjs
+9
src/pearl_ffi.mjs
+44
test/pearl_test.gleam
+44
test/pearl_test.gleam
···
95
95
])
96
96
}
97
97
98
+
// Lexes a triple-quoted string whose closing delimiter is indented and checks
// that the resulting token carries the whitespace after the opening quotes,
// the content lines with the closing indentation stripped, and that
// indentation itself. One content line ends in `"""` preceded by text, which
// is expected to be kept as content rather than closing the string.
// NOTE(review): whitespace inside the `src` literal is significant; confirm it
// matches the expected `beginning_whitespace`, `lines` and `end_indentation`.
pub fn triple_quoted_string_test() {
99
+
let src =
100
+
"
101
+
\"\"\"
102
+
\t Hello, this is triple-quoted!
103
+
\t This line starts with a space
104
+
\t Quotes are allowed: \"\", even three: \"\"\"
105
+
\t \"\"\"
106
+
"
107
+
108
+
assert_tokens(src, [
109
+
token.TripleQuotedString(
110
+
beginning_whitespace: " \n",
111
+
lines: [
112
+
"Hello, this is triple-quoted!",
113
+
" This line starts with a space",
114
+
"Quotes are allowed: \"\", even three: \"\"\"",
115
+
],
116
+
end_indentation: "\t ",
117
+
),
118
+
])
119
+
}
120
+
98
121
pub fn unknown_character_test() {
99
122
let src = "a&b"
100
123
assert_errors(src, [pearl.UnknownCharacter("&")])
···
212
235
let src = "1.2e-"
213
236
assert_errors(src, [pearl.ExpectedExponent])
214
237
}
238
+
239
+
// Text directly after the opening `"""` must be reported as
// `ExpectedWhitespaceAfterTripleQuote`.
//
// The name carries the `_test` suffix used by the other tests in this file;
// without it the test runner does not pick the function up.
pub fn missing_whitespace_after_triple_quote_test() {
  // The line break is written as an escape so the literal does not depend on
  // how this file is indented.
  let src = "\"\"\"Hello\n\"\"\""
  assert_errors(src, [pearl.ExpectedWhitespaceAfterTripleQuote])
}
245
+
246
+
// A content line indented less than the closing `"""` must be reported as
// `InvalidTripleQuotedStringIndentation`, naming the expected indentation and
// the offending line.
//
// The name carries the `_test` suffix used by the other tests in this file;
// without it the test runner does not pick the function up.
pub fn invalid_triple_quoted_string_indentation_test() {
  // Whitespace is significant here, so every line break and indent is spelled
  // out: "Hello" has one leading space, while "world" and the closing quotes
  // have two.
  let src = "\"\"\"\n Hello\n  world\n  \"\"\""
  assert_errors(src, [
    pearl.InvalidTripleQuotedStringIndentation(
      expected_indentation: "  ",
      line: " Hello",
    ),
  ])
}