//! Actually just three programming languages in a trenchcoat
1use crate::syntax::{Amble, Document, SyntaxError};
2use crate::{Parse, Spanned, TokenPattern};
3use peekmore::{PeekMore, PeekMoreIterator};
4use trilogy_scanner::{Token, TokenType};
5
/// The parser for the Trilogy Programming Language.
///
/// This parser takes a sequence of [`Token`][]s, typically from a [`Scanner`][trilogy_scanner::Scanner],
/// and constructs it into an AST, which we call a [`Document`][].
pub struct Parser<'src> {
    /// The input token stream, boxed so any `'src`-bounded iterator can back the
    /// parser, and wrapped in a `PeekMoreIterator` for multi-token lookahead.
    source: PeekMoreIterator<Box<dyn Iterator<Item = Token> + 'src>>,
    /// Non-fatal diagnostics collected while parsing.
    warnings: Vec<SyntaxError>,
    #[cfg(test)] // expose this thing to the test framework only
    pub(crate) errors: Vec<SyntaxError>,
    /// Fatal (but recovered-from) diagnostics; if any are present, the parsed
    /// `Document` must not be used (see `Parser::parse`).
    #[cfg(not(test))]
    errors: Vec<SyntaxError>,
    /// True while only whitespace-like tokens have been consumed on the current
    /// line, i.e. we are at the true start of a line (maintained by `next`).
    pub(crate) is_line_start: bool,
    /// True when the most recently consumed token was whitespace-like
    /// (space, line ending, or a comment) — maintained by `next`.
    pub(crate) is_spaced: bool,
}
20
21impl<'src> Parser<'src> {
22 /// Construct a new parser taking input from an iterator of [`Token`][]s. The usual
23 /// choice is to use a [`Scanner`][trilogy_scanner::Scanner]
24 pub fn new<S: Iterator<Item = Token> + 'src>(source: S) -> Self {
25 Self {
26 source: (Box::new(source) as Box<dyn Iterator<Item = Token>>).peekmore(),
27 errors: vec![],
28 warnings: vec![],
29 is_line_start: true,
30 is_spaced: false,
31 }
32 }
33
34 /// Consume the tokens provided, attempting to build a [`Document`][] from them.
35 ///
36 /// Where possible, errors are recovered from and collected for later. The returned
37 /// `Document` must not be used if the [`Parse`][] contains errors.
38 pub fn parse(mut self) -> Parse<Document> {
39 let ast = Amble::parse(&mut self);
40 Parse {
41 ast: ast.content,
42 warnings: self.warnings,
43 errors: self.errors,
44 }
45 }
46}
47
impl Parser<'_> {
    /// Record a non-fatal warning without interrupting parsing.
    #[cfg_attr(not(feature = "lax"), allow(dead_code))]
    pub(crate) fn warn(&mut self, warning: SyntaxError) {
        self.warnings.push(warning);
    }

    /// Record a fatal (but recovered-from) syntax error. Parsing continues,
    /// but the resulting `Document` must not be used once any error is recorded.
    pub(crate) fn error(&mut self, error: SyntaxError) {
        self.errors.push(error);
    }

    /// Record an "expected …" error at the offending token's span, and return
    /// a copy of the error so the caller can also propagate or inspect it.
    pub(crate) fn expected(
        &mut self,
        token: Token,
        message: impl std::fmt::Display,
    ) -> SyntaxError {
        let error = SyntaxError::new(token.span, message);
        self.error(error.clone());
        error
    }

    /// Consume (discard) all whitespace and comment tokens at the front of the
    /// stream. Any `Error` tokens encountered along the way are also consumed
    /// and reported as a single "invalid characters in input" error covering
    /// their combined span.
    pub(crate) fn chomp(&mut self) {
        let mut invalid_tokens = vec![];
        loop {
            // Relies on the stream ending with an EndOfFile token: peeking can
            // only fail if tokens were consumed past that sentinel.
            let token = self.source.peek().expect("Peeked too many tokens");
            if [
                TokenType::EndOfLine,
                TokenType::CommentBlock,
                TokenType::CommentLine,
                TokenType::CommentInline,
                TokenType::Space,
            ]
            .matches(token)
            {
                self.next();
                continue;
            }
            if TokenType::Error.matches(token) {
                invalid_tokens.push(self.next());
                continue;
            }
            break;
        }
        if !invalid_tokens.is_empty() {
            self.error(SyntaxError::new(
                invalid_tokens.span(),
                "invalid characters in input",
            ));
        }
    }

    /// Non-destructive counterpart of `chomp`: advances the peek *cursor* past
    /// whitespace, comments, and `Error` tokens without consuming anything and
    /// without reporting errors. Stops at (without passing) EndOfFile or the
    /// first significant token.
    fn peek_chomp(&mut self) {
        loop {
            let Some(token) = self.source.peek() else {
                return;
            };
            if token.token_type == TokenType::EndOfFile {
                return;
            }
            if [
                TokenType::EndOfLine,
                TokenType::CommentBlock,
                TokenType::CommentLine,
                TokenType::CommentInline,
                TokenType::Space,
                TokenType::Error,
            ]
            .matches(token)
            {
                self.source.advance_cursor();
                continue;
            }
            break;
        }
    }

    /// Consume and return the next raw token (whitespace included), updating
    /// the `is_line_start` / `is_spaced` flags based on its type.
    fn next(&mut self) -> Token {
        // Technically probably shouldn't unwrap here but if we consume the EndOfFile
        // it has to be at the end, at which point we consume no more, so this should
        // be safe.
        let token = self.source.next().expect("Consumed too many tokens");

        #[rustfmt::skip] {
            use TokenType::*;
            // Different types of whitespace imply that we are truly at the start of a line
            // without any leading (non-whitespace) characters, as opposed to only the first
            // whole token on a line but other partial tokens were on this line already
            // (specifically, block comments).
            //
            // The ByteOrderMark, while not technically whitespace (or even allowed in most
            // parts of the code, for that matter) is included here because its presence is
            // not considered at all, so should not change the initial states of these bits
            // in much the same way that StartOfFile does not change them.
            //
            // That said, cases where line endings and startings are needed are uncertain,
            // maybe I don't need both of these flags.
            self.is_line_start = [EndOfLine, CommentLine, DocInner, DocOuter, ByteOrderMark, StartOfFile].matches(&token) || self.is_line_start && [CommentInline, Space].matches(&token);
            self.is_spaced = [EndOfLine, CommentLine, DocInner, DocOuter, CommentInline, CommentBlock, Space].matches(&token);
        };
        token
    }

    /// Cursor-based lookahead step: skip insignificant tokens with
    /// `peek_chomp`, clone the token under the cursor, and leave the cursor
    /// one position past it. Returns `None` at end of stream. Used by `peekn`
    /// to walk arbitrarily far ahead without consuming.
    fn peek_next(&mut self) -> Option<Token> {
        self.peek_chomp();
        let peeked = self.source.peek().cloned();
        self.source.advance_cursor();
        peeked
    }

    /// Expect the exact two-token sequence `!(` with no whitespace between,
    /// consuming and returning both tokens on success, or returning the
    /// (unconsumed) next significant token on failure.
    pub(crate) fn expect_bang_oparen(&mut self) -> Result<(Token, Token), Token> {
        use TokenType::*;
        // Though tokenized as two tokens, this is kind of treated as one token as we
        // require `!(` to be unspaced in procedure calls. Since whitespace is a token,
        // a low-level peekmore after the high-level peek will sufficiently detect this.
        let next = self.peek().clone();
        let after = self.source.peek_nth(1);
        // `after` is only unwrapped when `next` is OpBang; in that case at least the
        // EndOfFile sentinel should still follow it in the stream, so the unwrap is
        // presumably safe — NOTE(review): confirm `peek_nth` is relative to the same
        // cursor position `peek` just looked at.
        if next.token_type == OpBang && after.unwrap().token_type == OParen {
            let bang = self.expect(OpBang).unwrap();
            let oparen = self.expect(OParen).unwrap();
            Ok((bang, oparen))
        } else {
            Err(next)
        }
    }

    /// Peek at the next significant token, first consuming (and reporting, via
    /// `chomp`) any leading whitespace, comments, and invalid tokens.
    pub(crate) fn peek(&mut self) -> &Token {
        self.chomp();
        self.source.peek().unwrap()
    }

    /// Peek at the next raw token without chomping whitespace/comments first.
    pub(crate) fn force_peek(&mut self) -> &Token {
        self.source.peek().unwrap()
    }

    /// Look ahead at the next `n` significant tokens without consuming any of
    /// them. Returns `None` if the stream ends before `n` tokens are found.
    pub(crate) fn peekn(&mut self, n: usize) -> Option<Vec<Token>> {
        self.chomp();
        let tokens = (0..n).map(|_| self.peek_next()).collect::<Option<Vec<_>>>();
        // Rewind the peek cursor so later peeks start from the stream front again.
        self.source.reset_cursor();
        tokens
    }

    /// Error recovery: discard tokens until one matching `pattern` is seen
    /// (the matching token itself is left unconsumed).
    /// NOTE(review): if `pattern` can never match and does not cover
    /// EndOfFile, this loop would consume past the sentinel and panic inside
    /// `next` — callers presumably always include a terminator in the pattern.
    pub(crate) fn synchronize(&mut self, pattern: impl TokenPattern) {
        while !pattern.matches(self.peek()) {
            self.next();
        }
    }

    /// Consume and return the next significant token if it matches `pattern`;
    /// otherwise leave it unconsumed and return a clone of it as the `Err`.
    pub(crate) fn expect(&mut self, pattern: impl TokenPattern) -> Result<Token, Token> {
        let token = self.peek();
        if !pattern.matches(token) {
            return Err(token.clone());
        }
        Ok(self.next())
    }

    /// Unconditionally consume and return the next significant token.
    pub(crate) fn consume(&mut self) -> Token {
        self.chomp();
        self.next()
    }

    /// Test the next significant token against `pattern` without consuming it:
    /// `Ok(token)` if it matches, `Err(token)` if it does not.
    pub(crate) fn check(&mut self, pattern: impl TokenPattern) -> Result<&Token, &Token> {
        let token = self.peek();
        if pattern.matches(token) {
            Ok(token)
        } else {
            Err(token)
        }
    }
}
215}