// Trilogy — "actually just three programming languages in a trenchcoat".
// Parser entry point (~215 lines).
1use crate::syntax::{Amble, Document, SyntaxError}; 2use crate::{Parse, Spanned, TokenPattern}; 3use peekmore::{PeekMore, PeekMoreIterator}; 4use trilogy_scanner::{Token, TokenType}; 5 6/// The parser for the Trilogy Programming Language. 7/// 8/// This parser takes a sequence of [`Token`][]s, typically from a [`Scanner`][trilogy_scanner::Scanner], 9/// and constructs it into an AST, which we call a [`Document`][]. 10pub struct Parser<'src> { 11 source: PeekMoreIterator<Box<dyn Iterator<Item = Token> + 'src>>, 12 warnings: Vec<SyntaxError>, 13 #[cfg(test)] // expose this thing to the test framework only 14 pub(crate) errors: Vec<SyntaxError>, 15 #[cfg(not(test))] 16 errors: Vec<SyntaxError>, 17 pub(crate) is_line_start: bool, 18 pub(crate) is_spaced: bool, 19} 20 21impl<'src> Parser<'src> { 22 /// Construct a new parser taking input from an iterator of [`Token`][]s. The usual 23 /// choice is to use a [`Scanner`][trilogy_scanner::Scanner] 24 pub fn new<S: Iterator<Item = Token> + 'src>(source: S) -> Self { 25 Self { 26 source: (Box::new(source) as Box<dyn Iterator<Item = Token>>).peekmore(), 27 errors: vec![], 28 warnings: vec![], 29 is_line_start: true, 30 is_spaced: false, 31 } 32 } 33 34 /// Consume the tokens provided, attempting to build a [`Document`][] from them. 35 /// 36 /// Where possible, errors are recovered from and collected for later. The returned 37 /// `Document` must not be used if the [`Parse`][] contains errors. 
38 pub fn parse(mut self) -> Parse<Document> { 39 let ast = Amble::parse(&mut self); 40 Parse { 41 ast: ast.content, 42 warnings: self.warnings, 43 errors: self.errors, 44 } 45 } 46} 47 48impl Parser<'_> { 49 #[cfg_attr(not(feature = "lax"), allow(dead_code))] 50 pub(crate) fn warn(&mut self, warning: SyntaxError) { 51 self.warnings.push(warning); 52 } 53 54 pub(crate) fn error(&mut self, error: SyntaxError) { 55 self.errors.push(error); 56 } 57 58 pub(crate) fn expected( 59 &mut self, 60 token: Token, 61 message: impl std::fmt::Display, 62 ) -> SyntaxError { 63 let error = SyntaxError::new(token.span, message); 64 self.error(error.clone()); 65 error 66 } 67 68 pub(crate) fn chomp(&mut self) { 69 let mut invalid_tokens = vec![]; 70 loop { 71 let token = self.source.peek().expect("Peeked too many tokens"); 72 if [ 73 TokenType::EndOfLine, 74 TokenType::CommentBlock, 75 TokenType::CommentLine, 76 TokenType::CommentInline, 77 TokenType::Space, 78 ] 79 .matches(token) 80 { 81 self.next(); 82 continue; 83 } 84 if TokenType::Error.matches(token) { 85 invalid_tokens.push(self.next()); 86 continue; 87 } 88 break; 89 } 90 if !invalid_tokens.is_empty() { 91 self.error(SyntaxError::new( 92 invalid_tokens.span(), 93 "invalid characters in input", 94 )); 95 } 96 } 97 98 fn peek_chomp(&mut self) { 99 loop { 100 let Some(token) = self.source.peek() else { 101 return; 102 }; 103 if token.token_type == TokenType::EndOfFile { 104 return; 105 } 106 if [ 107 TokenType::EndOfLine, 108 TokenType::CommentBlock, 109 TokenType::CommentLine, 110 TokenType::CommentInline, 111 TokenType::Space, 112 TokenType::Error, 113 ] 114 .matches(token) 115 { 116 self.source.advance_cursor(); 117 continue; 118 } 119 break; 120 } 121 } 122 123 fn next(&mut self) -> Token { 124 // Technically probably shouldn't unwrap here but if we consume the EndOfFile 125 // it has to be at the end, at which point we consume no more, so this should 126 // be safe. 
127 let token = self.source.next().expect("Consumed too many tokens"); 128 129 #[rustfmt::skip] { 130 use TokenType::*; 131 // Different types of whitespace imply that we are truly at the start of a line 132 // without any leading (non-whitespace) characters, as opposed to only the first 133 // whole token on a line but other partial tokens were on this line already 134 // (specifically, block comments). 135 // 136 // The ByteOrderMark, while not technically whitespace (or even allowed in most 137 // parts of the code, for that matter) is included here because its presence is 138 // not considered at all, so should not change the initial states of these bits 139 // in much the same way that StartOfFile does not change them. 140 // 141 // That said, cases where line endings and startings are needed are uncertain, 142 // maybe I don't need both of these flags. 143 self.is_line_start = [EndOfLine, CommentLine, DocInner, DocOuter, ByteOrderMark, StartOfFile].matches(&token) || self.is_line_start && [CommentInline, Space].matches(&token); 144 self.is_spaced = [EndOfLine, CommentLine, DocInner, DocOuter, CommentInline, CommentBlock, Space].matches(&token); 145 }; 146 token 147 } 148 149 fn peek_next(&mut self) -> Option<Token> { 150 self.peek_chomp(); 151 let peeked = self.source.peek().cloned(); 152 self.source.advance_cursor(); 153 peeked 154 } 155 156 pub(crate) fn expect_bang_oparen(&mut self) -> Result<(Token, Token), Token> { 157 use TokenType::*; 158 // Though tokenized as two tokens, this is kind of treated as one token as we 159 // require `!(` to be unspaced in procedure calls. Since whitespace is a token, 160 // a low-level peekmore after the high-level peek will sufficiently detect this. 
161 let next = self.peek().clone(); 162 let after = self.source.peek_nth(1); 163 if next.token_type == OpBang && after.unwrap().token_type == OParen { 164 let bang = self.expect(OpBang).unwrap(); 165 let oparen = self.expect(OParen).unwrap(); 166 Ok((bang, oparen)) 167 } else { 168 Err(next) 169 } 170 } 171 172 pub(crate) fn peek(&mut self) -> &Token { 173 self.chomp(); 174 self.source.peek().unwrap() 175 } 176 177 pub(crate) fn force_peek(&mut self) -> &Token { 178 self.source.peek().unwrap() 179 } 180 181 pub(crate) fn peekn(&mut self, n: usize) -> Option<Vec<Token>> { 182 self.chomp(); 183 let tokens = (0..n).map(|_| self.peek_next()).collect::<Option<Vec<_>>>(); 184 self.source.reset_cursor(); 185 tokens 186 } 187 188 pub(crate) fn synchronize(&mut self, pattern: impl TokenPattern) { 189 while !pattern.matches(self.peek()) { 190 self.next(); 191 } 192 } 193 194 pub(crate) fn expect(&mut self, pattern: impl TokenPattern) -> Result<Token, Token> { 195 let token = self.peek(); 196 if !pattern.matches(token) { 197 return Err(token.clone()); 198 } 199 Ok(self.next()) 200 } 201 202 pub(crate) fn consume(&mut self) -> Token { 203 self.chomp(); 204 self.next() 205 } 206 207 pub(crate) fn check(&mut self, pattern: impl TokenPattern) -> Result<&Token, &Token> { 208 let token = self.peek(); 209 if pattern.matches(token) { 210 Ok(token) 211 } else { 212 Err(token) 213 } 214 } 215}