use std::iter::Peekable; use ecow::EcoString; use crate::{ lexer::{self, Span, Token, TokenKind}, parser::ast::{ Annotation, Argument, BinaryOperator, Constant, Definition, Expression, Function, Module, Mutability, Parameter, Publicity, Statement, StructDefinition, StructField, UnaryOperator, UntypedConstant, UntypedDefinition, UntypedExpression, UntypedFunction, UntypedModule, UntypedParameter, UntypedStatement, }, }; mod ast; #[cfg(test)] mod tests; #[derive(Debug)] pub enum Error { LexError(Vec), UnexpectedEndOfFile { expected: Vec<&'static str>, }, UnexpectedToken { token: Token, expected: Vec<&'static str>, }, InvalidEscapeSequence { location: Span, }, } pub fn parse(src: &str) -> Result { let lexer = lexer::new(src); let mut parser = Parser::new(lexer.peekable()); let result = parser.parse(); if !parser.lexer_errors.is_empty() { Err(Error::LexError(parser.lexer_errors)) } else { result } } #[cfg(test)] pub fn parse_statement(src: &str) -> Result { let lexer = lexer::new(src); let mut parser = Parser::new(lexer.peekable()); let result = parser.parse_statement(); if !parser.lexer_errors.is_empty() { Err(Error::LexError(parser.lexer_errors)) } else { result } } struct Parser { tokens: Peekable, lexer_errors: Vec, no_braces: Vec, } impl Parser where Tokens: Iterator>, { fn new(tokens: Peekable) -> Self { Self { tokens, lexer_errors: Vec::new(), no_braces: vec![false], } } fn nest(&mut self) { self.no_braces.push(false); } fn denest(&mut self) { self.no_braces.pop(); } fn peek(&mut self) -> Option { match self.tokens.peek() { Some(Err(error)) => { self.lexer_errors.push(*error); self.tokens.next(); self.peek() } Some(Ok(token)) => Some(token.clone()), None => None, } } fn expect_peek(&mut self, one_of: &[&'static str]) -> Result { match self.peek() { Some(token) => Ok(token), None => Err(Error::UnexpectedEndOfFile { expected: one_of.into(), }), } } fn next(&mut self) -> Option { match self.tokens.next() { Some(Err(error)) => { self.lexer_errors.push(error); self.next() } Some(Ok(token)) => Some(token), None => None, } } fn expect_token(&mut self, one_of: &[&'static str]) -> Result { if let Some(token) = self.next() { Ok(token) } else { Err(Error::UnexpectedEndOfFile { expected: one_of.into(), }) } } fn expect_identifier(&mut self) -> Result<(EcoString, Span), Error> { match self.next() { Some(Token { kind: TokenKind::Identifier(name), location, }) => Ok((name, location)), Some(token) => Err(Error::UnexpectedToken { token, expected: vec!["an identifier"], }), None => Err(Error::UnexpectedEndOfFile { expected: vec!["an identifier"], }), } } fn expect(&mut self, kind: TokenKind, one_of: &[&'static str]) -> Result { match self.next() { Some(token) if token.kind == kind => Ok(token), Some(token) => Err(Error::UnexpectedToken { token, expected: one_of.into(), }), _ => Err(Error::UnexpectedEndOfFile { expected: one_of.into(), }), } } fn maybe_consume(&mut self, kind: &TokenKind) -> Option { match self.peek() { Some(token) if token.kind == *kind => self.next(), _ => None, } } fn parse(&mut self) -> Result { let mut definitions = Vec::new(); while self.peek().is_some() { definitions.push(self.parse_definition()?); } Ok(Module { ast: definitions }) } fn parse_definition(&mut self) -> Result { let mut start = None; let publicity = if let Some(token) = self.maybe_consume(&TokenKind::Pub) { start = Some(token.location.start); Publicity::Public } else { Publicity::Private }; let next = self.expect_token(&["a function"])?; match next.kind { TokenKind::Fn => self .parse_function(publicity, start.unwrap_or(next.location.start)) .map(Definition::Function), TokenKind::Const => self .parse_constant(publicity, start.unwrap_or(next.location.start)) .map(Definition::Constant), TokenKind::Struct => self .parse_struct_definition(publicity, start.unwrap_or(next.location.start)) .map(Definition::Struct), _ => Err(Error::UnexpectedToken { token: next, expected: vec!["a function", "a constant"], }), } } fn parse_struct_definition( &mut self, publicity: Publicity, start: usize, ) -> Result { let (name, _) = self.expect_identifier()?; let parameters = if self.maybe_consume(&TokenKind::Less).is_some() { let (parameters, _) = self.sequence( |this| { let (name, _) = this.expect_identifier()?; Ok(name) }, Some(TokenKind::Comma), TokenKind::Greater, )?; parameters } else { Vec::new() }; self.expect(TokenKind::LeftBrace, &["a struct body"])?; let (fields, end) = self.sequence( Self::parse_struct_field, Some(TokenKind::Comma), TokenKind::RightBrace, )?; Ok(StructDefinition { publicity, name, parameters, fields, location: Span { start, end }, }) } fn parse_struct_field(&mut self) -> Result { let publicity = if self.maybe_consume(&TokenKind::Pub).is_some() { Publicity::Public } else { Publicity::Private }; let (name, _) = self.expect_identifier()?; self.expect(TokenKind::Colon, &["a type annotation"])?; let annotation = self.parse_annotation()?; Ok(StructField { publicity, name, annotation, }) } fn parse_constant( &mut self, publicity: Publicity, start: usize, ) -> Result { let (name, _) = self.expect_identifier()?; let annotation = self.maybe_annotation(TokenKind::Colon)?; self.expect(TokenKind::Equals, &["a constant value"])?; let value = self.parse_expression()?; let end = value.location().end; Ok(Constant { publicity, name, annotation, value, location: Span { start, end }, }) } fn parse_function( &mut self, publicity: Publicity, start: usize, ) -> Result { let (name, _) = self.expect_identifier()?; self.expect(TokenKind::LeftParen, &["an argument list"])?; let (parameters, _) = self.sequence( Self::parse_parameter, Some(TokenKind::Comma), TokenKind::RightParen, )?; let return_annotation = self.maybe_annotation(TokenKind::Arrow)?; self.expect(TokenKind::LeftBrace, &["a function body"])?; let (body, end) = self.sequence(Self::parse_statement, None, TokenKind::RightBrace)?; Ok(Function { publicity, name, parameters, return_type: (), return_annotation, body, location: Span { start, end }, }) } fn parse_parameter(&mut self) -> Result { let (label_or_name, _) = self.expect_identifier()?; let (label, name) = match self.peek().map(|token| token.kind) { Some(TokenKind::Identifier(name)) => { self.next(); (Some(label_or_name), name) } _ => (None, label_or_name), }; let annotation = self.maybe_annotation(TokenKind::Colon)?; Ok(Parameter { label, name, annotation, type_: (), }) } fn maybe_annotation(&mut self, start: TokenKind) -> Result, Error> { if self.maybe_consume(&start).is_none() { Ok(None) } else { self.parse_annotation().map(Some) } } fn parse_annotation(&mut self) -> Result { let next = self.expect_token(&["a type annotation"])?; let start = next.location.start; match next.kind { TokenKind::Identifier(name) => { let (generics, end) = if self.maybe_consume(&TokenKind::Less).is_some() { self.sequence( Self::parse_annotation, Some(TokenKind::Comma), TokenKind::Greater, )? } else { (Vec::new(), next.location.end) }; Ok(Annotation::Name { location: Span { start, end }, name, generics, }) } TokenKind::LeftParen => { let (elements, end) = self.sequence( Self::parse_annotation, Some(TokenKind::Comma), TokenKind::RightParen, )?; Ok(Annotation::Tuple { location: Span { start, end }, elements, }) } TokenKind::Star => { let mutability = if self.maybe_consume(&TokenKind::Mut).is_some() { Mutability::Mutable } else { Mutability::Immutable }; let underlying = self.parse_annotation()?; Ok(Annotation::Pointer { location: Span { start, end: underlying.location().end, }, mutability, underlying: Box::new(underlying), }) } TokenKind::LeftSquare => { let token = self.expect_token(&["an array length", "a closing bracket"])?; match token.kind { TokenKind::RightSquare => { let element = self.parse_annotation()?; Ok(Annotation::List { location: Span { start, end: element.location().end, }, element: Box::new(element), }) } TokenKind::Identifier(name) if name == "_" => { self.expect(TokenKind::RightSquare, &["a closing bracket"])?; let element = self.parse_annotation()?; Ok(Annotation::Array { location: Span { start, end: element.location().end, }, length: None, element: Box::new(element), }) } TokenKind::Int(length) => { self.expect(TokenKind::RightSquare, &["a closing bracket"])?; let element = self.parse_annotation()?; let length = length.parse().expect("Lexed integers must be valid"); Ok(Annotation::Array { location: Span { start, end: element.location().end, }, length: Some(length), element: Box::new(element), }) } _ => Err(Error::UnexpectedToken { token, expected: vec!["an array length", "a closing bracket"], }), } } TokenKind::LeftBrace => { let key = self.parse_annotation()?; self.expect(TokenKind::Colon, &["a colon"])?; let value = self.parse_annotation()?; let end = self .expect(TokenKind::RightBrace, &["a closing brace"])? .location .end; Ok(Annotation::Map { location: Span { start, end }, key: Box::new(key), value: Box::new(value), }) } _ => Err(Error::UnexpectedToken { token: next, expected: vec!["a type annotation"], }), } } fn parse_statement(&mut self) -> Result { match self.expect_peek(&["a statement"])?.kind { _ => self.parse_expression().map(Statement::Expression), } } fn parse_expression(&mut self) -> Result { self.nest(); let result = self.do_parse_expression(Precedence::None); self.denest(); result } fn do_parse_expression( &mut self, target_precedence: Precedence, ) -> Result { use Precedence::*; let mut left = self.parse_expression_unit()?; loop { let Some(token) = self.peek() else { return Ok(left); }; left = match token.kind { TokenKind::Equals if target_precedence <= Assignment => { self.parse_binary_operation(left, BinaryOperator::Assign, Assignment) } TokenKind::PlusEquals if target_precedence <= Assignment => { self.parse_binary_operation(left, BinaryOperator::AddAssign, Assignment) } TokenKind::MinusEquals if target_precedence <= Assignment => { self.parse_binary_operation(left, BinaryOperator::SubAssign, Assignment) } TokenKind::StarEquals if target_precedence <= Assignment => { self.parse_binary_operation(left, BinaryOperator::MulAssign, Assignment) } TokenKind::SlashEquals if target_precedence <= Assignment => { self.parse_binary_operation(left, BinaryOperator::DivAssign, Assignment) } TokenKind::PercentEquals if target_precedence <= Assignment => { self.parse_binary_operation(left, BinaryOperator::ModAssign, Assignment) } TokenKind::DoubleAmpersand if target_precedence < Logical => { self.parse_binary_operation(left, BinaryOperator::LogicalAnd, Logical) } TokenKind::DoublePipe if target_precedence < Logical => { self.parse_binary_operation(left, BinaryOperator::LogicalOr, Logical) } TokenKind::Less if target_precedence < Comparison => { self.parse_binary_operation(left, BinaryOperator::Less, Comparison) } TokenKind::LessEquals if target_precedence < Comparison => { self.parse_binary_operation(left, BinaryOperator::LessEquals, Comparison) } TokenKind::Greater if target_precedence < Comparison => { self.parse_binary_operation(left, BinaryOperator::Greater, Comparison) } TokenKind::GreaterEquals if target_precedence < Comparison => { self.parse_binary_operation(left, BinaryOperator::GreaterEquals, Comparison) } TokenKind::DoubleEquals if target_precedence < Comparison => { self.parse_binary_operation(left, BinaryOperator::Equal, Comparison) } TokenKind::BangEquals if target_precedence < Comparison => { self.parse_binary_operation(left, BinaryOperator::NotEqual, Comparison) } TokenKind::DoubleLess if target_precedence < Bitwise => { self.parse_binary_operation(left, BinaryOperator::LeftShift, Bitwise) } TokenKind::DoubleGreater if target_precedence < Bitwise => { self.parse_binary_operation(left, BinaryOperator::RightShift, Bitwise) } TokenKind::TripleGreater if target_precedence < Bitwise => { self.parse_binary_operation(left, BinaryOperator::ArithmeticRightShift, Bitwise) } TokenKind::Ampersand if target_precedence < Bitwise => { self.parse_binary_operation(left, BinaryOperator::BitwiseAnd, Bitwise) } TokenKind::Pipe if target_precedence < Bitwise => { self.parse_binary_operation(left, BinaryOperator::BitwiseOr, Bitwise) } TokenKind::Caret if target_precedence < Bitwise => { self.parse_binary_operation(left, BinaryOperator::BitwiseXor, Bitwise) } TokenKind::Plus if target_precedence < Add => { self.parse_binary_operation(left, BinaryOperator::Add, Add) } TokenKind::Minus if target_precedence < Add => { self.parse_binary_operation(left, BinaryOperator::Subtract, Add) } TokenKind::Star if target_precedence < Multiply => { self.parse_binary_operation(left, BinaryOperator::Multiply, Multiply) } TokenKind::Slash if target_precedence < Multiply => { self.parse_binary_operation(left, BinaryOperator::Divide, Multiply) } TokenKind::Percent if target_precedence < Multiply => { self.parse_binary_operation(left, BinaryOperator::Remainder, Multiply) } TokenKind::DoubleStar if target_precedence <= Power => { self.parse_binary_operation(left, BinaryOperator::Power, Power) } TokenKind::DoublePlus if target_precedence < Postfix => { self.next(); Ok(Expression::UnaryOperator { location: Span::new(left.location().start, token.location.end), value: Box::new(left), operator: UnaryOperator::Increment, type_: (), }) } TokenKind::DoubleMinus if target_precedence < Postfix => { self.next(); Ok(Expression::UnaryOperator { location: Span::new(left.location().start, token.location.end), value: Box::new(left), operator: UnaryOperator::Decrement, type_: (), }) } TokenKind::Question if target_precedence < Postfix => { self.next(); Ok(Expression::UnaryOperator { location: Span::new(left.location().start, token.location.end), value: Box::new(left), operator: UnaryOperator::ErrorPropagation, type_: (), }) } TokenKind::LeftBrace if !self.no_braces.last().unwrap_or(&false) && target_precedence < Postfix => { self.parse_struct(left) } TokenKind::LeftParen if target_precedence < Postfix => self.parse_call(left), TokenKind::Dot if target_precedence < Postfix => self.parse_field_access(left), TokenKind::LeftSquare if target_precedence < Postfix => self.parse_index(left), _ => return Ok(left), }?; } } fn parse_struct(&mut self, left: UntypedExpression) -> Result { self.next(); let (fields, end) = self.sequence( Self::parse_argument, Some(TokenKind::Comma), TokenKind::RightBrace, )?; let location = Span::new(left.location().start, end); Ok(Expression::Struct { location, struct_: Box::new(left), fields, type_: (), }) } fn parse_call(&mut self, left: UntypedExpression) -> Result { self.next(); let (arguments, end) = self.sequence( Self::parse_argument, Some(TokenKind::Comma), TokenKind::RightParen, )?; let location = Span::new(left.location().start, end); Ok(Expression::Call { location, function: Box::new(left), arguments, type_: (), }) } fn parse_argument(&mut self) -> Result, Error> { let expression_or_label = self.parse_expression()?; let (label, value) = match expression_or_label { Expression::Variable { name, .. } if self.maybe_consume(&TokenKind::Colon).is_some() => { let value = self.parse_expression()?; (Some(name), value) } _ => (None, expression_or_label), }; Ok(Argument { label, value }) } fn parse_field_access(&mut self, left: UntypedExpression) -> Result { self.next(); let (field, field_location) = self.expect_identifier()?; let location = Span::new(left.location().start, field_location.end); Ok(Expression::FieldAccess { location, left: Box::new(left), field, type_: (), }) } fn parse_index(&mut self, left: UntypedExpression) -> Result { self.next(); let index = self.parse_expression()?; let closing_token = self.expect(TokenKind::RightSquare, &["A closing bracket"])?; let location = Span::new(left.location().start, closing_token.location.end); Ok(Expression::Index { location, left: Box::new(left), index: Box::new(index), type_: (), }) } fn parse_binary_operation( &mut self, left: UntypedExpression, operator: BinaryOperator, precedence: Precedence, ) -> Result { self.next(); let right = self.do_parse_expression(precedence)?; Ok(Expression::BinaryOperator { location: Span::new(left.location().start, right.location().end), left: Box::new(left), operator, right: Box::new(right), type_: (), }) } fn parse_expression_unit(&mut self) -> Result { let next = self.expect_token(&["an expression"])?; match next.kind { TokenKind::Int(value) => { let value = value.parse().expect("Lexed integers must be valid"); Ok(Expression::Int { location: next.location, value, }) } TokenKind::True => Ok(Expression::Bool { location: next.location, value: true, }), TokenKind::False => Ok(Expression::Bool { location: next.location, value: false, }), TokenKind::Float(value) => { let value = value.parse().expect("Lexed floats must be valid"); Ok(Expression::Float { location: next.location, value, }) } TokenKind::String(value) => { let value = self.escape_string_value(value, next.location.start)?; Ok(Expression::String { location: next.location, value, }) } TokenKind::LeftParen => { let (elements, end) = self.sequence( Self::parse_expression, Some(TokenKind::Comma), TokenKind::RightParen, )?; if elements.len() == 1 { let first = elements .into_iter() .next() .expect("Elements has one element"); return Ok(Expression::Group { location: Span::new(next.location.start, end), inner: Box::new(first), }); } Ok(Expression::Tuple { location: Span::new(next.location.start, end), elements, type_: (), }) } TokenKind::LeftSquare => { let (elements, end) = self.sequence( Self::parse_expression, Some(TokenKind::Comma), TokenKind::RightSquare, )?; Ok(Expression::Array { location: Span::new(next.location.start, end), elements, type_: (), }) } TokenKind::LeftBrace => self.parse_map_or_block(next.location.start), TokenKind::Identifier(name) => Ok(Expression::Variable { location: next.location, name, type_: (), }), TokenKind::Fn => { self.expect(TokenKind::LeftParen, &["an argument list"])?; let (parameters, _) = self.sequence( Self::parse_parameter, Some(TokenKind::Comma), TokenKind::RightParen, )?; let return_annotation = self.maybe_annotation(TokenKind::Arrow)?; self.expect(TokenKind::LeftBrace, &["a function body"])?; let (body, end) = self.sequence(Self::parse_statement, None, TokenKind::RightBrace)?; Ok(Expression::Function { location: Span::new(next.location.start, end), parameters, return_annotation, body, type_: (), }) } TokenKind::If => self.parse_if(next.location.start), TokenKind::Match => { todo!() } TokenKind::Plus => { let value = self.do_parse_expression(Precedence::Prefix)?; Ok(Expression::UnaryOperator { location: Span::new(next.location.start, value.location().end), value: Box::new(value), operator: UnaryOperator::Plus, type_: (), }) } TokenKind::Minus => { let value = self.do_parse_expression(Precedence::Prefix)?; Ok(Expression::UnaryOperator { location: Span::new(next.location.start, value.location().end), value: Box::new(value), operator: UnaryOperator::Minus, type_: (), }) } TokenKind::Bang => { let value = self.do_parse_expression(Precedence::Prefix)?; Ok(Expression::UnaryOperator { location: Span::new(next.location.start, value.location().end), value: Box::new(value), operator: UnaryOperator::LogicalNot, type_: (), }) } TokenKind::Star => { let value = self.do_parse_expression(Precedence::Prefix)?; Ok(Expression::UnaryOperator { location: Span::new(next.location.start, value.location().end), value: Box::new(value), operator: UnaryOperator::Dereference, type_: (), }) } TokenKind::Ampersand => { let operator = if self.maybe_consume(&TokenKind::Mut).is_some() { UnaryOperator::MutableReference } else { UnaryOperator::Reference }; let value = self.do_parse_expression(Precedence::Prefix)?; Ok(Expression::UnaryOperator { location: Span::new(next.location.start, value.location().end), value: Box::new(value), operator, type_: (), }) } TokenKind::Tilde => { let value = self.do_parse_expression(Precedence::Prefix)?; Ok(Expression::UnaryOperator { location: Span::new(next.location.start, value.location().end), value: Box::new(value), operator: UnaryOperator::BitwiseNot, type_: (), }) } // Block _ => Err(Error::UnexpectedToken { token: next, expected: vec!["an expression"], }), } } fn parse_if(&mut self, start: usize) -> Result { self.no_braces.push(true); let condition = self.do_parse_expression(Precedence::None)?; self.no_braces.pop(); self.expect(TokenKind::LeftBrace, &["a block"])?; let (body, mut end) = self.sequence(Self::parse_statement, None, TokenKind::RightBrace)?; let mut else_branch = None; if self.maybe_consume(&TokenKind::Else).is_some() { let next = self.expect_token(&["an if statement", "a block"])?; let else_ = match next.kind { TokenKind::If => self.parse_if(next.location.start)?, TokenKind::LeftBrace => { let (statements, end) = self.sequence(Self::parse_statement, None, TokenKind::RightBrace)?; Expression::Block { location: Span::new(next.location.start, end), statements, } } _ => { return Err(Error::UnexpectedToken { token: next, expected: vec!["an if statement", "a block"], }); } }; end = else_.location().end; else_branch = Some(Box::new(else_)); } Ok(Expression::If { location: Span { start, end }, condition: Box::new(condition), body, else_branch, }) } fn parse_map_or_block(&mut self, start: usize) -> Result { if let Some(token) = self.maybe_consume(&TokenKind::RightBrace) { return Ok(Expression::Map { location: Span::new(start, token.location.end), elements: Vec::new(), type_: (), }); } let first = self.parse_statement()?; match first { Statement::Expression(key) if self.maybe_consume(&TokenKind::Colon).is_some() => { let value = self.parse_expression()?; let (elements, end) = self.sequence_with_start( (key, value), |this| { let key = this.parse_expression()?; this.expect(TokenKind::Colon, &["a colon"])?; let value = this.parse_expression()?; Ok((key, value)) }, Some(TokenKind::Comma), TokenKind::RightBrace, )?; Ok(Expression::Map { location: Span { start, end }, elements, type_: (), }) } _ => { let (statements, end) = self.sequence_with_start( first, Self::parse_statement, None, TokenKind::RightBrace, )?; Ok(Expression::Block { location: Span { start, end }, statements, }) } } } fn escape_string_value(&self, value: EcoString, start: usize) -> Result { let mut out = EcoString::new(); let mut chars = value.char_indices(); chars.next(); while let Some((i, char)) = chars.next() { if char == '"' { break; } else if char == '\\' { let (escape_index, char) = chars .next() .expect("Valid strings do not end in backslashes"); match char { '\\' => out.push('\\'), '"' => out.push('"'), 'n' => out.push('\n'), 't' => out.push('\t'), '\n' => {} _ => { return Err(Error::InvalidEscapeSequence { location: Span::new(start + i, start + escape_index + char.len_utf8()), }); } } } else { out.push(char); } } Ok(out) } fn sequence( &mut self, parse: impl FnMut(&mut Self) -> Result, delimiter: Option, end: TokenKind, ) -> Result<(Vec, usize), Error> { self.do_sequence(parse, delimiter, end, Vec::new()) } fn sequence_with_start( &mut self, start: A, parse: impl FnMut(&mut Self) -> Result, delimiter: Option, end: TokenKind, ) -> Result<(Vec, usize), Error> { if let Some(delimiter) = &delimiter { if self.maybe_consume(delimiter).is_none() { let end = self.expect(end, &["the end of the list"])?.location.end; return Ok((vec![start], end)); } } self.do_sequence(parse, delimiter, end, vec![start]) } fn do_sequence( &mut self, mut parse: impl FnMut(&mut Self) -> Result, delimiter: Option, end: TokenKind, mut results: Vec, ) -> Result<(Vec, usize), Error> { loop { if self.peek().is_none_or(|token| token.kind == end) { break; } results.push(parse(self)?); if let Some(delimiter) = &delimiter { if self.maybe_consume(delimiter).is_none() { break; } } } let end = self.expect(end, &["the end of the list"])?.location.end; Ok((results, end)) } } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] enum Precedence { None, Assignment, Logical, Comparison, Bitwise, Add, Multiply, Power, Prefix, Postfix, }