A cunning interpreter for the pure untyped λ-calculus.
at master 5.6 kB view raw
1-- SPDX-FileCopyrightText: 2022 Severen Redwood <sev@severen.dev> 2-- SPDX-License-Identifier: GPL-3.0-or-later 3 4module Sly.Parser (parse) where 5 6import Control.Monad.Combinators.Expr (Operator (..), makeExprParser) 7import Data.Foldable (foldr') 8import Data.Functor (void, ($>)) 9import Data.Text (Text) 10import Data.Text qualified as T 11import Data.Void (Void) 12import Sly.Syntax 13import Text.Megaparsec hiding (Token, parse) 14import Text.Megaparsec.Char.Lexer qualified as L 15import Unicode.Char.Identifiers (isPatternWhitespace, isXIDContinue, isXIDStart) 16 17{- Grammar Notes 18 * Application is parsed with the highest precedence and associativity to the 19 left, i.e. f x y = ((f x) y). 20 * Abstractions are parsed with bodies extending as far to the right as 21 possible, i.e. λx -> λy -> x y x = (λx -> (λy -> ((x y) x))). 22 * Bracketing of terms can be used to override precedence and associativity 23 when required. 24 * Statements are either an assignment (let X := Y) or a λ-calculus term and 25 end with a full stop '.'. 26 27 For anyone editing this file: One good test to flush out any bugs in the 28 parser is to check whether the following equality is reflected by the parse 29 trees of the LHS and RHS, respectively: 30 λf -> (λx -> f (x x)) (λx -> f (x x)) = (λf -> ((λx -> (f (x x))) (λx -> (f (x x))))). 31-} 32 33-- | The parser monad. 34-- 35-- This is a Parsec type synonym to both help type inference and the compiler's 36-- optimiser. 37type Parser = Parsec Void Text 38 39-- | Words that are reserved as keywords and thus disallowed as names. 40keywords :: [Text] 41keywords = ["let", "in"] 42 43-- | Parse and discard one or more whitespace characters. 44space1 :: Parser () 45space1 = void $ some (satisfy isPatternWhitespace) 46 47-- | Parse and discard zero or more whitespace characters and comments. 48spaceConsumer :: Parser () 49spaceConsumer = 50 L.space 51 space1 52 (L.skipLineComment "--") 53 (L.skipBlockCommentNested "/-" "-/") 54 55lexeme :: Parser a -> Parser a 56lexeme = L.lexeme spaceConsumer 57 58symbol :: Text -> Parser Text 59symbol = L.symbol spaceConsumer 60 61-- | Create a parser that will parse and discard the given string. 62punc :: Text -> Parser () 63punc = void . symbol 64 65-- | Create a parser that applies the given parser to an expression between a 66-- pair of round brackets. 67brackets :: Parser a -> Parser a 68brackets = between (punc "(") (punc ")") 69 70-- | Parse a 'start' character in a name. 71nameStart :: Parser Char 72nameStart = satisfy \c -> isXIDStart c && c /= 'λ' 73 74-- | Parse a sequence of 'continue' characters in a name. 75nameContinue :: Parser String 76nameContinue = many $ satisfy \c -> isXIDContinue c || c == '\'' 77 78-- | Parse a name according to the Unicode Standard Annex #31. 79name :: Parser Name 80name = Name <$> (lexeme word >>= check) <?> "name" 81 where 82 word = T.pack <$> ((:) <$> nameStart <*> nameContinue) 83 check w 84 | w `notElem` keywords = return w 85 -- TODO: See if the positioning of this error message (when output) can be 86 -- improved. 87 | otherwise = fail $ "keyword " <> T.unpack w <> " cannot be a name" 88 89-- | Parse a variable term. 90variable :: Parser Term 91variable = Var <$> name 92 93-- | Parse a natural number literal. 94natural :: Parser Term 95natural = toChurchNat <$> (lexeme L.decimal >>= check) 96 where 97 check n 98 | n <= toInteger maxInt = return (fromInteger n) 99 | otherwise = fail $ "naturals larger than " <> show maxInt <> " are disallowed" 100 maxInt = maxBound @Int 101 102-- | Parse a Boolean literal. 103boolean :: Parser Term 104boolean = toChurchBool <$> (single '#' *> (try true <|> false)) 105 where 106 true = (symbol "true" <|> symbol "t") $> True 107 false = (symbol "false" <|> symbol "f") $> False 108 109-- | Parse a λ-abstraction. 110abstraction :: Parser Term 111abstraction = do 112 punc "\\" <|> punc "λ" <?> "\\" 113 binders <- name `sepBy1` spaceConsumer 114 punc "->" <|> punc "" <?> "->" 115 abstract binders <$> term 116 where 117 -- Expand an abstraction with multiple variables into its internal representation of 118 -- nested single-variable abstractions. 119 abstract = flip (foldr' Abs) 120 121-- | Parse an application term. 122application :: Parser (Term -> Term -> Term) 123application = return App 124 125-- | Parse the initial common fragment of a let term or a let statement. 126letStart :: Parser (Name, Term) 127letStart = do 128 punc "let" 129 name' <- name <?> "name" 130 punc ":=" 131 term' <- term 132 return (name', term') 133 134-- | Parse a let term. 135let_ :: Parser Term 136let_ = do 137 (x, t) <- letStart 138 punc "in" 139 body <- term 140 -- NOTE: let x := t in body is syntactic sugar for (λx -> body) t. 141 return $ App (Abs x body) t 142 143-- | Parse a λ-term. 144term :: Parser Term 145term = makeExprParser (choice indivisibles) operatorTable 146 where 147 operatorTable = [[InfixL application]] 148 indivisibles = 149 [try variable, natural, boolean, abstraction, let_, brackets term] 150 151-- | Parse an assignment statement. 152assignment :: Parser Statement 153assignment = do 154 (x, t) <- letStart 155 notFollowedBy "in" 156 return (Ass x t) 157 158-- | Parse a term statement. 159termS :: Parser Statement 160termS = Term <$> term 161 162-- | Parse a program statement. 163statement :: Parser Statement 164statement = try assignment <|> termS 165 166-- | Parse the end of a statement. 167eos :: Parser () 168eos = punc "." 169 170-- | Parse a complete program file. 171-- 172-- We define 'program' in this context to be a sequence of bindings and/or terms. 173program :: Parser [Statement] 174program = spaceConsumer *> statement `endBy` eos <* eof 175 176-- | Parse a sly program given a filename and a program string. 177parse :: FilePath -> Text -> Either (ParseErrorBundle Text Void) [Statement] 178parse = runParser program