A cunning interpreter for the pure untyped λ-calculus.
1-- SPDX-FileCopyrightText: 2022 Severen Redwood <sev@severen.dev>
2-- SPDX-License-Identifier: GPL-3.0-or-later
3
4module Sly.Parser (parse) where
5
6import Control.Monad.Combinators.Expr (Operator (..), makeExprParser)
7import Data.Foldable (foldr')
8import Data.Functor (void, ($>))
9import Data.Text (Text)
10import Data.Text qualified as T
11import Data.Void (Void)
12import Sly.Syntax
13import Text.Megaparsec hiding (Token, parse)
14import Text.Megaparsec.Char.Lexer qualified as L
15import Unicode.Char.Identifiers (isPatternWhitespace, isXIDContinue, isXIDStart)
16
17{- Grammar Notes
18 * Application is parsed with the highest precedence and associativity to the
19 left, i.e. f x y = ((f x) y).
20 * Abstractions are parsed with bodies extending as far to the right as
21 possible, i.e. λx -> λy -> x y x = (λx -> (λy -> ((x y) x))).
22 * Bracketing of terms can be used to override precedence and associativity
23 when required.
24 * Statements are either an assignment (let X := Y) or a λ-calculus term and
25 end with a full stop '.'.
26
27 For anyone editing this file: One good test to flush out any bugs in the
28 parser is to check whether the following equality is reflected by the parse
29 trees of the LHS and RHS, respectively:
30 λf -> (λx -> f (x x)) (λx -> f (x x)) = (λf -> ((λx -> (f (x x))) (λx -> (f (x x))))).
31-}
32
33-- | The parser monad.
34--
35-- This is a Parsec type synonym to both help type inference and the compiler's
36-- optimiser.
37type Parser = Parsec Void Text
38
39-- | Words that are reserved as keywords and thus disallowed as names.
40keywords :: [Text]
41keywords = ["let", "in"]
42
43-- | Parse and discard one or more whitespace characters.
44space1 :: Parser ()
45space1 = void $ some (satisfy isPatternWhitespace)
46
47-- | Parse and discard zero or more whitespace characters and comments.
48spaceConsumer :: Parser ()
49spaceConsumer =
50 L.space
51 space1
52 (L.skipLineComment "--")
53 (L.skipBlockCommentNested "/-" "-/")
54
55lexeme :: Parser a -> Parser a
56lexeme = L.lexeme spaceConsumer
57
58symbol :: Text -> Parser Text
59symbol = L.symbol spaceConsumer
60
61-- | Create a parser that will parse and discard the given string.
62punc :: Text -> Parser ()
63punc = void . symbol
64
65-- | Create a parser that applies the given parser to an expression between a
66-- pair of round brackets.
67brackets :: Parser a -> Parser a
68brackets = between (punc "(") (punc ")")
69
70-- | Parse a 'start' character in a name.
71nameStart :: Parser Char
72nameStart = satisfy \c -> isXIDStart c && c /= 'λ'
73
74-- | Parse a sequence of 'continue' characters in a name.
75nameContinue :: Parser String
76nameContinue = many $ satisfy \c -> isXIDContinue c || c == '\''
77
78-- | Parse a name according to the Unicode Standard Annex #31.
79name :: Parser Name
80name = Name <$> (lexeme word >>= check) <?> "name"
81 where
82 word = T.pack <$> ((:) <$> nameStart <*> nameContinue)
83 check w
84 | w `notElem` keywords = return w
85 -- TODO: See if the positioning of this error message (when output) can be
86 -- improved.
87 | otherwise = fail $ "keyword " <> T.unpack w <> " cannot be a name"
88
89-- | Parse a variable term.
90variable :: Parser Term
91variable = Var <$> name
92
93-- | Parse a natural number literal.
94natural :: Parser Term
95natural = toChurchNat <$> (lexeme L.decimal >>= check)
96 where
97 check n
98 | n <= toInteger maxInt = return (fromInteger n)
99 | otherwise = fail $ "naturals larger than " <> show maxInt <> " are disallowed"
100 maxInt = maxBound @Int
101
102-- | Parse a Boolean literal.
103boolean :: Parser Term
104boolean = toChurchBool <$> (single '#' *> (try true <|> false))
105 where
106 true = (symbol "true" <|> symbol "t") $> True
107 false = (symbol "false" <|> symbol "f") $> False
108
109-- | Parse a λ-abstraction.
110abstraction :: Parser Term
111abstraction = do
112 punc "\\" <|> punc "λ" <?> "\\"
113 binders <- name `sepBy1` spaceConsumer
114 punc "->" <|> punc "↦" <?> "->"
115 abstract binders <$> term
116 where
117 -- Expand an abstraction with multiple variables into its internal representation of
118 -- nested single-variable abstractions.
119 abstract = flip (foldr' Abs)
120
121-- | Parse an application term.
122application :: Parser (Term -> Term -> Term)
123application = return App
124
125-- | Parse the initial common fragment of a let term or a let statement.
126letStart :: Parser (Name, Term)
127letStart = do
128 punc "let"
129 name' <- name <?> "name"
130 punc ":="
131 term' <- term
132 return (name', term')
133
134-- | Parse a let term.
135let_ :: Parser Term
136let_ = do
137 (x, t) <- letStart
138 punc "in"
139 body <- term
140 -- NOTE: let x := t in body is syntactic sugar for (λx -> body) t.
141 return $ App (Abs x body) t
142
143-- | Parse a λ-term.
144term :: Parser Term
145term = makeExprParser (choice indivisibles) operatorTable
146 where
147 operatorTable = [[InfixL application]]
148 indivisibles =
149 [try variable, natural, boolean, abstraction, let_, brackets term]
150
151-- | Parse an assignment statement.
152assignment :: Parser Statement
153assignment = do
154 (x, t) <- letStart
155 notFollowedBy "in"
156 return (Ass x t)
157
158-- | Parse a term statement.
159termS :: Parser Statement
160termS = Term <$> term
161
162-- | Parse a program statement.
163statement :: Parser Statement
164statement = try assignment <|> termS
165
166-- | Parse the end of a statement.
167eos :: Parser ()
168eos = punc "."
169
170-- | Parse a complete program file.
171--
172-- We define 'program' in this context to be a sequence of bindings and/or terms.
173program :: Parser [Statement]
174program = spaceConsumer *> statement `endBy` eos <* eof
175
176-- | Parse a sly program given a filename and a program string.
177parse :: FilePath -> Text -> Either (ParseErrorBundle Text Void) [Statement]
178parse = runParser program