2024-12-18 18:55:51 -06:00
|
|
|
module Parser where
|
|
|
|
|
|
|
|
import Lexer
|
2025-01-20 19:20:29 -06:00
|
|
|
import Research
|
2024-12-29 20:29:41 -06:00
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
import Control.Monad (void)
|
|
|
|
import Control.Monad.State
|
|
|
|
import Data.List.NonEmpty (toList)
|
2024-12-29 20:29:41 -06:00
|
|
|
import Data.Void (Void)
|
2024-12-18 18:55:51 -06:00
|
|
|
import Text.Megaparsec
|
2024-12-31 10:00:52 -06:00
|
|
|
import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty)
|
2024-12-29 20:29:41 -06:00
|
|
|
import qualified Data.Set as Set
|
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
data PState = PState
|
|
|
|
{ depth :: Int
|
|
|
|
} deriving (Show)
|
|
|
|
|
|
|
|
type ParserM = StateT PState (Parsec Void [LToken])
|
|
|
|
|
|
|
|
satisfyM :: (LToken -> Bool) -> ParserM LToken
|
|
|
|
satisfyM f = do
|
|
|
|
token <- lift (satisfy f)
|
|
|
|
modify' (updateDepth token)
|
|
|
|
return token
|
|
|
|
|
|
|
|
updateDepth :: LToken -> PState -> PState
|
|
|
|
updateDepth LOpenParen st = st { depth = depth st + 1 }
|
|
|
|
updateDepth LCloseParen st = st { depth = max 0 (depth st - 1) }
|
|
|
|
updateDepth _ st = st
|
|
|
|
|
|
|
|
topLevelNewline :: ParserM ()
|
|
|
|
topLevelNewline = do
|
|
|
|
st <- get
|
|
|
|
if depth st == 0
|
|
|
|
then void (satisfyM (== LNewline))
|
|
|
|
else fail "Top-level exit in paren context"
|
|
|
|
|
|
|
|
parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST]
|
|
|
|
parseProgram tokens =
|
|
|
|
runParser (evalStateT parseProgramM (PState 0)) "" tokens
|
|
|
|
|
|
|
|
parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST
|
|
|
|
parseSingleExpr tokens =
|
|
|
|
runParser (evalStateT (scnParserM *> parseExpressionM <* eofM) (PState 0)) "" tokens
|
2024-12-27 12:27:00 -06:00
|
|
|
|
2024-12-29 08:29:25 -06:00
|
|
|
parseTricu :: String -> [TricuAST]
|
2025-01-20 19:20:29 -06:00
|
|
|
parseTricu input =
|
|
|
|
case lexTricu input of
|
|
|
|
[] -> []
|
|
|
|
toks ->
|
|
|
|
case parseProgram toks of
|
|
|
|
Left err -> error (handleParseError err)
|
|
|
|
Right asts -> asts
|
2024-12-20 11:38:09 -06:00
|
|
|
|
2024-12-29 08:29:25 -06:00
|
|
|
parseSingle :: String -> TricuAST
|
2025-01-20 19:20:29 -06:00
|
|
|
parseSingle input =
|
|
|
|
case lexTricu input of
|
|
|
|
[] -> SEmpty
|
|
|
|
toks ->
|
|
|
|
case parseSingleExpr toks of
|
|
|
|
Left err -> error (handleParseError err)
|
|
|
|
Right ast -> ast
|
|
|
|
|
|
|
|
parseProgramM :: ParserM [TricuAST]
|
|
|
|
parseProgramM = do
|
|
|
|
skipMany topLevelNewline
|
|
|
|
exprs <- sepEndBy parseOneExpression (some topLevelNewline)
|
|
|
|
skipMany topLevelNewline
|
|
|
|
return exprs
|
|
|
|
|
|
|
|
parseOneExpression :: ParserM TricuAST
|
|
|
|
parseOneExpression = scnParserM *> parseExpressionM
|
|
|
|
|
|
|
|
scnParserM :: ParserM ()
|
|
|
|
scnParserM = skipMany $ do
|
|
|
|
t <- lookAhead anySingle
|
|
|
|
st <- get
|
|
|
|
if depth st > 0 && isNewline t
|
|
|
|
then void (satisfyM isNewline)
|
|
|
|
else
|
|
|
|
fail "In paren context or no space token" <|> empty
|
|
|
|
|
|
|
|
eofM :: ParserM ()
|
|
|
|
eofM = lift eof
|
|
|
|
|
|
|
|
parseExpressionM :: ParserM TricuAST
|
|
|
|
parseExpressionM = choice
|
|
|
|
[ try parseFunctionM
|
|
|
|
, try parseLambdaM
|
|
|
|
, try parseLambdaExpressionM
|
|
|
|
, try parseListLiteralM
|
|
|
|
, try parseApplicationM
|
|
|
|
, try parseTreeTermM
|
|
|
|
, parseLiteralM
|
2024-12-18 18:55:51 -06:00
|
|
|
]
|
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseFunctionM :: ParserM TricuAST
|
|
|
|
parseFunctionM = do
|
|
|
|
LIdentifier name <- satisfyM isIdentifier
|
|
|
|
args <- many (satisfyM isIdentifier)
|
|
|
|
_ <- satisfyM (== LAssign)
|
|
|
|
scnParserM
|
|
|
|
body <- parseExpressionM
|
|
|
|
pure (SFunc name (map getIdentifier args) body)
|
|
|
|
|
|
|
|
parseLambdaM :: ParserM TricuAST
|
|
|
|
parseLambdaM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ do
|
|
|
|
_ <- satisfyM (== LBackslash)
|
|
|
|
param <- satisfyM isIdentifier
|
|
|
|
rest <- many (satisfyM isIdentifier)
|
|
|
|
_ <- satisfyM (== LColon)
|
|
|
|
scnParserM
|
|
|
|
body <- parseLambdaExpressionM
|
|
|
|
let nested = foldr (\v acc -> SLambda [getIdentifier v] acc) body rest
|
|
|
|
pure (SLambda [getIdentifier param] nested)
|
|
|
|
|
|
|
|
parseLambdaExpressionM :: ParserM TricuAST
|
|
|
|
parseLambdaExpressionM = choice
|
|
|
|
[ try parseLambdaApplicationM
|
|
|
|
, parseAtomicLambdaM
|
2024-12-27 08:17:06 -06:00
|
|
|
]
|
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseAtomicLambdaM :: ParserM TricuAST
|
|
|
|
parseAtomicLambdaM = choice
|
|
|
|
[ parseVarM
|
|
|
|
, parseTreeLeafM
|
|
|
|
, parseLiteralM
|
|
|
|
, parseListLiteralM
|
|
|
|
, try parseLambdaM
|
|
|
|
, between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM
|
2024-12-27 08:17:06 -06:00
|
|
|
]
|
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseApplicationM :: ParserM TricuAST
|
|
|
|
parseApplicationM = do
|
|
|
|
func <- parseAtomicBaseM
|
|
|
|
scnParserM
|
|
|
|
args <- many $ do
|
|
|
|
scnParserM
|
|
|
|
arg <- parseAtomicM
|
|
|
|
return arg
|
|
|
|
return $ foldl SApp func args
|
|
|
|
|
|
|
|
parseLambdaApplicationM :: ParserM TricuAST
|
|
|
|
parseLambdaApplicationM = do
|
|
|
|
func <- parseAtomicLambdaM
|
|
|
|
scnParserM
|
|
|
|
args <- many $ do
|
|
|
|
arg <- parseAtomicLambdaM
|
|
|
|
scnParserM
|
|
|
|
pure arg
|
|
|
|
pure $ foldl SApp func args
|
|
|
|
|
|
|
|
parseAtomicBaseM :: ParserM TricuAST
|
|
|
|
parseAtomicBaseM = choice
|
|
|
|
[ parseTreeLeafM
|
|
|
|
, parseGroupedM
|
|
|
|
]
|
2024-12-18 18:55:51 -06:00
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseTreeLeafM :: ParserM TricuAST
|
|
|
|
parseTreeLeafM = do
|
|
|
|
_ <- satisfyM isKeywordT
|
|
|
|
notFollowedBy (lift (satisfy (== LAssign)))
|
|
|
|
pure TLeaf
|
|
|
|
|
|
|
|
parseTreeTermM :: ParserM TricuAST
|
|
|
|
parseTreeTermM = do
|
|
|
|
base <- parseTreeLeafOrParenthesizedM
|
|
|
|
rest <- many parseTreeLeafOrParenthesizedM
|
|
|
|
pure (foldl combine base rest)
|
2024-12-18 18:55:51 -06:00
|
|
|
where
|
2025-01-19 14:41:25 -06:00
|
|
|
combine acc next
|
|
|
|
| TLeaf <- acc = TStem next
|
|
|
|
| TStem t <- acc = TFork t next
|
|
|
|
| TFork _ _ <- acc = TFork acc next
|
2024-12-18 18:55:51 -06:00
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseTreeLeafOrParenthesizedM :: ParserM TricuAST
|
|
|
|
parseTreeLeafOrParenthesizedM = choice
|
|
|
|
[ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM
|
|
|
|
, parseTreeLeafM
|
2024-12-18 18:55:51 -06:00
|
|
|
]
|
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseAtomicM :: ParserM TricuAST
|
|
|
|
parseAtomicM = choice
|
|
|
|
[ parseVarM
|
|
|
|
, parseTreeLeafM
|
|
|
|
, parseListLiteralM
|
|
|
|
, parseGroupedM
|
|
|
|
, parseLiteralM
|
2024-12-18 18:55:51 -06:00
|
|
|
]
|
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseGroupedM :: ParserM TricuAST
|
|
|
|
parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $
|
|
|
|
scnParserM *> parseExpressionM <* scnParserM
|
2024-12-20 11:38:09 -06:00
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseLiteralM :: ParserM TricuAST
|
|
|
|
parseLiteralM = choice
|
|
|
|
[ parseIntLiteralM
|
|
|
|
, parseStrLiteralM
|
2024-12-18 18:55:51 -06:00
|
|
|
]
|
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseListLiteralM :: ParserM TricuAST
|
|
|
|
parseListLiteralM = do
|
|
|
|
_ <- satisfyM (== LOpenBracket)
|
|
|
|
elements <- many parseListItemM
|
|
|
|
_ <- satisfyM (== LCloseBracket)
|
|
|
|
pure (SList elements)
|
|
|
|
|
|
|
|
parseListItemM :: ParserM TricuAST
|
|
|
|
parseListItemM = choice
|
|
|
|
[ parseGroupedItemM
|
|
|
|
, parseListLiteralM
|
|
|
|
, parseSingleItemM
|
2024-12-19 19:53:32 -06:00
|
|
|
]
|
2024-12-19 18:57:57 -06:00
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseGroupedItemM :: ParserM TricuAST
|
|
|
|
parseGroupedItemM = do
|
|
|
|
_ <- satisfyM (== LOpenParen)
|
|
|
|
inner <- parseExpressionM
|
|
|
|
_ <- satisfyM (== LCloseParen)
|
|
|
|
pure inner
|
2024-12-19 18:57:57 -06:00
|
|
|
|
2025-01-20 19:20:29 -06:00
|
|
|
parseSingleItemM :: ParserM TricuAST
|
|
|
|
parseSingleItemM = do
|
|
|
|
token <- satisfyM isListItem
|
2024-12-19 18:57:57 -06:00
|
|
|
case token of
|
2025-01-20 19:20:29 -06:00
|
|
|
LIdentifier name -> pure (SVar name)
|
|
|
|
LKeywordT -> pure TLeaf
|
|
|
|
_ -> fail "Unexpected token in list item"
|
|
|
|
|
|
|
|
parseVarM :: ParserM TricuAST
|
|
|
|
parseVarM = do
|
|
|
|
LIdentifier name <- satisfyM isIdentifier
|
|
|
|
if name == "t" || name == "__result"
|
|
|
|
then fail ("Reserved keyword: " ++ name ++ " cannot be assigned.")
|
|
|
|
else pure (SVar name)
|
|
|
|
|
|
|
|
parseIntLiteralM :: ParserM TricuAST
|
|
|
|
parseIntLiteralM = do
|
|
|
|
LIntegerLiteral value <- satisfyM isIntegerLiteral
|
|
|
|
pure (SInt value)
|
|
|
|
|
|
|
|
parseStrLiteralM :: ParserM TricuAST
|
|
|
|
parseStrLiteralM = do
|
|
|
|
LStringLiteral value <- satisfyM isStringLiteral
|
|
|
|
pure (SStr value)
|
|
|
|
|
|
|
|
isKeywordT :: LToken -> Bool
|
|
|
|
isKeywordT LKeywordT = True
|
|
|
|
isKeywordT _ = False
|
|
|
|
|
|
|
|
isIdentifier :: LToken -> Bool
|
2024-12-27 12:27:00 -06:00
|
|
|
isIdentifier (LIdentifier _) = True
|
2025-01-20 19:20:29 -06:00
|
|
|
isIdentifier _ = False
|
|
|
|
|
|
|
|
isIntegerLiteral :: LToken -> Bool
|
2024-12-18 18:55:51 -06:00
|
|
|
isIntegerLiteral (LIntegerLiteral _) = True
|
2025-01-20 19:20:29 -06:00
|
|
|
isIntegerLiteral _ = False
|
|
|
|
|
|
|
|
isStringLiteral :: LToken -> Bool
|
|
|
|
isStringLiteral (LStringLiteral _) = True
|
|
|
|
isStringLiteral _ = False
|
|
|
|
|
|
|
|
isLiteral :: LToken -> Bool
|
2024-12-27 12:27:00 -06:00
|
|
|
isLiteral (LIntegerLiteral _) = True
|
2025-01-20 19:20:29 -06:00
|
|
|
isLiteral (LStringLiteral _) = True
|
|
|
|
isLiteral _ = False
|
|
|
|
|
|
|
|
isListItem :: LToken -> Bool
|
|
|
|
isListItem (LIdentifier _) = True
|
|
|
|
isListItem LKeywordT = True
|
|
|
|
isListItem _ = False
|
|
|
|
|
|
|
|
isNewline :: LToken -> Bool
|
|
|
|
isNewline LNewline = True
|
|
|
|
isNewline _ = False
|
|
|
|
|
|
|
|
getIdentifier :: LToken -> String
|
|
|
|
getIdentifier (LIdentifier name) = name
|
|
|
|
getIdentifier _ = error "Expected identifier"
|
2024-12-30 08:30:40 -06:00
|
|
|
|
2024-12-20 11:38:09 -06:00
|
|
|
handleParseError :: ParseErrorBundle [LToken] Void -> String
|
|
|
|
handleParseError bundle =
|
2025-01-20 19:20:29 -06:00
|
|
|
let errors = bundleErrors bundle
|
|
|
|
errorList = Data.List.NonEmpty.toList errors
|
|
|
|
formattedErrs = map showError errorList
|
|
|
|
in unlines ("Parse error(s) encountered:" : formattedErrs)
|
2024-12-20 11:38:09 -06:00
|
|
|
|
|
|
|
showError :: ParseError [LToken] Void -> String
|
|
|
|
showError (TrivialError offset (Just (Tokens tokenStream)) expected) =
|
2025-01-20 19:20:29 -06:00
|
|
|
"Parse error at offset " ++ show offset
|
|
|
|
++ ": unexpected token " ++ show tokenStream
|
|
|
|
++ ", expected one of " ++ show (Set.toList expected)
|
2024-12-20 11:38:09 -06:00
|
|
|
showError (FancyError offset fancy) =
|
2025-01-20 19:20:29 -06:00
|
|
|
"Parse error at offset " ++ show offset ++ ":\n "
|
|
|
|
++ unlines (map show (Set.toList fancy))
|
2024-12-20 11:38:09 -06:00
|
|
|
showError (TrivialError offset Nothing expected) =
|
2025-01-20 19:20:29 -06:00
|
|
|
"Parse error at offset " ++ show offset
|
|
|
|
++ ": expected one of " ++ show (Set.toList expected)
|
|
|
|
|