tricu/src/Parser.hs

292 lines
7.8 KiB
Haskell
Raw Normal View History

module Parser where
import Lexer
2025-01-20 19:20:29 -06:00
import Research
2024-12-29 20:29:41 -06:00
2025-01-20 19:20:29 -06:00
import Control.Monad (void)
import Control.Monad.State
import Data.List.NonEmpty (toList)
2024-12-29 20:29:41 -06:00
import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty)
2024-12-29 20:29:41 -06:00
import qualified Data.Set as Set
2025-01-20 19:20:29 -06:00
data PState = PState
{ depth :: Int
} deriving (Show)
type ParserM = StateT PState (Parsec Void [LToken])
satisfyM :: (LToken -> Bool) -> ParserM LToken
satisfyM f = do
token <- lift (satisfy f)
modify' (updateDepth token)
return token
updateDepth :: LToken -> PState -> PState
updateDepth LOpenParen st = st { depth = depth st + 1 }
updateDepth LCloseParen st = st { depth = max 0 (depth st - 1) }
updateDepth _ st = st
topLevelNewline :: ParserM ()
topLevelNewline = do
st <- get
if depth st == 0
then void (satisfyM (== LNewline))
else fail "Top-level exit in paren context"
parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST]
parseProgram tokens =
runParser (evalStateT parseProgramM (PState 0)) "" tokens
parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST
parseSingleExpr tokens =
runParser (evalStateT (scnParserM *> parseExpressionM <* eofM) (PState 0)) "" tokens
2024-12-29 08:29:25 -06:00
parseTricu :: String -> [TricuAST]
2025-01-20 19:20:29 -06:00
parseTricu input =
case lexTricu input of
[] -> []
toks ->
case parseProgram toks of
Left err -> error (handleParseError err)
Right asts -> asts
2024-12-29 08:29:25 -06:00
parseSingle :: String -> TricuAST
2025-01-20 19:20:29 -06:00
parseSingle input =
case lexTricu input of
[] -> SEmpty
toks ->
case parseSingleExpr toks of
Left err -> error (handleParseError err)
Right ast -> ast
parseProgramM :: ParserM [TricuAST]
parseProgramM = do
skipMany topLevelNewline
exprs <- sepEndBy parseOneExpression (some topLevelNewline)
skipMany topLevelNewline
return exprs
parseOneExpression :: ParserM TricuAST
parseOneExpression = scnParserM *> parseExpressionM
scnParserM :: ParserM ()
scnParserM = skipMany $ do
2025-01-21 14:21:47 -06:00
t <- lookAhead anySingle
2025-01-20 19:20:29 -06:00
st <- get
2025-01-21 14:21:47 -06:00
if | depth st > 0 && case t of
LNewline -> True
_ -> False -> void $ satisfyM $ \case
LNewline -> True
_ -> False
| otherwise -> fail "In paren context or no space token" <|> empty
2025-01-20 19:20:29 -06:00
eofM :: ParserM ()
eofM = lift eof
parseExpressionM :: ParserM TricuAST
parseExpressionM = choice
[ try parseFunctionM
, try parseLambdaM
, try parseLambdaExpressionM
, try parseListLiteralM
, try parseApplicationM
, try parseTreeTermM
, parseLiteralM
]
2025-01-20 19:20:29 -06:00
parseFunctionM :: ParserM TricuAST
parseFunctionM = do
2025-01-21 14:21:47 -06:00
LIdentifier name <- satisfyM $ \case
LIdentifier _ -> True
_ -> False
args <- many $ satisfyM $ \case
LIdentifier _ -> True
_ -> False
2025-01-20 19:20:29 -06:00
_ <- satisfyM (== LAssign)
scnParserM
body <- parseExpressionM
pure (SFunc name (map getIdentifier args) body)
parseLambdaM :: ParserM TricuAST
2025-01-21 14:21:47 -06:00
parseLambdaM =
between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ do
_ <- satisfyM (== LBackslash)
param <- satisfyM $ \case
LIdentifier _ -> True
_ -> False
rest <- many $ satisfyM $ \case
LIdentifier _ -> True
_ -> False
_ <- satisfyM (== LColon)
scnParserM
body <- parseLambdaExpressionM
let nested = foldr (\v acc -> SLambda [getIdentifier v] acc) body rest
pure (SLambda [getIdentifier param] nested)
2025-01-20 19:20:29 -06:00
parseLambdaExpressionM :: ParserM TricuAST
parseLambdaExpressionM = choice
[ try parseLambdaApplicationM
, parseAtomicLambdaM
2024-12-27 08:17:06 -06:00
]
2025-01-20 19:20:29 -06:00
parseAtomicLambdaM :: ParserM TricuAST
parseAtomicLambdaM = choice
[ parseVarM
, parseTreeLeafM
, parseLiteralM
, parseListLiteralM
, try parseLambdaM
, between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM
2024-12-27 08:17:06 -06:00
]
2025-01-20 19:20:29 -06:00
parseApplicationM :: ParserM TricuAST
parseApplicationM = do
func <- parseAtomicBaseM
scnParserM
args <- many $ do
scnParserM
arg <- parseAtomicM
return arg
return $ foldl SApp func args
parseLambdaApplicationM :: ParserM TricuAST
parseLambdaApplicationM = do
func <- parseAtomicLambdaM
scnParserM
args <- many $ do
arg <- parseAtomicLambdaM
scnParserM
pure arg
pure $ foldl SApp func args
parseAtomicBaseM :: ParserM TricuAST
parseAtomicBaseM = choice
[ parseTreeLeafM
, parseGroupedM
]
2025-01-20 19:20:29 -06:00
parseTreeLeafM :: ParserM TricuAST
parseTreeLeafM = do
2025-01-21 14:21:47 -06:00
_ <- satisfyM $ \case
LKeywordT -> True
_ -> False
notFollowedBy $ lift $ satisfy (== LAssign)
2025-01-20 19:20:29 -06:00
pure TLeaf
parseTreeTermM :: ParserM TricuAST
parseTreeTermM = do
base <- parseTreeLeafOrParenthesizedM
rest <- many parseTreeLeafOrParenthesizedM
pure (foldl combine base rest)
where
combine acc next
| TLeaf <- acc = TStem next
| TStem t <- acc = TFork t next
| TFork _ _ <- acc = TFork acc next
2025-01-20 19:20:29 -06:00
parseTreeLeafOrParenthesizedM :: ParserM TricuAST
parseTreeLeafOrParenthesizedM = choice
[ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM
, parseTreeLeafM
]
2025-01-20 19:20:29 -06:00
parseAtomicM :: ParserM TricuAST
parseAtomicM = choice
[ parseVarM
, parseTreeLeafM
, parseListLiteralM
, parseGroupedM
, parseLiteralM
]
2025-01-20 19:20:29 -06:00
parseGroupedM :: ParserM TricuAST
parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $
scnParserM *> parseExpressionM <* scnParserM
2025-01-20 19:20:29 -06:00
parseLiteralM :: ParserM TricuAST
parseLiteralM = choice
[ parseIntLiteralM
, parseStrLiteralM
]
2025-01-20 19:20:29 -06:00
parseListLiteralM :: ParserM TricuAST
parseListLiteralM = do
_ <- satisfyM (== LOpenBracket)
elements <- many parseListItemM
_ <- satisfyM (== LCloseBracket)
pure (SList elements)
parseListItemM :: ParserM TricuAST
parseListItemM = choice
[ parseGroupedItemM
, parseListLiteralM
, parseSingleItemM
]
2025-01-20 19:20:29 -06:00
parseGroupedItemM :: ParserM TricuAST
parseGroupedItemM = do
_ <- satisfyM (== LOpenParen)
inner <- parseExpressionM
_ <- satisfyM (== LCloseParen)
pure inner
2025-01-20 19:20:29 -06:00
parseSingleItemM :: ParserM TricuAST
parseSingleItemM = do
2025-01-21 14:21:47 -06:00
token <- satisfyM $ \case
LIdentifier _ -> True
LKeywordT -> True
_ -> False
case token of
2025-01-20 19:20:29 -06:00
LIdentifier name -> pure (SVar name)
LKeywordT -> pure TLeaf
_ -> fail "Unexpected token in list item"
parseVarM :: ParserM TricuAST
parseVarM = do
2025-01-21 14:21:47 -06:00
LIdentifier name <- satisfyM $ \case
LIdentifier _ -> True
_ -> False
2025-01-20 19:20:29 -06:00
if name == "t" || name == "__result"
then fail ("Reserved keyword: " ++ name ++ " cannot be assigned.")
else pure (SVar name)
parseIntLiteralM :: ParserM TricuAST
parseIntLiteralM = do
2025-01-21 14:21:47 -06:00
LIntegerLiteral value <- satisfyM $ \case
LIntegerLiteral _ -> True
_ -> False
2025-01-20 19:20:29 -06:00
pure (SInt value)
parseStrLiteralM :: ParserM TricuAST
parseStrLiteralM = do
2025-01-21 14:21:47 -06:00
LStringLiteral value <- satisfyM $ \case
LStringLiteral _ -> True
_ -> False
2025-01-20 19:20:29 -06:00
pure (SStr value)
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = error "Expected identifier"
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
2025-01-20 19:20:29 -06:00
let errors = bundleErrors bundle
errorList = Data.List.NonEmpty.toList errors
formattedErrs = map showError errorList
in unlines ("Parse error(s) encountered:" : formattedErrs)
showError :: ParseError [LToken] Void -> String
showError (TrivialError offset (Just (Tokens tokenStream)) expected) =
2025-01-20 19:20:29 -06:00
"Parse error at offset " ++ show offset
++ ": unexpected token " ++ show tokenStream
++ ", expected one of " ++ show (Set.toList expected)
showError (FancyError offset fancy) =
2025-01-20 19:20:29 -06:00
"Parse error at offset " ++ show offset ++ ":\n "
++ unlines (map show (Set.toList fancy))
showError (TrivialError offset Nothing expected) =
2025-01-20 19:20:29 -06:00
"Parse error at offset " ++ show offset
++ ": expected one of " ++ show (Set.toList expected)