tricu/src/Parser.hs
2025-01-20 19:20:29 -06:00

300 lines
8.0 KiB
Haskell

module Parser where
import Lexer
import Research
import Control.Monad (void)
import Control.Monad.State
import Data.List.NonEmpty (toList)
import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty)
import qualified Data.Set as Set
data PState = PState
{ depth :: Int
} deriving (Show)
type ParserM = StateT PState (Parsec Void [LToken])
satisfyM :: (LToken -> Bool) -> ParserM LToken
satisfyM f = do
token <- lift (satisfy f)
modify' (updateDepth token)
return token
updateDepth :: LToken -> PState -> PState
updateDepth LOpenParen st = st { depth = depth st + 1 }
updateDepth LCloseParen st = st { depth = max 0 (depth st - 1) }
updateDepth _ st = st
topLevelNewline :: ParserM ()
topLevelNewline = do
st <- get
if depth st == 0
then void (satisfyM (== LNewline))
else fail "Top-level exit in paren context"
parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST]
parseProgram tokens =
runParser (evalStateT parseProgramM (PState 0)) "" tokens
parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST
parseSingleExpr tokens =
runParser (evalStateT (scnParserM *> parseExpressionM <* eofM) (PState 0)) "" tokens
parseTricu :: String -> [TricuAST]
parseTricu input =
case lexTricu input of
[] -> []
toks ->
case parseProgram toks of
Left err -> error (handleParseError err)
Right asts -> asts
parseSingle :: String -> TricuAST
parseSingle input =
case lexTricu input of
[] -> SEmpty
toks ->
case parseSingleExpr toks of
Left err -> error (handleParseError err)
Right ast -> ast
parseProgramM :: ParserM [TricuAST]
parseProgramM = do
skipMany topLevelNewline
exprs <- sepEndBy parseOneExpression (some topLevelNewline)
skipMany topLevelNewline
return exprs
parseOneExpression :: ParserM TricuAST
parseOneExpression = scnParserM *> parseExpressionM
scnParserM :: ParserM ()
scnParserM = skipMany $ do
t <- lookAhead anySingle
st <- get
if depth st > 0 && isNewline t
then void (satisfyM isNewline)
else
fail "In paren context or no space token" <|> empty
eofM :: ParserM ()
eofM = lift eof
parseExpressionM :: ParserM TricuAST
parseExpressionM = choice
[ try parseFunctionM
, try parseLambdaM
, try parseLambdaExpressionM
, try parseListLiteralM
, try parseApplicationM
, try parseTreeTermM
, parseLiteralM
]
parseFunctionM :: ParserM TricuAST
parseFunctionM = do
LIdentifier name <- satisfyM isIdentifier
args <- many (satisfyM isIdentifier)
_ <- satisfyM (== LAssign)
scnParserM
body <- parseExpressionM
pure (SFunc name (map getIdentifier args) body)
parseLambdaM :: ParserM TricuAST
parseLambdaM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ do
_ <- satisfyM (== LBackslash)
param <- satisfyM isIdentifier
rest <- many (satisfyM isIdentifier)
_ <- satisfyM (== LColon)
scnParserM
body <- parseLambdaExpressionM
let nested = foldr (\v acc -> SLambda [getIdentifier v] acc) body rest
pure (SLambda [getIdentifier param] nested)
parseLambdaExpressionM :: ParserM TricuAST
parseLambdaExpressionM = choice
[ try parseLambdaApplicationM
, parseAtomicLambdaM
]
parseAtomicLambdaM :: ParserM TricuAST
parseAtomicLambdaM = choice
[ parseVarM
, parseTreeLeafM
, parseLiteralM
, parseListLiteralM
, try parseLambdaM
, between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM
]
parseApplicationM :: ParserM TricuAST
parseApplicationM = do
func <- parseAtomicBaseM
scnParserM
args <- many $ do
scnParserM
arg <- parseAtomicM
return arg
return $ foldl SApp func args
parseLambdaApplicationM :: ParserM TricuAST
parseLambdaApplicationM = do
func <- parseAtomicLambdaM
scnParserM
args <- many $ do
arg <- parseAtomicLambdaM
scnParserM
pure arg
pure $ foldl SApp func args
parseAtomicBaseM :: ParserM TricuAST
parseAtomicBaseM = choice
[ parseTreeLeafM
, parseGroupedM
]
parseTreeLeafM :: ParserM TricuAST
parseTreeLeafM = do
_ <- satisfyM isKeywordT
notFollowedBy (lift (satisfy (== LAssign)))
pure TLeaf
parseTreeTermM :: ParserM TricuAST
parseTreeTermM = do
base <- parseTreeLeafOrParenthesizedM
rest <- many parseTreeLeafOrParenthesizedM
pure (foldl combine base rest)
where
combine acc next
| TLeaf <- acc = TStem next
| TStem t <- acc = TFork t next
| TFork _ _ <- acc = TFork acc next
parseTreeLeafOrParenthesizedM :: ParserM TricuAST
parseTreeLeafOrParenthesizedM = choice
[ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM
, parseTreeLeafM
]
parseAtomicM :: ParserM TricuAST
parseAtomicM = choice
[ parseVarM
, parseTreeLeafM
, parseListLiteralM
, parseGroupedM
, parseLiteralM
]
parseGroupedM :: ParserM TricuAST
parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $
scnParserM *> parseExpressionM <* scnParserM
parseLiteralM :: ParserM TricuAST
parseLiteralM = choice
[ parseIntLiteralM
, parseStrLiteralM
]
parseListLiteralM :: ParserM TricuAST
parseListLiteralM = do
_ <- satisfyM (== LOpenBracket)
elements <- many parseListItemM
_ <- satisfyM (== LCloseBracket)
pure (SList elements)
parseListItemM :: ParserM TricuAST
parseListItemM = choice
[ parseGroupedItemM
, parseListLiteralM
, parseSingleItemM
]
parseGroupedItemM :: ParserM TricuAST
parseGroupedItemM = do
_ <- satisfyM (== LOpenParen)
inner <- parseExpressionM
_ <- satisfyM (== LCloseParen)
pure inner
parseSingleItemM :: ParserM TricuAST
parseSingleItemM = do
token <- satisfyM isListItem
case token of
LIdentifier name -> pure (SVar name)
LKeywordT -> pure TLeaf
_ -> fail "Unexpected token in list item"
parseVarM :: ParserM TricuAST
parseVarM = do
LIdentifier name <- satisfyM isIdentifier
if name == "t" || name == "__result"
then fail ("Reserved keyword: " ++ name ++ " cannot be assigned.")
else pure (SVar name)
parseIntLiteralM :: ParserM TricuAST
parseIntLiteralM = do
LIntegerLiteral value <- satisfyM isIntegerLiteral
pure (SInt value)
parseStrLiteralM :: ParserM TricuAST
parseStrLiteralM = do
LStringLiteral value <- satisfyM isStringLiteral
pure (SStr value)
isKeywordT :: LToken -> Bool
isKeywordT LKeywordT = True
isKeywordT _ = False
isIdentifier :: LToken -> Bool
isIdentifier (LIdentifier _) = True
isIdentifier _ = False
isIntegerLiteral :: LToken -> Bool
isIntegerLiteral (LIntegerLiteral _) = True
isIntegerLiteral _ = False
isStringLiteral :: LToken -> Bool
isStringLiteral (LStringLiteral _) = True
isStringLiteral _ = False
isLiteral :: LToken -> Bool
isLiteral (LIntegerLiteral _) = True
isLiteral (LStringLiteral _) = True
isLiteral _ = False
isListItem :: LToken -> Bool
isListItem (LIdentifier _) = True
isListItem LKeywordT = True
isListItem _ = False
isNewline :: LToken -> Bool
isNewline LNewline = True
isNewline _ = False
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = error "Expected identifier"
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
let errors = bundleErrors bundle
errorList = Data.List.NonEmpty.toList errors
formattedErrs = map showError errorList
in unlines ("Parse error(s) encountered:" : formattedErrs)
showError :: ParseError [LToken] Void -> String
showError (TrivialError offset (Just (Tokens tokenStream)) expected) =
"Parse error at offset " ++ show offset
++ ": unexpected token " ++ show tokenStream
++ ", expected one of " ++ show (Set.toList expected)
showError (FancyError offset fancy) =
"Parse error at offset " ++ show offset ++ ":\n "
++ unlines (map show (Set.toList fancy))
showError (TrivialError offset Nothing expected) =
"Parse error at offset " ++ show offset
++ ": expected one of " ++ show (Set.toList expected)