tricu/src/Parser.hs

309 lines
8.5 KiB
Haskell

module Parser where
import Lexer
import Research
import Control.Monad (void)
import Control.Monad.State
import Data.List.NonEmpty (toList)
import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty)
import qualified Data.Set as Set
data PState = PState
{ parenDepth :: Int
, bracketDepth :: Int
} deriving (Show)
type ParserM = StateT PState (Parsec Void [LToken])
satisfyM :: (LToken -> Bool) -> ParserM LToken
satisfyM f = do
token <- lift (satisfy f)
modify' (updateDepth token)
return token
updateDepth :: LToken -> PState -> PState
updateDepth LOpenParen st = st { parenDepth = parenDepth st + 1 }
updateDepth LCloseParen st
| parenDepth st > 0 = st { parenDepth = parenDepth st - 1 }
| otherwise = errorWithoutStackTrace "Unmatched closing parentheses"
updateDepth LOpenBracket st = st { bracketDepth = bracketDepth st + 1 }
updateDepth LCloseBracket st
| bracketDepth st > 0 = st { bracketDepth = bracketDepth st - 1 }
| otherwise = errorWithoutStackTrace "Unmatched closing bracket"
updateDepth _ st = st
topLevelNewline :: ParserM ()
topLevelNewline = do
st <- get
if parenDepth st == 0 && bracketDepth st == 0
then void (satisfyM (== LNewline))
else fail "Top-level exit in nested context (paren or bracket)"
parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST]
parseProgram tokens =
runParser (evalStateT (parseProgramM <* finalizeDepth <* eof) (PState 0 0)) "" tokens
parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST
parseSingleExpr tokens =
runParser (evalStateT (scnParserM *> parseExpressionM <* finalizeDepth <* eof) (PState 0 0)) "" tokens
finalizeDepth :: ParserM ()
finalizeDepth = do
st <- get
case (parenDepth st, bracketDepth st) of
(0, 0) -> pure ()
(p, b) -> fail $ "Unmatched tokens: " ++ show (p, b)
parseTricu :: String -> [TricuAST]
parseTricu input =
case lexTricu input of
[] -> []
toks ->
case parseProgram toks of
Left err -> errorWithoutStackTrace (handleParseError err)
Right asts -> asts
parseSingle :: String -> TricuAST
parseSingle input =
case lexTricu input of
[] -> SEmpty
toks ->
case parseSingleExpr toks of
Left err -> errorWithoutStackTrace (handleParseError err)
Right ast -> ast
parseProgramM :: ParserM [TricuAST]
parseProgramM = do
skipMany topLevelNewline
exprs <- sepEndBy parseOneExpression (some topLevelNewline)
skipMany topLevelNewline
return exprs
parseOneExpression :: ParserM TricuAST
parseOneExpression = scnParserM *> parseExpressionM
scnParserM :: ParserM ()
scnParserM = skipMany $ do
t <- lookAhead anySingle
st <- get
if | (parenDepth st > 0 || bracketDepth st > 0) && case t of
LNewline -> True
_ -> False -> void $ satisfyM $ \case
LNewline -> True
_ -> False
| otherwise -> fail "In nested context or no space token" <|> empty
eofM :: ParserM ()
eofM = lift eof
parseExpressionM :: ParserM TricuAST
parseExpressionM = choice
[ try parseFunctionM
, try parseLambdaM
, try parseLambdaExpressionM
, try parseListLiteralM
, try parseApplicationM
, try parseTreeTermM
, parseLiteralM
]
parseFunctionM :: ParserM TricuAST
parseFunctionM = do
LIdentifier name <- satisfyM $ \case
LIdentifier _ -> True
_ -> False
args <- many $ satisfyM $ \case
LIdentifier _ -> True
_ -> False
_ <- satisfyM (== LAssign)
scnParserM
body <- parseExpressionM
pure (SFunc name (map getIdentifier args) body)
parseLambdaM :: ParserM TricuAST
parseLambdaM =
between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ do
_ <- satisfyM (== LBackslash)
param <- satisfyM $ \case
LIdentifier _ -> True
_ -> False
rest <- many $ satisfyM $ \case
LIdentifier _ -> True
_ -> False
_ <- satisfyM (== LColon)
scnParserM
body <- parseLambdaExpressionM
let nested = foldr (\v acc -> SLambda [getIdentifier v] acc) body rest
pure (SLambda [getIdentifier param] nested)
parseLambdaExpressionM :: ParserM TricuAST
parseLambdaExpressionM = choice
[ try parseLambdaApplicationM
, parseAtomicLambdaM
]
parseAtomicLambdaM :: ParserM TricuAST
parseAtomicLambdaM = choice
[ parseVarM
, parseTreeLeafM
, parseLiteralM
, parseListLiteralM
, try parseLambdaM
, between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM
]
parseApplicationM :: ParserM TricuAST
parseApplicationM = do
func <- parseAtomicBaseM
scnParserM
args <- many $ do
scnParserM
arg <- parseAtomicM
return arg
return $ foldl SApp func args
parseLambdaApplicationM :: ParserM TricuAST
parseLambdaApplicationM = do
func <- parseAtomicLambdaM
scnParserM
args <- many $ do
arg <- parseAtomicLambdaM
scnParserM
pure arg
pure $ foldl SApp func args
parseAtomicBaseM :: ParserM TricuAST
parseAtomicBaseM = choice
[ parseTreeLeafM
, parseGroupedM
]
parseTreeLeafM :: ParserM TricuAST
parseTreeLeafM = do
_ <- satisfyM $ \case
LKeywordT -> True
_ -> False
notFollowedBy $ lift $ satisfy (== LAssign)
pure TLeaf
parseTreeTermM :: ParserM TricuAST
parseTreeTermM = do
base <- parseTreeLeafOrParenthesizedM
rest <- many parseTreeLeafOrParenthesizedM
pure (foldl combine base rest)
where
combine acc next
| TLeaf <- acc = TStem next
| TStem t <- acc = TFork t next
| TFork _ _ <- acc = TFork acc next
parseTreeLeafOrParenthesizedM :: ParserM TricuAST
parseTreeLeafOrParenthesizedM = choice
[ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM
, parseTreeLeafM
]
parseAtomicM :: ParserM TricuAST
parseAtomicM = choice
[ parseVarM
, parseTreeLeafM
, parseListLiteralM
, parseGroupedM
, parseLiteralM
]
parseGroupedM :: ParserM TricuAST
parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $
scnParserM *> parseExpressionM <* scnParserM
parseLiteralM :: ParserM TricuAST
parseLiteralM = choice
[ parseIntLiteralM
, parseStrLiteralM
]
parseListLiteralM :: ParserM TricuAST
parseListLiteralM = do
_ <- satisfyM (== LOpenBracket)
elements <- many $ do
scnParserM
parseListItemM
scnParserM
_ <- satisfyM (== LCloseBracket)
pure (SList elements)
parseListItemM :: ParserM TricuAST
parseListItemM = choice
[ parseGroupedItemM
, parseListLiteralM
, parseSingleItemM
]
parseGroupedItemM :: ParserM TricuAST
parseGroupedItemM = do
_ <- satisfyM (== LOpenParen)
inner <- parseExpressionM
_ <- satisfyM (== LCloseParen)
pure inner
parseSingleItemM :: ParserM TricuAST
parseSingleItemM = do
token <- satisfyM $ \case
LIdentifier _ -> True
LKeywordT -> True
_ -> False
case token of
LIdentifier name -> pure (SVar name)
LKeywordT -> pure TLeaf
_ -> fail "Unexpected token in list item"
parseVarM :: ParserM TricuAST
parseVarM = do
LIdentifier name <- satisfyM $ \case
LIdentifier _ -> True
_ -> False
if name == "t" || name == "__result"
then fail ("Reserved keyword: " ++ name ++ " cannot be assigned.")
else pure (SVar name)
parseIntLiteralM :: ParserM TricuAST
parseIntLiteralM = do
LIntegerLiteral value <- satisfyM $ \case
LIntegerLiteral _ -> True
_ -> False
pure (SInt value)
parseStrLiteralM :: ParserM TricuAST
parseStrLiteralM = do
LStringLiteral value <- satisfyM $ \case
LStringLiteral _ -> True
_ -> False
pure (SStr value)
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = errorWithoutStackTrace "Expected identifier"
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
let errors = bundleErrors bundle
formattedErrors = map formatError (Data.List.NonEmpty.toList errors)
in unlines ("Parse error(s) encountered:" : formattedErrors)
formatError :: ParseError [LToken] Void -> String
formatError (TrivialError offset unexpected expected) =
let unexpectedMsg = case unexpected of
Just x -> "unexpected token " ++ show x
Nothing -> "unexpected end of input"
expectedMsg = if null expected
then ""
else "expected " ++ show (Set.toList expected)
in "Parse error at offset " ++ show offset ++ ": " ++ unexpectedMsg ++
if null expectedMsg then "" else " " ++ expectedMsg
formatError (FancyError offset _) =
"Parse error at offset " ++ show offset ++ ": unexpected FancyError"