module Parser where import Lexer import Research import Control.Monad (void) import Control.Monad.State import Data.List.NonEmpty (toList) import Data.Void (Void) import Text.Megaparsec import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty) import qualified Data.Set as Set data PState = PState { parenDepth :: Int , bracketDepth :: Int } deriving (Show) type ParserM = StateT PState (Parsec Void [LToken]) satisfyM :: (LToken -> Bool) -> ParserM LToken satisfyM f = do token <- lift (satisfy f) modify' (updateDepth token) return token updateDepth :: LToken -> PState -> PState updateDepth LOpenParen st = st { parenDepth = parenDepth st + 1 } updateDepth LOpenBracket st = st { bracketDepth = bracketDepth st + 1 } updateDepth LCloseParen st = st { parenDepth = parenDepth st - 1 } updateDepth LCloseBracket st = st { bracketDepth = bracketDepth st - 1 } updateDepth _ st = st topLevelNewline :: ParserM () topLevelNewline = do st <- get if parenDepth st == 0 && bracketDepth st == 0 then void (satisfyM (== LNewline)) else fail "Top-level exit in nested context (paren or bracket)" parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST] parseProgram tokens = runParser (evalStateT (parseProgramM <* finalizeDepth <* eof) (PState 0 0)) "" tokens parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST parseSingleExpr tokens = runParser (evalStateT (scnParserM *> parseExpressionM <* finalizeDepth <* eof) (PState 0 0)) "" tokens finalizeDepth :: ParserM () finalizeDepth = do st <- get case (parenDepth st, bracketDepth st) of (0, 0) -> pure () (p, b) -> fail $ "Unmatched tokens: " ++ show (p, b) parseTricu :: String -> [TricuAST] parseTricu input = case lexTricu input of [] -> [] toks -> case parseProgram toks of Left err -> errorWithoutStackTrace (handleParseError err) Right asts -> asts parseSingle :: String -> TricuAST parseSingle input = case lexTricu input of [] -> SEmpty toks -> case parseSingleExpr toks of Left err -> errorWithoutStackTrace (handleParseError err) Right ast -> ast parseProgramM :: ParserM [TricuAST] parseProgramM = do skipMany topLevelNewline importNodes <- many (do node <- parseImportM skipMany topLevelNewline return node) skipMany topLevelNewline exprs <- sepEndBy parseOneExpression (some topLevelNewline) skipMany topLevelNewline return (importNodes ++ exprs) parseImportM :: ParserM TricuAST parseImportM = do LImport filePath moduleName <- satisfyM isImport pure (SImport filePath moduleName) where isImport (LImport _ _) = True isImport _ = False parseOneExpression :: ParserM TricuAST parseOneExpression = scnParserM *> parseExpressionM scnParserM :: ParserM () scnParserM = skipMany $ do t <- lookAhead anySingle st <- get if | (parenDepth st > 0 || bracketDepth st > 0) && (t == LNewline) -> void $ satisfyM (== LNewline) | otherwise -> fail "In nested context or no space token" <|> empty eofM :: ParserM () eofM = lift eof parseExpressionM :: ParserM TricuAST parseExpressionM = choice [ try parseFunctionM , try parseLambdaM , try parseLambdaExpressionM , try parseListLiteralM , try parseApplicationM , try parseTreeTermM , parseLiteralM ] parseFunctionM :: ParserM TricuAST parseFunctionM = do let ident = (\case LIdentifier _ -> True; _ -> False) LIdentifier name <- satisfyM ident args <- many $ satisfyM ident _ <- satisfyM (== LAssign) scnParserM body <- parseExpressionM pure (SDef name (map getIdentifier args) body) parseLambdaM :: ParserM TricuAST parseLambdaM = do let ident = (\case LIdentifier _ -> True; _ -> False) _ <- satisfyM (== LBackslash) params <- some (satisfyM ident) _ <- satisfyM (== LColon) scnParserM body <- parseLambdaExpressionM pure $ foldr (\param acc -> SLambda [getIdentifier param] acc) body params parseLambdaExpressionM :: ParserM TricuAST parseLambdaExpressionM = choice [ try parseLambdaApplicationM , parseAtomicLambdaM ] parseAtomicLambdaM :: ParserM TricuAST parseAtomicLambdaM = choice [ parseVarM , parseTreeLeafM , parseLiteralM , parseListLiteralM , try parseLambdaM , between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM ] parseApplicationM :: ParserM TricuAST parseApplicationM = do func <- parseAtomicBaseM scnParserM args <- many $ do scnParserM arg <- parseAtomicM return arg return $ foldl SApp func args parseLambdaApplicationM :: ParserM TricuAST parseLambdaApplicationM = do func <- parseAtomicLambdaM scnParserM args <- many $ do arg <- parseAtomicLambdaM scnParserM pure arg pure $ foldl SApp func args parseAtomicBaseM :: ParserM TricuAST parseAtomicBaseM = choice [ parseTreeLeafM , parseGroupedM ] parseTreeLeafM :: ParserM TricuAST parseTreeLeafM = do let keyword = (\case LKeywordT -> True; _ -> False) _ <- satisfyM keyword notFollowedBy $ lift $ satisfy (== LAssign) pure TLeaf parseTreeTermM :: ParserM TricuAST parseTreeTermM = do base <- parseTreeLeafOrParenthesizedM rest <- many parseTreeLeafOrParenthesizedM pure (foldl combine base rest) where combine acc next | TLeaf <- acc = TStem next | TStem t <- acc = TFork t next | TFork _ _ <- acc = TFork acc next parseTreeLeafOrParenthesizedM :: ParserM TricuAST parseTreeLeafOrParenthesizedM = choice [ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM , parseTreeLeafM ] parseAtomicM :: ParserM TricuAST parseAtomicM = choice [ parseVarM , parseTreeLeafM , parseListLiteralM , parseGroupedM , parseLiteralM ] parseGroupedM :: ParserM TricuAST parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ scnParserM *> parseExpressionM <* scnParserM parseLiteralM :: ParserM TricuAST parseLiteralM = choice [ parseIntLiteralM , parseStrLiteralM ] parseListLiteralM :: ParserM TricuAST parseListLiteralM = do _ <- satisfyM (== LOpenBracket) elements <- many $ do scnParserM parseListItemM scnParserM _ <- satisfyM (== LCloseBracket) pure (SList elements) parseListItemM :: ParserM TricuAST parseListItemM = choice [ parseGroupedItemM , parseListLiteralM , parseSingleItemM ] parseGroupedItemM :: ParserM TricuAST parseGroupedItemM = do _ <- satisfyM (== LOpenParen) inner <- parseExpressionM _ <- satisfyM (== LCloseParen) pure inner parseSingleItemM :: ParserM TricuAST parseSingleItemM = do token <- satisfyM (\case LIdentifier _ -> True; LKeywordT -> True; _ -> False) if | LIdentifier name <- token -> pure (SVar name) | token == LKeywordT -> pure TLeaf | otherwise -> fail "Unexpected token in list item" parseVarM :: ParserM TricuAST parseVarM = do token <- satisfyM (\case LNamespace _ -> True LIdentifier _ -> True _ -> False) case token of LNamespace ns -> do _ <- satisfyM (== LDot) LIdentifier name <- satisfyM (\case LIdentifier _ -> True; _ -> False) pure $ SVar (ns ++ "." ++ name) LIdentifier name | name == "t" || name == "!result" -> fail ("Reserved keyword: " ++ name ++ " cannot be assigned.") | otherwise -> pure (SVar name) _ -> fail "Unexpected token while parsing variable" parseIntLiteralM :: ParserM TricuAST parseIntLiteralM = do let intL = (\case LIntegerLiteral _ -> True; _ -> False) token <- satisfyM intL if | LIntegerLiteral value <- token -> pure (SInt value) | otherwise -> fail "Unexpected token while parsing integer literal" parseStrLiteralM :: ParserM TricuAST parseStrLiteralM = do let strL = (\case LStringLiteral _ -> True; _ -> False) token <- satisfyM strL if | LStringLiteral value <- token -> pure (SStr value) | otherwise -> fail "Unexpected token while parsing string literal" getIdentifier :: LToken -> String getIdentifier (LIdentifier name) = name getIdentifier _ = errorWithoutStackTrace "Expected identifier" handleParseError :: ParseErrorBundle [LToken] Void -> String handleParseError bundle = let errors = bundleErrors bundle formattedErrors = map formatError (Data.List.NonEmpty.toList errors) in unlines ("Parse error(s) encountered:" : formattedErrors) formatError :: ParseError [LToken] Void -> String formatError (TrivialError offset unexpected expected) = let unexpectedMsg = case unexpected of Just x -> "unexpected token " ++ show x Nothing -> "unexpected end of input" expectedMsg = if null expected then "" else "expected " ++ show (Set.toList expected) in "Parse error at offset " ++ show offset ++ ": " ++ unexpectedMsg ++ if null expectedMsg then "" else " " ++ expectedMsg formatError (FancyError offset _) = "Parse error at offset " ++ show offset ++ ": unexpected FancyError"