module Parser where import Lexer import Research import Control.Monad (void) import Control.Monad.State import Data.List.NonEmpty (toList) import Data.Void (Void) import Text.Megaparsec import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty) import qualified Data.Set as Set data PState = PState { parenDepth :: Int , bracketDepth :: Int } deriving (Show) type ParserM = StateT PState (Parsec Void [LToken]) satisfyM :: (LToken -> Bool) -> ParserM LToken satisfyM f = do token <- lift (satisfy f) modify' (updateDepth token) return token updateDepth :: LToken -> PState -> PState updateDepth LOpenParen st = st { parenDepth = parenDepth st + 1 } updateDepth LCloseParen st | parenDepth st > 0 = st { parenDepth = parenDepth st - 1 } | otherwise = errorWithoutStackTrace "Unmatched closing parentheses" updateDepth LOpenBracket st = st { bracketDepth = bracketDepth st + 1 } updateDepth LCloseBracket st | bracketDepth st > 0 = st { bracketDepth = bracketDepth st - 1 } | otherwise = errorWithoutStackTrace "Unmatched closing bracket" updateDepth _ st = st topLevelNewline :: ParserM () topLevelNewline = do st <- get if parenDepth st == 0 && bracketDepth st == 0 then void (satisfyM (== LNewline)) else fail "Top-level exit in nested context (paren or bracket)" parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST] parseProgram tokens = runParser (evalStateT (parseProgramM <* finalizeDepth <* eof) (PState 0 0)) "" tokens parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST parseSingleExpr tokens = runParser (evalStateT (scnParserM *> parseExpressionM <* finalizeDepth <* eof) (PState 0 0)) "" tokens finalizeDepth :: ParserM () finalizeDepth = do st <- get case (parenDepth st, bracketDepth st) of (0, 0) -> pure () (p, b) -> fail $ "Unmatched tokens: " ++ show (p, b) parseTricu :: String -> [TricuAST] parseTricu input = case lexTricu input of [] -> [] toks -> case parseProgram toks of Left err -> errorWithoutStackTrace (handleParseError err) Right asts -> asts parseSingle :: String -> TricuAST parseSingle input = case lexTricu input of [] -> SEmpty toks -> case parseSingleExpr toks of Left err -> errorWithoutStackTrace (handleParseError err) Right ast -> ast parseProgramM :: ParserM [TricuAST] parseProgramM = do skipMany topLevelNewline exprs <- sepEndBy parseOneExpression (some topLevelNewline) skipMany topLevelNewline return exprs parseOneExpression :: ParserM TricuAST parseOneExpression = scnParserM *> parseExpressionM scnParserM :: ParserM () scnParserM = skipMany $ do t <- lookAhead anySingle st <- get if | (parenDepth st > 0 || bracketDepth st > 0) && case t of LNewline -> True _ -> False -> void $ satisfyM $ \case LNewline -> True _ -> False | otherwise -> fail "In nested context or no space token" <|> empty eofM :: ParserM () eofM = lift eof parseExpressionM :: ParserM TricuAST parseExpressionM = choice [ try parseFunctionM , try parseLambdaM , try parseLambdaExpressionM , try parseListLiteralM , try parseApplicationM , try parseTreeTermM , parseLiteralM ] parseFunctionM :: ParserM TricuAST parseFunctionM = do LIdentifier name <- satisfyM $ \case LIdentifier _ -> True _ -> False args <- many $ satisfyM $ \case LIdentifier _ -> True _ -> False _ <- satisfyM (== LAssign) scnParserM body <- parseExpressionM pure (SFunc name (map getIdentifier args) body) parseLambdaM :: ParserM TricuAST parseLambdaM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ do _ <- satisfyM (== LBackslash) param <- satisfyM $ \case LIdentifier _ -> True _ -> False rest <- many $ satisfyM $ \case LIdentifier _ -> True _ -> False _ <- satisfyM (== LColon) scnParserM body <- parseLambdaExpressionM let nested = foldr (\v acc -> SLambda [getIdentifier v] acc) body rest pure (SLambda [getIdentifier param] nested) parseLambdaExpressionM :: ParserM TricuAST parseLambdaExpressionM = choice [ try parseLambdaApplicationM , parseAtomicLambdaM ] parseAtomicLambdaM :: ParserM TricuAST parseAtomicLambdaM = choice [ parseVarM , parseTreeLeafM , parseLiteralM , parseListLiteralM , try parseLambdaM , between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM ] parseApplicationM :: ParserM TricuAST parseApplicationM = do func <- parseAtomicBaseM scnParserM args <- many $ do scnParserM arg <- parseAtomicM return arg return $ foldl SApp func args parseLambdaApplicationM :: ParserM TricuAST parseLambdaApplicationM = do func <- parseAtomicLambdaM scnParserM args <- many $ do arg <- parseAtomicLambdaM scnParserM pure arg pure $ foldl SApp func args parseAtomicBaseM :: ParserM TricuAST parseAtomicBaseM = choice [ parseTreeLeafM , parseGroupedM ] parseTreeLeafM :: ParserM TricuAST parseTreeLeafM = do _ <- satisfyM $ \case LKeywordT -> True _ -> False notFollowedBy $ lift $ satisfy (== LAssign) pure TLeaf parseTreeTermM :: ParserM TricuAST parseTreeTermM = do base <- parseTreeLeafOrParenthesizedM rest <- many parseTreeLeafOrParenthesizedM pure (foldl combine base rest) where combine acc next | TLeaf <- acc = TStem next | TStem t <- acc = TFork t next | TFork _ _ <- acc = TFork acc next parseTreeLeafOrParenthesizedM :: ParserM TricuAST parseTreeLeafOrParenthesizedM = choice [ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM , parseTreeLeafM ] parseAtomicM :: ParserM TricuAST parseAtomicM = choice [ parseVarM , parseTreeLeafM , parseListLiteralM , parseGroupedM , parseLiteralM ] parseGroupedM :: ParserM TricuAST parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ scnParserM *> parseExpressionM <* scnParserM parseLiteralM :: ParserM TricuAST parseLiteralM = choice [ parseIntLiteralM , parseStrLiteralM ] parseListLiteralM :: ParserM TricuAST parseListLiteralM = do _ <- satisfyM (== LOpenBracket) elements <- many $ do scnParserM parseListItemM scnParserM _ <- satisfyM (== LCloseBracket) pure (SList elements) parseListItemM :: ParserM TricuAST parseListItemM = choice [ parseGroupedItemM , parseListLiteralM , parseSingleItemM ] parseGroupedItemM :: ParserM TricuAST parseGroupedItemM = do _ <- satisfyM (== LOpenParen) inner <- parseExpressionM _ <- satisfyM (== LCloseParen) pure inner parseSingleItemM :: ParserM TricuAST parseSingleItemM = do token <- satisfyM $ \case LIdentifier _ -> True LKeywordT -> True _ -> False case token of LIdentifier name -> pure (SVar name) LKeywordT -> pure TLeaf _ -> fail "Unexpected token in list item" parseVarM :: ParserM TricuAST parseVarM = do LIdentifier name <- satisfyM $ \case LIdentifier _ -> True _ -> False if name == "t" || name == "__result" then fail ("Reserved keyword: " ++ name ++ " cannot be assigned.") else pure (SVar name) parseIntLiteralM :: ParserM TricuAST parseIntLiteralM = do LIntegerLiteral value <- satisfyM $ \case LIntegerLiteral _ -> True _ -> False pure (SInt value) parseStrLiteralM :: ParserM TricuAST parseStrLiteralM = do LStringLiteral value <- satisfyM $ \case LStringLiteral _ -> True _ -> False pure (SStr value) getIdentifier :: LToken -> String getIdentifier (LIdentifier name) = name getIdentifier _ = errorWithoutStackTrace "Expected identifier" handleParseError :: ParseErrorBundle [LToken] Void -> String handleParseError bundle = let errors = bundleErrors bundle formattedErrors = map formatError (Data.List.NonEmpty.toList errors) in unlines ("Parse error(s) encountered:" : formattedErrors) formatError :: ParseError [LToken] Void -> String formatError (TrivialError offset unexpected expected) = let unexpectedMsg = case unexpected of Just x -> "unexpected token " ++ show x Nothing -> "unexpected end of input" expectedMsg = if null expected then "" else "expected " ++ show (Set.toList expected) in "Parse error at offset " ++ show offset ++ ": " ++ unexpectedMsg ++ if null expectedMsg then "" else " " ++ expectedMsg formatError (FancyError offset _) = "Parse error at offset " ++ show offset ++ ": unexpected FancyError"