module Parser where import Lexer import Research import Control.Monad (void) import Control.Monad.State import Data.List.NonEmpty (toList) import Data.Void (Void) import Text.Megaparsec import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty) import qualified Data.Set as Set data PState = PState { depth :: Int } deriving (Show) type ParserM = StateT PState (Parsec Void [LToken]) satisfyM :: (LToken -> Bool) -> ParserM LToken satisfyM f = do token <- lift (satisfy f) modify' (updateDepth token) return token updateDepth :: LToken -> PState -> PState updateDepth LOpenParen st = st { depth = depth st + 1 } updateDepth LCloseParen st = st { depth = max 0 (depth st - 1) } updateDepth _ st = st topLevelNewline :: ParserM () topLevelNewline = do st <- get if depth st == 0 then void (satisfyM (== LNewline)) else fail "Top-level exit in paren context" parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST] parseProgram tokens = runParser (evalStateT parseProgramM (PState 0)) "" tokens parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST parseSingleExpr tokens = runParser (evalStateT (scnParserM *> parseExpressionM <* eofM) (PState 0)) "" tokens parseTricu :: String -> [TricuAST] parseTricu input = case lexTricu input of [] -> [] toks -> case parseProgram toks of Left err -> error (handleParseError err) Right asts -> asts parseSingle :: String -> TricuAST parseSingle input = case lexTricu input of [] -> SEmpty toks -> case parseSingleExpr toks of Left err -> error (handleParseError err) Right ast -> ast parseProgramM :: ParserM [TricuAST] parseProgramM = do skipMany topLevelNewline exprs <- sepEndBy parseOneExpression (some topLevelNewline) skipMany topLevelNewline return exprs parseOneExpression :: ParserM TricuAST parseOneExpression = scnParserM *> parseExpressionM scnParserM :: ParserM () scnParserM = skipMany $ do t <- lookAhead anySingle st <- get if | depth st > 0 && case t of LNewline -> True _ -> False -> void $ satisfyM $ \case LNewline -> True _ -> False | otherwise -> fail "In paren context or no space token" <|> empty eofM :: ParserM () eofM = lift eof parseExpressionM :: ParserM TricuAST parseExpressionM = choice [ try parseFunctionM , try parseLambdaM , try parseLambdaExpressionM , try parseListLiteralM , try parseApplicationM , try parseTreeTermM , parseLiteralM ] parseFunctionM :: ParserM TricuAST parseFunctionM = do LIdentifier name <- satisfyM $ \case LIdentifier _ -> True _ -> False args <- many $ satisfyM $ \case LIdentifier _ -> True _ -> False _ <- satisfyM (== LAssign) scnParserM body <- parseExpressionM pure (SFunc name (map getIdentifier args) body) parseLambdaM :: ParserM TricuAST parseLambdaM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ do _ <- satisfyM (== LBackslash) param <- satisfyM $ \case LIdentifier _ -> True _ -> False rest <- many $ satisfyM $ \case LIdentifier _ -> True _ -> False _ <- satisfyM (== LColon) scnParserM body <- parseLambdaExpressionM let nested = foldr (\v acc -> SLambda [getIdentifier v] acc) body rest pure (SLambda [getIdentifier param] nested) parseLambdaExpressionM :: ParserM TricuAST parseLambdaExpressionM = choice [ try parseLambdaApplicationM , parseAtomicLambdaM ] parseAtomicLambdaM :: ParserM TricuAST parseAtomicLambdaM = choice [ parseVarM , parseTreeLeafM , parseLiteralM , parseListLiteralM , try parseLambdaM , between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM ] parseApplicationM :: ParserM TricuAST parseApplicationM = do func <- parseAtomicBaseM scnParserM args <- many $ do scnParserM arg <- parseAtomicM return arg return $ foldl SApp func args parseLambdaApplicationM :: ParserM TricuAST parseLambdaApplicationM = do func <- parseAtomicLambdaM scnParserM args <- many $ do arg <- parseAtomicLambdaM scnParserM pure arg pure $ foldl SApp func args parseAtomicBaseM :: ParserM TricuAST parseAtomicBaseM = choice [ parseTreeLeafM , parseGroupedM ] parseTreeLeafM :: ParserM TricuAST parseTreeLeafM = do _ <- satisfyM $ \case LKeywordT -> True _ -> False notFollowedBy $ lift $ satisfy (== LAssign) pure TLeaf parseTreeTermM :: ParserM TricuAST parseTreeTermM = do base <- parseTreeLeafOrParenthesizedM rest <- many parseTreeLeafOrParenthesizedM pure (foldl combine base rest) where combine acc next | TLeaf <- acc = TStem next | TStem t <- acc = TFork t next | TFork _ _ <- acc = TFork acc next parseTreeLeafOrParenthesizedM :: ParserM TricuAST parseTreeLeafOrParenthesizedM = choice [ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM , parseTreeLeafM ] parseAtomicM :: ParserM TricuAST parseAtomicM = choice [ parseVarM , parseTreeLeafM , parseListLiteralM , parseGroupedM , parseLiteralM ] parseGroupedM :: ParserM TricuAST parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ scnParserM *> parseExpressionM <* scnParserM parseLiteralM :: ParserM TricuAST parseLiteralM = choice [ parseIntLiteralM , parseStrLiteralM ] parseListLiteralM :: ParserM TricuAST parseListLiteralM = do _ <- satisfyM (== LOpenBracket) elements <- many parseListItemM _ <- satisfyM (== LCloseBracket) pure (SList elements) parseListItemM :: ParserM TricuAST parseListItemM = choice [ parseGroupedItemM , parseListLiteralM , parseSingleItemM ] parseGroupedItemM :: ParserM TricuAST parseGroupedItemM = do _ <- satisfyM (== LOpenParen) inner <- parseExpressionM _ <- satisfyM (== LCloseParen) pure inner parseSingleItemM :: ParserM TricuAST parseSingleItemM = do token <- satisfyM $ \case LIdentifier _ -> True LKeywordT -> True _ -> False case token of LIdentifier name -> pure (SVar name) LKeywordT -> pure TLeaf _ -> fail "Unexpected token in list item" parseVarM :: ParserM TricuAST parseVarM = do LIdentifier name <- satisfyM $ \case LIdentifier _ -> True _ -> False if name == "t" || name == "__result" then fail ("Reserved keyword: " ++ name ++ " cannot be assigned.") else pure (SVar name) parseIntLiteralM :: ParserM TricuAST parseIntLiteralM = do LIntegerLiteral value <- satisfyM $ \case LIntegerLiteral _ -> True _ -> False pure (SInt value) parseStrLiteralM :: ParserM TricuAST parseStrLiteralM = do LStringLiteral value <- satisfyM $ \case LStringLiteral _ -> True _ -> False pure (SStr value) getIdentifier :: LToken -> String getIdentifier (LIdentifier name) = name getIdentifier _ = error "Expected identifier" handleParseError :: ParseErrorBundle [LToken] Void -> String handleParseError bundle = let errors = bundleErrors bundle errorList = Data.List.NonEmpty.toList errors formattedErrs = map showError errorList in unlines ("Parse error(s) encountered:" : formattedErrs) showError :: ParseError [LToken] Void -> String showError (TrivialError offset (Just (Tokens tokenStream)) expected) = "Parse error at offset " ++ show offset ++ ": unexpected token " ++ show tokenStream ++ ", expected one of " ++ show (Set.toList expected) showError (FancyError offset fancy) = "Parse error at offset " ++ show offset ++ ":\n " ++ unlines (map show (Set.toList fancy)) showError (TrivialError offset Nothing expected) = "Parse error at offset " ++ show offset ++ ": expected one of " ++ show (Set.toList expected)