module Parser where import Lexer import Research import Control.Monad (void) import Control.Monad.State import Data.List.NonEmpty (toList) import Data.Void (Void) import Text.Megaparsec import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty) import qualified Data.Set as Set data PState = PState { depth :: Int } deriving (Show) type ParserM = StateT PState (Parsec Void [LToken]) satisfyM :: (LToken -> Bool) -> ParserM LToken satisfyM f = do token <- lift (satisfy f) modify' (updateDepth token) return token updateDepth :: LToken -> PState -> PState updateDepth LOpenParen st = st { depth = depth st + 1 } updateDepth LCloseParen st = st { depth = max 0 (depth st - 1) } updateDepth _ st = st topLevelNewline :: ParserM () topLevelNewline = do st <- get if depth st == 0 then void (satisfyM (== LNewline)) else fail "Top-level exit in paren context" parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST] parseProgram tokens = runParser (evalStateT parseProgramM (PState 0)) "" tokens parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST parseSingleExpr tokens = runParser (evalStateT (scnParserM *> parseExpressionM <* eofM) (PState 0)) "" tokens parseTricu :: String -> [TricuAST] parseTricu input = case lexTricu input of [] -> [] toks -> case parseProgram toks of Left err -> error (handleParseError err) Right asts -> asts parseSingle :: String -> TricuAST parseSingle input = case lexTricu input of [] -> SEmpty toks -> case parseSingleExpr toks of Left err -> error (handleParseError err) Right ast -> ast parseProgramM :: ParserM [TricuAST] parseProgramM = do skipMany topLevelNewline exprs <- sepEndBy parseOneExpression (some topLevelNewline) skipMany topLevelNewline return exprs parseOneExpression :: ParserM TricuAST parseOneExpression = scnParserM *> parseExpressionM scnParserM :: ParserM () scnParserM = skipMany $ do t <- lookAhead anySingle st <- get if depth st > 0 && isNewline t then void (satisfyM isNewline) else fail "In paren context or no space token" <|> empty eofM :: ParserM () eofM = lift eof parseExpressionM :: ParserM TricuAST parseExpressionM = choice [ try parseFunctionM , try parseLambdaM , try parseLambdaExpressionM , try parseListLiteralM , try parseApplicationM , try parseTreeTermM , parseLiteralM ] parseFunctionM :: ParserM TricuAST parseFunctionM = do LIdentifier name <- satisfyM isIdentifier args <- many (satisfyM isIdentifier) _ <- satisfyM (== LAssign) scnParserM body <- parseExpressionM pure (SFunc name (map getIdentifier args) body) parseLambdaM :: ParserM TricuAST parseLambdaM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ do _ <- satisfyM (== LBackslash) param <- satisfyM isIdentifier rest <- many (satisfyM isIdentifier) _ <- satisfyM (== LColon) scnParserM body <- parseLambdaExpressionM let nested = foldr (\v acc -> SLambda [getIdentifier v] acc) body rest pure (SLambda [getIdentifier param] nested) parseLambdaExpressionM :: ParserM TricuAST parseLambdaExpressionM = choice [ try parseLambdaApplicationM , parseAtomicLambdaM ] parseAtomicLambdaM :: ParserM TricuAST parseAtomicLambdaM = choice [ parseVarM , parseTreeLeafM , parseLiteralM , parseListLiteralM , try parseLambdaM , between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM ] parseApplicationM :: ParserM TricuAST parseApplicationM = do func <- parseAtomicBaseM scnParserM args <- many $ do scnParserM arg <- parseAtomicM return arg return $ foldl SApp func args parseLambdaApplicationM :: ParserM TricuAST parseLambdaApplicationM = do func <- parseAtomicLambdaM scnParserM args <- many $ do arg <- parseAtomicLambdaM scnParserM pure arg pure $ foldl SApp func args parseAtomicBaseM :: ParserM TricuAST parseAtomicBaseM = choice [ parseTreeLeafM , parseGroupedM ] parseTreeLeafM :: ParserM TricuAST parseTreeLeafM = do _ <- satisfyM isKeywordT notFollowedBy (lift (satisfy (== LAssign))) pure TLeaf parseTreeTermM :: ParserM TricuAST parseTreeTermM = do base <- parseTreeLeafOrParenthesizedM rest <- many parseTreeLeafOrParenthesizedM pure (foldl combine base rest) where combine acc next | TLeaf <- acc = TStem next | TStem t <- acc = TFork t next | TFork _ _ <- acc = TFork acc next parseTreeLeafOrParenthesizedM :: ParserM TricuAST parseTreeLeafOrParenthesizedM = choice [ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM , parseTreeLeafM ] parseAtomicM :: ParserM TricuAST parseAtomicM = choice [ parseVarM , parseTreeLeafM , parseListLiteralM , parseGroupedM , parseLiteralM ] parseGroupedM :: ParserM TricuAST parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ scnParserM *> parseExpressionM <* scnParserM parseLiteralM :: ParserM TricuAST parseLiteralM = choice [ parseIntLiteralM , parseStrLiteralM ] parseListLiteralM :: ParserM TricuAST parseListLiteralM = do _ <- satisfyM (== LOpenBracket) elements <- many parseListItemM _ <- satisfyM (== LCloseBracket) pure (SList elements) parseListItemM :: ParserM TricuAST parseListItemM = choice [ parseGroupedItemM , parseListLiteralM , parseSingleItemM ] parseGroupedItemM :: ParserM TricuAST parseGroupedItemM = do _ <- satisfyM (== LOpenParen) inner <- parseExpressionM _ <- satisfyM (== LCloseParen) pure inner parseSingleItemM :: ParserM TricuAST parseSingleItemM = do token <- satisfyM isListItem case token of LIdentifier name -> pure (SVar name) LKeywordT -> pure TLeaf _ -> fail "Unexpected token in list item" parseVarM :: ParserM TricuAST parseVarM = do LIdentifier name <- satisfyM isIdentifier if name == "t" || name == "__result" then fail ("Reserved keyword: " ++ name ++ " cannot be assigned.") else pure (SVar name) parseIntLiteralM :: ParserM TricuAST parseIntLiteralM = do LIntegerLiteral value <- satisfyM isIntegerLiteral pure (SInt value) parseStrLiteralM :: ParserM TricuAST parseStrLiteralM = do LStringLiteral value <- satisfyM isStringLiteral pure (SStr value) isKeywordT :: LToken -> Bool isKeywordT LKeywordT = True isKeywordT _ = False isIdentifier :: LToken -> Bool isIdentifier (LIdentifier _) = True isIdentifier _ = False isIntegerLiteral :: LToken -> Bool isIntegerLiteral (LIntegerLiteral _) = True isIntegerLiteral _ = False isStringLiteral :: LToken -> Bool isStringLiteral (LStringLiteral _) = True isStringLiteral _ = False isLiteral :: LToken -> Bool isLiteral (LIntegerLiteral _) = True isLiteral (LStringLiteral _) = True isLiteral _ = False isListItem :: LToken -> Bool isListItem (LIdentifier _) = True isListItem LKeywordT = True isListItem _ = False isNewline :: LToken -> Bool isNewline LNewline = True isNewline _ = False getIdentifier :: LToken -> String getIdentifier (LIdentifier name) = name getIdentifier _ = error "Expected identifier" handleParseError :: ParseErrorBundle [LToken] Void -> String handleParseError bundle = let errors = bundleErrors bundle errorList = Data.List.NonEmpty.toList errors formattedErrs = map showError errorList in unlines ("Parse error(s) encountered:" : formattedErrs) showError :: ParseError [LToken] Void -> String showError (TrivialError offset (Just (Tokens tokenStream)) expected) = "Parse error at offset " ++ show offset ++ ": unexpected token " ++ show tokenStream ++ ", expected one of " ++ show (Set.toList expected) showError (FancyError offset fancy) = "Parse error at offset " ++ show offset ++ ":\n " ++ unlines (map show (Set.toList fancy)) showError (TrivialError offset Nothing expected) = "Parse error at offset " ++ show offset ++ ": expected one of " ++ show (Set.toList expected)