module Parser where import Lexer import Research import Control.Monad (void) import Control.Monad.State import Data.List.NonEmpty (toList) import Data.Void (Void) import Text.Megaparsec import qualified Data.Set as Set data PState = PState { parenDepth :: Int , bracketDepth :: Int } deriving (Show) type ParserM = StateT PState (Parsec Void [LToken]) satisfyM :: (LToken -> Bool) -> ParserM LToken satisfyM f = do tok <- lift (satisfy f) modify' (updateDepth tok) return tok updateDepth :: LToken -> PState -> PState updateDepth LOpenParen st = st { parenDepth = parenDepth st + 1 } updateDepth LOpenBracket st = st { bracketDepth = bracketDepth st + 1 } updateDepth LCloseParen st = st { parenDepth = parenDepth st - 1 } updateDepth LCloseBracket st = st { bracketDepth = bracketDepth st - 1 } updateDepth _ st = st topLevelNewline :: ParserM () topLevelNewline = do st <- get if parenDepth st == 0 && bracketDepth st == 0 then void (satisfyM (== LNewline)) else fail "Top-level exit in nested context (paren or bracket)" parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST] parseProgram toks = runParser (evalStateT (parseProgramM <* finalizeDepth <* eof) (PState 0 0)) "" toks parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST parseSingleExpr toks = runParser (evalStateT (scnParserM *> parseExpressionM <* finalizeDepth <* eof) (PState 0 0)) "" toks finalizeDepth :: ParserM () finalizeDepth = do st <- get case (parenDepth st, bracketDepth st) of (0, 0) -> pure () (p, b) -> fail $ "Unmatched tokens: " ++ show (p, b) parseTricu :: String -> [TricuAST] parseTricu input = case lexTricu input of [] -> [] toks -> case parseProgram toks of Left err -> errorWithoutStackTrace (handleParseError err) Right asts -> asts parseSingle :: String -> TricuAST parseSingle input = case lexTricu input of [] -> SEmpty toks -> case parseSingleExpr toks of Left err -> errorWithoutStackTrace (handleParseError err) Right ast -> ast parseProgramM :: ParserM [TricuAST] parseProgramM = do skipMany topLevelNewline importNodes <- many (do node <- parseImportM skipMany topLevelNewline return node) skipMany topLevelNewline exprs <- sepEndBy parseOneExpression (some topLevelNewline) skipMany topLevelNewline return (importNodes ++ exprs) parseImportM :: ParserM TricuAST parseImportM = do LImport filePath moduleName <- satisfyM isImport pure (SImport filePath moduleName) where isImport (LImport _ _) = True isImport _ = False parseOneExpression :: ParserM TricuAST parseOneExpression = scnParserM *> parseExpressionM scnParserM :: ParserM () scnParserM = skipMany $ do t <- lookAhead anySingle st <- get if | (parenDepth st > 0 || bracketDepth st > 0) && (t == LNewline) -> void $ satisfyM (== LNewline) | otherwise -> fail "In nested context or no space token" <|> empty eofM :: ParserM () eofM = lift eof parseExpressionM :: ParserM TricuAST parseExpressionM = choice [ try parseFunctionM , try parseLambdaM , try parseLambdaExpressionM , try parseListLiteralM , try parseApplicationM , try parseTreeTermM , parseLiteralM ] parseFunctionM :: ParserM TricuAST parseFunctionM = do let ident = (\case LIdentifier _ -> True; _ -> False) LIdentifier name <- satisfyM ident args <- many $ satisfyM ident _ <- satisfyM (== LAssign) scnParserM body <- parseExpressionM pure (SDef name (map getIdentifier args) body) parseLambdaM :: ParserM TricuAST parseLambdaM = do let ident = (\case LIdentifier _ -> True; _ -> False) params <- some (satisfyM ident) _ <- satisfyM (== LColon) scnParserM body <- parseLambdaExpressionM pure $ foldr (\param acc -> SLambda [getIdentifier param] acc) body params parseLambdaExpressionM :: ParserM TricuAST parseLambdaExpressionM = choice [ try parseLambdaApplicationM , parseAtomicLambdaM ] parseAtomicLambdaM :: ParserM TricuAST parseAtomicLambdaM = choice [ try parseLambdaM , parseVarM , parseTreeLeafM , parseLiteralM , parseListLiteralM , between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM ] parseApplicationM :: ParserM TricuAST parseApplicationM = do func <- parseAtomicBaseM scnParserM args <- many $ do scnParserM arg <- parseAtomicM return arg return $ foldl SApp func args parseLambdaApplicationM :: ParserM TricuAST parseLambdaApplicationM = do func <- parseAtomicLambdaM scnParserM args <- many $ do arg <- parseAtomicLambdaM scnParserM pure arg pure $ foldl SApp func args parseAtomicBaseM :: ParserM TricuAST parseAtomicBaseM = choice [ parseTreeLeafM , parseGroupedM ] parseTreeLeafM :: ParserM TricuAST parseTreeLeafM = do let keyword = (\case LKeywordT -> True; _ -> False) _ <- satisfyM keyword notFollowedBy $ lift $ satisfy (== LAssign) pure TLeaf parseTreeTermM :: ParserM TricuAST parseTreeTermM = do base <- parseTreeLeafOrParenthesizedM rest <- many parseTreeLeafOrParenthesizedM pure (foldl combine base rest) where combine acc next | TLeaf <- acc = TStem next | TStem t <- acc = TFork t next | TFork _ _ <- acc = TFork acc next | otherwise = SApp acc next parseTreeLeafOrParenthesizedM :: ParserM TricuAST parseTreeLeafOrParenthesizedM = choice [ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM , parseTreeLeafM ] parseAtomicM :: ParserM TricuAST parseAtomicM = choice [ try parseLambdaM , parseVarM , parseTreeLeafM , parseListLiteralM , parseGroupedM , parseLiteralM ] parseGroupedM :: ParserM TricuAST parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $ scnParserM *> parseExpressionM <* scnParserM parseLiteralM :: ParserM TricuAST parseLiteralM = choice [ parseIntLiteralM , parseStrLiteralM ] parseListLiteralM :: ParserM TricuAST parseListLiteralM = do _ <- satisfyM (== LOpenBracket) elements <- many $ do scnParserM parseListItemM scnParserM _ <- satisfyM (== LCloseBracket) pure (SList elements) parseListItemM :: ParserM TricuAST parseListItemM = choice [ parseGroupedItemM , parseListLiteralM , parseSingleItemM ] parseGroupedItemM :: ParserM TricuAST parseGroupedItemM = do _ <- satisfyM (== LOpenParen) inner <- parseExpressionM _ <- satisfyM (== LCloseParen) pure inner parseSingleItemM :: ParserM TricuAST parseSingleItemM = do tok <- satisfyM (\case LIdentifier _ -> True; LKeywordT -> True; _ -> False) if | LIdentifier name <- tok -> pure (SVar name Nothing) | tok == LKeywordT -> pure TLeaf | otherwise -> fail "Unexpected token in list item" parseVarM :: ParserM TricuAST parseVarM = do tok <- satisfyM (\case LNamespace _ -> True LIdentifier _ -> True LIdentifierWithHash _ _ -> True _ -> False) case tok of LNamespace ns -> do _ <- satisfyM (== LDot) LIdentifier name <- satisfyM (\case LIdentifier _ -> True; _ -> False) pure $ SVar (ns ++ "." ++ name) Nothing LIdentifier name | name == "t" || name == "!result" -> fail ("Reserved keyword: " ++ name ++ " cannot be assigned.") | otherwise -> pure (SVar name Nothing) LIdentifierWithHash name hash -> if name == "t" || name == "!result" then fail ("Reserved keyword: " ++ name ++ " cannot be assigned.") else pure (SVar name (Just hash)) _ -> fail "Unexpected token while parsing variable" parseIntLiteralM :: ParserM TricuAST parseIntLiteralM = do let intL = (\case LIntegerLiteral _ -> True; _ -> False) tok <- satisfyM intL if | LIntegerLiteral value <- tok -> pure (SInt (fromIntegral value)) | otherwise -> fail "Unexpected token while parsing integer literal" parseStrLiteralM :: ParserM TricuAST parseStrLiteralM = do let strL = (\case LStringLiteral _ -> True; _ -> False) tok <- satisfyM strL if | LStringLiteral value <- tok -> pure (SStr value) | otherwise -> fail "Unexpected token while parsing string literal" getIdentifier :: LToken -> String getIdentifier (LIdentifier name) = name getIdentifier _ = errorWithoutStackTrace "Expected identifier" handleParseError :: ParseErrorBundle [LToken] Void -> String handleParseError bundle = let errors = bundleErrors bundle formattedErrors = map formatError (Data.List.NonEmpty.toList errors) in unlines ("Parse error(s) encountered:" : formattedErrors) formatError :: ParseError [LToken] Void -> String formatError (TrivialError offset msgUnexpected expected) = let unexpectedMsg = case msgUnexpected of Just x -> "unexpected token " ++ show x Nothing -> "unexpected end of input" expectedMsg = if null expected then "" else "expected " ++ show (Set.toList expected) in "Parse error at offset " ++ show offset ++ ": " ++ unexpectedMsg ++ if null expectedMsg then "" else " " ++ expectedMsg formatError (FancyError offset _) = "Parse error at offset " ++ show offset ++ ": unexpected FancyError"