module Parser where import Lexer import Research import Control.Monad (void) import Data.Void (Void) import Text.Megaparsec import qualified Data.List.NonEmpty as NE import qualified Data.Set as Set type TokParser = Parsec Void [LToken] data Context = Top | Nested deriving (Eq, Show) reservedNames :: Set.Set String reservedNames = Set.fromList ["t", "!result"] parseTricu :: String -> [TricuAST] parseTricu input = let toks = lexTricu input in case runParser programP "" toks of Left err -> errorWithoutStackTrace (handleParseError toks err) Right asts -> asts parseSingle :: String -> TricuAST parseSingle input = let toks = lexTricu input in case parseSingleExpr toks of Left err -> errorWithoutStackTrace (handleParseError toks err) Right ast -> ast parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST] parseProgram = runParser programP "" parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST parseSingleExpr = runParser singleP "" programP :: TokParser [TricuAST] programP = do skipTopNewlines imports <- many (importP <* skipTopNewlines) items <- manyItemsP eof pure (imports ++ items) singleP :: TokParser TricuAST singleP = do skipTopNewlines item <- topItemP skipTopNewlines eof pure item manyItemsP :: TokParser [TricuAST] manyItemsP = do skipTopNewlines done <- atEndP if done then pure [] else do item <- topItemP skipTopNewlines rest <- manyItemsP pure (item : rest) topItemP :: TokParser TricuAST topItemP = do toks <- getInput case toks of LIdentifier _ : LAssign : _ -> definitionP _ -> exprTopP definitionP :: TokParser TricuAST definitionP = do name <- identifierNameP void (tok (== LAssign) "=") skipNestedNewlines body <- exprTopP pure (SDef name [] body) importP :: TokParser TricuAST importP = do t <- tok isImport "import" case t of LImport path ns -> pure (SImport path ns) _ -> fail "internal parser error: expected import token" where isImport (LImport _ _) = True isImport _ = False exprTopP :: TokParser TricuAST exprTopP = do toks <- getInput case lambdaHeadTop toks of Just params -> lambdaP Top params Nothing -> pipeTopP exprNestedP :: TokParser TricuAST exprNestedP = do skipNestedNewlines toks <- getInput case lambdaHeadNested toks of Just params -> lambdaP Nested params Nothing -> pipeNestedP lambdaP :: Context -> [String] -> TokParser TricuAST lambdaP ctx params = do consumeLambdaHead ctx params body <- case ctx of Top -> exprTopP Nested -> exprNestedP pure (foldr (\p acc -> SLambda [p] acc) body params) lambdaHeadTop :: [LToken] -> Maybe [String] lambdaHeadTop toks = case collectIdentifiersNoNewlines toks of (params@(_:_), LColon : _) -> Just params _ -> Nothing lambdaHeadNested :: [LToken] -> Maybe [String] lambdaHeadNested toks = case collectIdentifiersWithNewlines (dropNewlines toks) of (params@(_:_), rest) -> case dropNewlines rest of LColon : _ -> Just params _ -> Nothing _ -> Nothing collectIdentifiersNoNewlines :: [LToken] -> ([String], [LToken]) collectIdentifiersNoNewlines (LIdentifier name : rest) = let (names, final) = collectIdentifiersNoNewlines rest in (name : names, final) collectIdentifiersNoNewlines rest = ([], rest) collectIdentifiersWithNewlines :: [LToken] -> ([String], [LToken]) collectIdentifiersWithNewlines (LIdentifier name : rest) = let (names, final) = collectIdentifiersWithNewlines (dropNewlines rest) in (name : names, final) collectIdentifiersWithNewlines rest = ([], rest) consumeLambdaHead :: Context -> [String] -> TokParser () consumeLambdaHead ctx params = do case ctx of Top -> pure () Nested -> skipNestedNewlines mapM_ consumeParam params case ctx of Top -> pure () Nested -> skipNestedNewlines void (tok (== LColon) ":") skipNestedNewlines where consumeParam _ = do void identifierNameP case ctx of Top -> pure () Nested -> skipNestedNewlines data PipeOp = PipeBackward | PipeForward deriving (Eq, Show) applyPipe :: TricuAST -> (PipeOp, TricuAST) -> TricuAST applyPipe acc (PipeBackward, rhs) = SApp acc rhs applyPipe acc (PipeForward, rhs) = SApp rhs acc pipeTopP :: TokParser TricuAST pipeTopP = pipeChainP appTopP appNestedP pipeNestedP :: TokParser TricuAST pipeNestedP = pipeChainP appNestedP appNestedP pipeChainP :: TokParser TricuAST -> TokParser TricuAST -> TokParser TricuAST pipeChainP parseFirst parseOperand = do first <- parseFirst rest <- many (try pipeSegmentP) pure (foldl applyPipe first rest) where pipeSegmentP = do skipNestedNewlines op <- pipeOpP skipNestedNewlines rhs <- parseOperand pure (op, rhs) pipeOpP :: TokParser PipeOp pipeOpP = (tok (== LArrowLeft) "<|" *> pure PipeBackward) <|> (tok (== LArrowRight) "|>" *> pure PipeForward) appTopP :: TokParser TricuAST appTopP = do first <- atomTopP appRestTopP first appRestTopP :: TricuAST -> TokParser TricuAST appRestTopP acc = do mt <- peekP case mt of Just t | startsAtom t -> do arg <- atomTopP appRestTopP (SApp acc arg) _ -> pure acc appNestedP :: TokParser TricuAST appNestedP = do first <- atomNestedP appRestNestedP first appRestNestedP :: TricuAST -> TokParser TricuAST appRestNestedP acc = do skipNestedNewlines mt <- peekP case mt of Just t | startsAtom t -> do arg <- atomNestedP appRestNestedP (SApp acc arg) _ -> pure acc startsAtom :: LToken -> Bool startsAtom LOpenParen = True startsAtom LOpenBracket = True startsAtom (LIdentifier _) = True startsAtom (LIdentifierWithHash _ _) = True startsAtom (LNamespace _) = True startsAtom LKeywordT = True startsAtom (LIntegerLiteral _) = True startsAtom (LStringLiteral _) = True startsAtom _ = False atomTopP :: TokParser TricuAST atomTopP = do toks <- getInput case toks of LOpenParen : _ -> groupedP LOpenBracket : _ -> listP LNamespace _ : LDot : _ -> namespacedVarP LIdentifier _ : _ -> plainVarP LIdentifierWithHash _ _ : _ -> plainVarP LKeywordT : _ -> leafP LIntegerLiteral _ : _ -> intP LStringLiteral _ : _ -> strP _ -> fail "expected expression atom" atomNestedP :: TokParser TricuAST atomNestedP = skipNestedNewlines *> atomTopP groupedP :: TokParser TricuAST groupedP = do void (tok (== LOpenParen) "(") skipNestedNewlines expr <- exprNestedP skipNestedNewlines void (tok (== LCloseParen) ")") pure expr listP :: TokParser TricuAST listP = do void (tok (== LOpenBracket) "[") skipNestedNewlines xs <- listElementsP skipNestedNewlines void (tok (== LCloseBracket) "]") pure (SList xs) listElementsP :: TokParser [TricuAST] listElementsP = do skipNestedNewlines mt <- peekP case mt of Just LCloseBracket -> pure [] Just t | startsAtom t -> do x <- listElementP xs <- listElementsP pure (x : xs) _ -> pure [] listElementP :: TokParser TricuAST listElementP = do toks <- getInput case toks of LOpenParen : _ -> groupedP LOpenBracket : _ -> listP LNamespace _ : LDot : _ -> namespacedVarP LIdentifier _ : _ -> plainVarP LIdentifierWithHash _ _ : _ -> plainVarP LKeywordT : _ -> leafP LIntegerLiteral _ : _ -> intP LStringLiteral _ : _ -> strP _ -> fail "expected list element" leafP :: TokParser TricuAST leafP = tok (== LKeywordT) "t" *> pure TLeaf plainVarP :: TokParser TricuAST plainVarP = do t <- tok isVar "identifier" case t of LIdentifier name -> pure (SVar name Nothing) LIdentifierWithHash name hash -> pure (SVar name (Just hash)) _ -> fail "internal parser error: expected identifier" where isVar (LIdentifier _) = True isVar (LIdentifierWithHash _ _) = True isVar _ = False namespacedVarP :: TokParser TricuAST namespacedVarP = do nsTok <- tok isNamespace "namespace" void (tok (== LDot) ".") nameTok <- tok isVar "identifier" case (nsTok, nameTok) of (LNamespace ns, LIdentifier name) -> pure (SVar (ns ++ "." ++ name) Nothing) (LNamespace ns, LIdentifierWithHash name hash) -> pure (SVar (ns ++ "." ++ name) (Just hash)) _ -> fail "internal parser error: expected namespaced identifier" where isNamespace (LNamespace _) = True isNamespace _ = False isVar (LIdentifier _) = True isVar (LIdentifierWithHash _ _) = True isVar _ = False intP :: TokParser TricuAST intP = do t <- tok isInt "integer" case t of LIntegerLiteral n -> pure (SInt (fromIntegral n)) _ -> fail "internal parser error: expected integer" where isInt (LIntegerLiteral _) = True isInt _ = False strP :: TokParser TricuAST strP = do t <- tok isStr "string" case t of LStringLiteral s -> pure (SStr s) _ -> fail "internal parser error: expected string" where isStr (LStringLiteral _) = True isStr _ = False identifierNameP :: TokParser String identifierNameP = do t <- tok isIdentifier "identifier" case t of LIdentifier name | name `Set.member` reservedNames -> fail ("reserved name cannot be used as identifier: " ++ name) | otherwise -> pure name _ -> fail "internal parser error: expected identifier" where isIdentifier (LIdentifier _) = True isIdentifier _ = False tok :: (LToken -> Bool) -> String -> TokParser LToken tok predicate expected = satisfy predicate expected peekP :: TokParser (Maybe LToken) peekP = do toks <- getInput pure $ case toks of [] -> Nothing x : _ -> Just x atEndP :: TokParser Bool atEndP = null <$> getInput skipTopNewlines :: TokParser () skipTopNewlines = skipMany (tok (== LNewline) "newline") skipNestedNewlines :: TokParser () skipNestedNewlines = skipMany (tok (== LNewline) "newline") dropNewlines :: [LToken] -> [LToken] dropNewlines (LNewline : rest) = dropNewlines rest dropNewlines rest = rest handleParseError :: [LToken] -> ParseErrorBundle [LToken] Void -> String handleParseError toks bundle = unlines ( "Parse error(s) encountered:" : map (formatError toks) (NE.toList (bundleErrors bundle)) ) formatError :: [LToken] -> ParseError [LToken] Void -> String formatError toks err = case err of TrivialError offset unexpected expected -> let unexpectedMsg = case unexpected of Nothing -> "unexpected end of input" Just x -> "unexpected " ++ show x expectedMsg = if Set.null expected then "" else "; expected one of " ++ show (Set.toList expected) in "Parse error at token offset " ++ show offset ++ ": " ++ unexpectedMsg ++ expectedMsg ++ "\nToken context:\n" ++ tokenContext toks offset FancyError offset fancy -> "Parse error at token offset " ++ show offset ++ ": " ++ show (Set.toList fancy) ++ "\nToken context:\n" ++ tokenContext toks offset tokenContext :: [LToken] -> Int -> String tokenContext toks off = let start = max 0 (off - 5) end = min (length toks) (off + 6) rows = zip [start ..] (take (end - start) (drop start toks)) in unlines (map render rows) where render (i, token) | i == off = ">>> " ++ show i ++ ": " ++ show token | otherwise = " " ++ show i ++ ": " ++ show token