tricu/src/Parser.hs

282 lines
7.5 KiB
Haskell
Raw Normal View History

module Parser where
import Lexer
import Research hiding (toList)
2024-12-29 20:29:41 -06:00
import Data.List.NonEmpty (toList)
2024-12-29 20:29:41 -06:00
import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty)
2024-12-29 20:29:41 -06:00
import qualified Data.Set as Set
type Parser = Parsec Void [LToken]
type AltParser = Parsec Void String
2024-12-29 08:29:25 -06:00
parseTricu :: String -> [TricuAST]
2024-12-29 20:29:41 -06:00
parseTricu input
| null tokens = []
| otherwise = map parseSingle tokens
2024-12-29 20:29:41 -06:00
where
tokens
| null (lexTricu input) = []
| otherwise = lines input
2024-12-29 08:29:25 -06:00
parseSingle :: String -> TricuAST
parseSingle input
| null tokens = SEmpty
| Left err <- parsed = error $ handleParseError err
| Right ast <- parsed = ast
where
tokens = lexTricu input
parsed = runParser parseExpression "" tokens
2024-12-29 08:29:25 -06:00
parseExpression :: Parser TricuAST
parseExpression = choice
[ try parseFunction
2024-12-27 08:17:06 -06:00
, try parseLambda
, try parseLambdaExpression
2024-12-27 08:17:06 -06:00
, try parseListLiteral
, try parseApplication
2024-12-27 08:17:06 -06:00
, try parseTreeTerm
, parseLiteral
]
2024-12-29 12:22:24 -06:00
scnParser :: Parser ()
scnParser = skipMany (satisfy isNewline)
2024-12-29 08:29:25 -06:00
parseFunction :: Parser TricuAST
parseFunction = do
LIdentifier name <- satisfy isIdentifier
args <- many (satisfy isIdentifier)
satisfy (== LAssign)
body <- parseExpression
return (SFunc name (map getIdentifier args) body)
2024-12-29 08:29:25 -06:00
parseAtomicBase :: Parser TricuAST
parseAtomicBase = choice
[ parseTreeLeaf
, parseGrouped
]
2024-12-29 08:29:25 -06:00
parseLambda :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseLambda = between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) $ do
satisfy (== LBackslash)
2024-12-28 07:24:19 -06:00
param <- satisfy isIdentifier
rest <- many (satisfy isIdentifier)
2024-12-27 08:17:06 -06:00
satisfy (== LColon)
2024-12-28 07:24:19 -06:00
body <- parseLambdaExpression
2024-12-27 08:17:06 -06:00
let nestedLambda = foldr (\v acc -> SLambda [v] acc) body (map getIdentifier rest)
return (SLambda [getIdentifier param] nestedLambda)
2024-12-29 08:29:25 -06:00
parseLambdaExpression :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseLambdaExpression = choice
[ try parseLambdaApplication
, parseAtomicLambda
]
2024-12-29 08:29:25 -06:00
parseAtomicLambda :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseAtomicLambda = choice
[ parseVar
, parseTreeLeaf
, parseLiteral
, parseListLiteral
, try parseLambda
2024-12-27 08:17:06 -06:00
, between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseLambdaExpression
]
2024-12-29 08:29:25 -06:00
parseApplication :: Parser TricuAST
parseApplication = do
func <- parseAtomicBase
2024-12-27 08:17:06 -06:00
args <- many parseAtomic
return $ foldl (\acc arg -> SApp acc arg) func args
2024-12-29 08:29:25 -06:00
parseLambdaApplication :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseLambdaApplication = do
func <- parseAtomicLambda
args <- many parseAtomicLambda
return $ foldl (\acc arg -> SApp acc arg) func args
2024-12-29 08:29:25 -06:00
isTreeTerm :: TricuAST -> Bool
isTreeTerm TLeaf = True
isTreeTerm (TStem _) = True
isTreeTerm (TFork _ _) = True
isTreeTerm _ = False
2024-12-29 08:29:25 -06:00
parseTreeLeaf :: Parser TricuAST
parseTreeLeaf = satisfy isKeywordT *> notFollowedBy (satisfy (== LAssign)) *> pure TLeaf
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = error "Expected identifier"
2024-12-29 08:29:25 -06:00
parseTreeTerm :: Parser TricuAST
parseTreeTerm = do
base <- parseTreeLeafOrParenthesized
rest <- many parseTreeLeafOrParenthesized
pure $ foldl combine base rest
where
combine acc next
| TLeaf <- acc = TStem next
| TStem t <- acc = TFork t next
| TFork _ _ <- acc = TFork acc next
2024-12-29 08:29:25 -06:00
parseTreeLeafOrParenthesized :: Parser TricuAST
parseTreeLeafOrParenthesized = choice
[ between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseTreeTerm
, parseTreeLeaf
]
2024-12-29 08:29:25 -06:00
foldTree :: [TricuAST] -> TricuAST
foldTree [] = TLeaf
foldTree [x] = x
foldTree (x:y:rest) = TFork x (foldTree (y:rest))
2024-12-29 08:29:25 -06:00
parseAtomic :: Parser TricuAST
parseAtomic = choice
[ parseVar
2024-12-27 08:17:06 -06:00
, parseTreeLeaf
, parseListLiteral
2024-12-27 08:17:06 -06:00
, parseGrouped
, parseLiteral
]
2024-12-29 08:29:25 -06:00
parseGrouped :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseGrouped = between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseExpression
2024-12-29 08:29:25 -06:00
parseLiteral :: Parser TricuAST
parseLiteral = choice
[ parseIntLiteral
, parseStrLiteral
]
2024-12-29 08:29:25 -06:00
parens :: Parser TricuAST -> Parser TricuAST
parens p = do
satisfy (== LOpenParen)
result <- p
satisfy (== LCloseParen)
return result
2024-12-29 08:29:25 -06:00
parseListLiteral :: Parser TricuAST
parseListLiteral = do
satisfy (== LOpenBracket)
elements <- many parseListItem
satisfy (== LCloseBracket)
return (SList elements)
2024-12-29 08:29:25 -06:00
parseListItem :: Parser TricuAST
parseListItem = choice
[ parseGroupedItem
, parseListLiteral
, parseSingleItem
]
2024-12-29 08:29:25 -06:00
parseGroupedItem :: Parser TricuAST
parseGroupedItem = do
satisfy (== LOpenParen)
inner <- parseExpression
satisfy (== LCloseParen)
return inner
2024-12-29 08:29:25 -06:00
parseSingleItem :: Parser TricuAST
parseSingleItem = do
token <- satisfy isListItem
case token of
_ | LIdentifier name <- token -> return (SVar name)
| LKeywordT <- token -> return TLeaf
| otherwise -> fail "Unexpected token in list item"
isListItem :: LToken -> Bool
isListItem (LIdentifier _) = True
isListItem LKeywordT = True
isListItem _ = False
2024-12-29 08:29:25 -06:00
parseVar :: Parser TricuAST
parseVar = do
LIdentifier name <- satisfy isIdentifier
if (name == "t" || name == "__result")
then fail $ "Reserved keyword: " ++ name ++ " cannot be assigned."
else return (SVar name)
2024-12-29 08:29:25 -06:00
parseIntLiteral :: Parser TricuAST
parseIntLiteral = do
LIntegerLiteral value <- satisfy isIntegerLiteral
return (SInt value)
2024-12-29 08:29:25 -06:00
parseStrLiteral :: Parser TricuAST
parseStrLiteral = do
LStringLiteral value <- satisfy isStringLiteral
return (SStr value)
-- Boolean Helpers
isKeywordT (LKeywordT) = True
isKeywordT _ = False
isIdentifier (LIdentifier _) = True
isIdentifier _ = False
isIntegerLiteral (LIntegerLiteral _) = True
isIntegerLiteral _ = False
isStringLiteral (LStringLiteral _) = True
isStringLiteral _ = False
isLiteral (LIntegerLiteral _) = True
isLiteral (LStringLiteral _) = True
isLiteral _ = False
isNewline (LNewline) = True
isNewline _ = False
-- Alternative parsers
altSC :: AltParser ()
altSC = skipMany (char ' ' <|> char '\t' <|> char '\n')
parseTernaryTerm :: AltParser TricuAST
parseTernaryTerm = do
altSC
term <- choice parseTernaryTerm'
altSC
pure term
where
parseTernaryTerm' =
[ try (between (char '(') (char ')') parseTernaryTerm)
, try parseTernaryLeaf
, try parseTernaryStem
, try parseTernaryFork
]
parseTernaryLeaf :: AltParser TricuAST
parseTernaryLeaf = char '0' *> pure TLeaf
parseTernaryStem :: AltParser TricuAST
parseTernaryStem = char '1' *> (TStem <$> parseTernaryTerm)
parseTernaryFork :: AltParser TricuAST
parseTernaryFork = do
char '2'
term1 <- parseTernaryTerm
term2 <- parseTernaryTerm
pure $ TFork term1 term2
parseTernary :: String -> Either String TricuAST
parseTernary input
| Left err <- result = Left (errorBundlePretty err)
| Right ast <- result = Right ast
where
result = runParser (parseTernaryTerm <* eof) "" input
-- Error Handling
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
let errors = bundleErrors bundle
errorList = toList errors
formattedErrors = map showError errorList
in unlines ("Parse error(s) encountered:" : formattedErrors)
showError :: ParseError [LToken] Void -> String
showError (TrivialError offset (Just (Tokens tokenStream)) expected) =
"Parse error at offset " ++ show offset ++ ": unexpected token "
++ show tokenStream ++ ", expected one of " ++ show (Set.toList expected)
showError (FancyError offset fancy) =
"Parse error at offset " ++ show offset ++ ":\n " ++ unlines (map show (Set.toList fancy))
showError (TrivialError offset Nothing expected) =
"Parse error at offset " ++ show offset ++ ": expected one of "
++ show (Set.toList expected)