tricu/src/Parser.hs
James Eversole ad02c8b86a General refactor for legibility
Priming to update all source to lhs and document extensively
2025-01-19 14:41:25 -06:00

282 lines
7.5 KiB
Haskell

module Parser where
import Lexer
import Research hiding (toList)
import Data.List.NonEmpty (toList)
import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty)
import qualified Data.Set as Set
type Parser = Parsec Void [LToken]
type AltParser = Parsec Void String
parseTricu :: String -> [TricuAST]
parseTricu input
| null tokens = []
| otherwise = map parseSingle tokens
where
tokens
| null (lexTricu input) = []
| otherwise = lines input
parseSingle :: String -> TricuAST
parseSingle input
| null tokens = SEmpty
| Left err <- parsed = error $ handleParseError err
| Right ast <- parsed = ast
where
tokens = lexTricu input
parsed = runParser parseExpression "" tokens
parseExpression :: Parser TricuAST
parseExpression = choice
[ try parseFunction
, try parseLambda
, try parseLambdaExpression
, try parseListLiteral
, try parseApplication
, try parseTreeTerm
, parseLiteral
]
scnParser :: Parser ()
scnParser = skipMany (satisfy isNewline)
parseFunction :: Parser TricuAST
parseFunction = do
LIdentifier name <- satisfy isIdentifier
args <- many (satisfy isIdentifier)
satisfy (== LAssign)
body <- parseExpression
return (SFunc name (map getIdentifier args) body)
parseAtomicBase :: Parser TricuAST
parseAtomicBase = choice
[ parseTreeLeaf
, parseGrouped
]
parseLambda :: Parser TricuAST
parseLambda = between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) $ do
satisfy (== LBackslash)
param <- satisfy isIdentifier
rest <- many (satisfy isIdentifier)
satisfy (== LColon)
body <- parseLambdaExpression
let nestedLambda = foldr (\v acc -> SLambda [v] acc) body (map getIdentifier rest)
return (SLambda [getIdentifier param] nestedLambda)
parseLambdaExpression :: Parser TricuAST
parseLambdaExpression = choice
[ try parseLambdaApplication
, parseAtomicLambda
]
parseAtomicLambda :: Parser TricuAST
parseAtomicLambda = choice
[ parseVar
, parseTreeLeaf
, parseLiteral
, parseListLiteral
, try parseLambda
, between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseLambdaExpression
]
parseApplication :: Parser TricuAST
parseApplication = do
func <- parseAtomicBase
args <- many parseAtomic
return $ foldl (\acc arg -> SApp acc arg) func args
parseLambdaApplication :: Parser TricuAST
parseLambdaApplication = do
func <- parseAtomicLambda
args <- many parseAtomicLambda
return $ foldl (\acc arg -> SApp acc arg) func args
isTreeTerm :: TricuAST -> Bool
isTreeTerm TLeaf = True
isTreeTerm (TStem _) = True
isTreeTerm (TFork _ _) = True
isTreeTerm _ = False
parseTreeLeaf :: Parser TricuAST
parseTreeLeaf = satisfy isKeywordT *> notFollowedBy (satisfy (== LAssign)) *> pure TLeaf
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = error "Expected identifier"
parseTreeTerm :: Parser TricuAST
parseTreeTerm = do
base <- parseTreeLeafOrParenthesized
rest <- many parseTreeLeafOrParenthesized
pure $ foldl combine base rest
where
combine acc next
| TLeaf <- acc = TStem next
| TStem t <- acc = TFork t next
| TFork _ _ <- acc = TFork acc next
parseTreeLeafOrParenthesized :: Parser TricuAST
parseTreeLeafOrParenthesized = choice
[ between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseTreeTerm
, parseTreeLeaf
]
foldTree :: [TricuAST] -> TricuAST
foldTree [] = TLeaf
foldTree [x] = x
foldTree (x:y:rest) = TFork x (foldTree (y:rest))
parseAtomic :: Parser TricuAST
parseAtomic = choice
[ parseVar
, parseTreeLeaf
, parseListLiteral
, parseGrouped
, parseLiteral
]
parseGrouped :: Parser TricuAST
parseGrouped = between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseExpression
parseLiteral :: Parser TricuAST
parseLiteral = choice
[ parseIntLiteral
, parseStrLiteral
]
parens :: Parser TricuAST -> Parser TricuAST
parens p = do
satisfy (== LOpenParen)
result <- p
satisfy (== LCloseParen)
return result
parseListLiteral :: Parser TricuAST
parseListLiteral = do
satisfy (== LOpenBracket)
elements <- many parseListItem
satisfy (== LCloseBracket)
return (SList elements)
parseListItem :: Parser TricuAST
parseListItem = choice
[ parseGroupedItem
, parseListLiteral
, parseSingleItem
]
parseGroupedItem :: Parser TricuAST
parseGroupedItem = do
satisfy (== LOpenParen)
inner <- parseExpression
satisfy (== LCloseParen)
return inner
parseSingleItem :: Parser TricuAST
parseSingleItem = do
token <- satisfy isListItem
case token of
_ | LIdentifier name <- token -> return (SVar name)
| LKeywordT <- token -> return TLeaf
| otherwise -> fail "Unexpected token in list item"
isListItem :: LToken -> Bool
isListItem (LIdentifier _) = True
isListItem LKeywordT = True
isListItem _ = False
parseVar :: Parser TricuAST
parseVar = do
LIdentifier name <- satisfy isIdentifier
if (name == "t" || name == "__result")
then fail $ "Reserved keyword: " ++ name ++ " cannot be assigned."
else return (SVar name)
parseIntLiteral :: Parser TricuAST
parseIntLiteral = do
LIntegerLiteral value <- satisfy isIntegerLiteral
return (SInt value)
parseStrLiteral :: Parser TricuAST
parseStrLiteral = do
LStringLiteral value <- satisfy isStringLiteral
return (SStr value)
-- Boolean Helpers
isKeywordT (LKeywordT) = True
isKeywordT _ = False
isIdentifier (LIdentifier _) = True
isIdentifier _ = False
isIntegerLiteral (LIntegerLiteral _) = True
isIntegerLiteral _ = False
isStringLiteral (LStringLiteral _) = True
isStringLiteral _ = False
isLiteral (LIntegerLiteral _) = True
isLiteral (LStringLiteral _) = True
isLiteral _ = False
isNewline (LNewline) = True
isNewline _ = False
-- Alternative parsers
altSC :: AltParser ()
altSC = skipMany (char ' ' <|> char '\t' <|> char '\n')
parseTernaryTerm :: AltParser TricuAST
parseTernaryTerm = do
altSC
term <- choice parseTernaryTerm'
altSC
pure term
where
parseTernaryTerm' =
[ try (between (char '(') (char ')') parseTernaryTerm)
, try parseTernaryLeaf
, try parseTernaryStem
, try parseTernaryFork
]
parseTernaryLeaf :: AltParser TricuAST
parseTernaryLeaf = char '0' *> pure TLeaf
parseTernaryStem :: AltParser TricuAST
parseTernaryStem = char '1' *> (TStem <$> parseTernaryTerm)
parseTernaryFork :: AltParser TricuAST
parseTernaryFork = do
char '2'
term1 <- parseTernaryTerm
term2 <- parseTernaryTerm
pure $ TFork term1 term2
parseTernary :: String -> Either String TricuAST
parseTernary input
| Left err <- result = Left (errorBundlePretty err)
| Right ast <- result = Right ast
where
result = runParser (parseTernaryTerm <* eof) "" input
-- Error Handling
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
let errors = bundleErrors bundle
errorList = toList errors
formattedErrors = map showError errorList
in unlines ("Parse error(s) encountered:" : formattedErrors)
showError :: ParseError [LToken] Void -> String
showError (TrivialError offset (Just (Tokens tokenStream)) expected) =
"Parse error at offset " ++ show offset ++ ": unexpected token "
++ show tokenStream ++ ", expected one of " ++ show (Set.toList expected)
showError (FancyError offset fancy) =
"Parse error at offset " ++ show offset ++ ":\n " ++ unlines (map show (Set.toList fancy))
showError (TrivialError offset Nothing expected) =
"Parse error at offset " ++ show offset ++ ": expected one of "
++ show (Set.toList expected)