tricu/src/Parser.hs
2024-12-29 12:22:24 -06:00

256 lines
6.9 KiB
Haskell

module Parser where
import Debug.Trace
import Lexer
import Research hiding (toList)
import Control.Exception (throw)
import Data.List.NonEmpty (toList)
import qualified Data.Set as Set
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Error (errorBundlePretty, ParseErrorBundle)
type Parser = Parsec Void [LToken]
data TricuAST
= SVar String
| SInt Int
| SStr String
| SList [TricuAST]
| SFunc String [String] TricuAST
| SApp TricuAST TricuAST
| TLeaf
| TStem TricuAST
| TFork TricuAST TricuAST
| SLambda [String] TricuAST
deriving (Show, Eq, Ord)
parseTricu :: String -> [TricuAST]
parseTricu input =
let nonEmptyLines = filter (not . null) (lines input)
in map parseSingle nonEmptyLines
parseSingle :: String -> TricuAST
parseSingle input = case runParser parseExpression "" (lexTricu input) of
Left err -> error $ handleParseError err
Right ast -> ast
parseExpression :: Parser TricuAST
parseExpression = choice
[ try parseFunction
, try parseLambda
, try parseLambdaExpression
, try parseListLiteral
, try parseApplication
, try parseTreeTerm
, parseLiteral
]
scnParser :: Parser ()
scnParser = skipMany (satisfy isNewline)
parseFunction :: Parser TricuAST
parseFunction = do
LIdentifier name <- satisfy isIdentifier
args <- many (satisfy isIdentifier)
satisfy (== LAssign)
body <- parseExpression
return (SFunc name (map getIdentifier args) body)
parseAtomicBase :: Parser TricuAST
parseAtomicBase = choice
[ try parseVarWithoutAssignment
, parseTreeLeaf
, parseGrouped
]
parseVarWithoutAssignment :: Parser TricuAST
parseVarWithoutAssignment = do
LIdentifier name <- satisfy isIdentifier
if (name == "t" || name == "__result")
then fail $ "Reserved keyword: " ++ name ++ " cannot be assigned."
else notFollowedBy (satisfy (== LAssign)) *> return (SVar name)
parseLambda :: Parser TricuAST
parseLambda = between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) $ do
satisfy (== LBackslash)
param <- satisfy isIdentifier
rest <- many (satisfy isIdentifier)
satisfy (== LColon)
body <- parseLambdaExpression
let nestedLambda = foldr (\v acc -> SLambda [v] acc) body (map getIdentifier rest)
return (SLambda [getIdentifier param] nestedLambda)
parseLambdaExpression :: Parser TricuAST
parseLambdaExpression = choice
[ try parseLambdaApplication
, parseAtomicLambda
]
parseAtomicLambda :: Parser TricuAST
parseAtomicLambda = choice
[ parseVar
, parseTreeLeaf
, parseLiteral
, parseListLiteral
, try parseLambda
, between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseLambdaExpression
]
parseApplication :: Parser TricuAST
parseApplication = do
func <- parseAtomicBase
args <- many parseAtomic
return $ foldl (\acc arg -> SApp acc arg) func args
parseLambdaApplication :: Parser TricuAST
parseLambdaApplication = do
func <- parseAtomicLambda
args <- many parseAtomicLambda
return $ foldl (\acc arg -> SApp acc arg) func args
isTreeTerm :: TricuAST -> Bool
isTreeTerm TLeaf = True
isTreeTerm (TStem _) = True
isTreeTerm (TFork _ _) = True
isTreeTerm _ = False
parseTreeLeaf :: Parser TricuAST
parseTreeLeaf = satisfy isKeywordT *> notFollowedBy (satisfy (== LAssign)) *> pure TLeaf
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = error "Expected identifier"
parseTreeTerm :: Parser TricuAST
parseTreeTerm = do
base <- parseTreeLeafOrParenthesized
rest <- many parseTreeLeafOrParenthesized
pure $ foldl combine base rest
where
combine acc next = case acc of
TLeaf -> TStem next
TStem t -> TFork t next
TFork _ _ -> TFork acc next
parseTreeLeafOrParenthesized :: Parser TricuAST
parseTreeLeafOrParenthesized = choice
[ between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseTreeTerm
, parseTreeLeaf
]
foldTree :: [TricuAST] -> TricuAST
foldTree [] = TLeaf
foldTree [x] = x
foldTree (x:y:rest) = TFork x (foldTree (y:rest))
parseAtomic :: Parser TricuAST
parseAtomic = choice
[ parseVar
, parseTreeLeaf
, parseListLiteral
, parseGrouped
, parseLiteral
]
parseGrouped :: Parser TricuAST
parseGrouped = between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseExpression
parseLiteral :: Parser TricuAST
parseLiteral = choice
[ parseIntLiteral
, parseStrLiteral
]
parens :: Parser TricuAST -> Parser TricuAST
parens p = do
satisfy (== LOpenParen)
result <- p
satisfy (== LCloseParen)
return result
parseListLiteral :: Parser TricuAST
parseListLiteral = do
satisfy (== LOpenBracket)
elements <- many parseListItem
satisfy (== LCloseBracket)
return (SList elements)
parseListItem :: Parser TricuAST
parseListItem = choice
[ parseGroupedItem
, parseListLiteral
, parseSingleItem
]
parseGroupedItem :: Parser TricuAST
parseGroupedItem = do
satisfy (== LOpenParen)
inner <- parseExpression
satisfy (== LCloseParen)
return inner
parseSingleItem :: Parser TricuAST
parseSingleItem = do
token <- satisfy isListItem
case token of
LIdentifier name -> return (SVar name)
LKeywordT -> return TLeaf
_ -> fail "Unexpected token in list item"
isListItem :: LToken -> Bool
isListItem (LIdentifier _) = True
isListItem LKeywordT = True
isListItem _ = False
parseVar :: Parser TricuAST
parseVar = do
LIdentifier name <- satisfy isIdentifier
if (name == "t" || name == "__result")
then fail $ "Reserved keyword: " ++ name ++ " cannot be assigned."
else return (SVar name)
parseIntLiteral :: Parser TricuAST
parseIntLiteral = do
LIntegerLiteral value <- satisfy isIntegerLiteral
return (SInt value)
parseStrLiteral :: Parser TricuAST
parseStrLiteral = do
LStringLiteral value <- satisfy isStringLiteral
return (SStr value)
-- Boolean Helpers
isKeywordT (LKeywordT) = True
isKeywordT _ = False
isIdentifier (LIdentifier _) = True
isIdentifier _ = False
isIntegerLiteral (LIntegerLiteral _) = True
isIntegerLiteral _ = False
isStringLiteral (LStringLiteral _) = True
isStringLiteral _ = False
isLiteral (LIntegerLiteral _) = True
isLiteral (LStringLiteral _) = True
isLiteral _ = False
isNewline (LNewline) = True
isNewline _ = False
-- Error Handling
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
let errors = bundleErrors bundle
errorList = toList errors
formattedErrors = map showError errorList
in unlines ("Parse error(s) encountered:" : formattedErrors)
showError :: ParseError [LToken] Void -> String
showError (TrivialError offset (Just (Tokens tokenStream)) expected) =
"Parse error at offset " ++ show offset ++ ": unexpected token "
++ show tokenStream ++ ", expected one of " ++ show (Set.toList expected)
showError (FancyError offset fancy) =
"Parse error at offset " ++ show offset ++ ":\n " ++ unlines (map show (Set.toList fancy))
showError (TrivialError offset Nothing expected) =
"Parse error at offset " ++ show offset ++ ": expected one of "
++ show (Set.toList expected)