tricu/src/Parser.hs

256 lines
6.9 KiB
Haskell
Raw Normal View History

module Parser where
2024-12-27 08:17:06 -06:00
import Debug.Trace
import Lexer
import Research hiding (toList)
import Control.Exception (throw)
import Data.List.NonEmpty (toList)
import qualified Data.Set as Set
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Error (errorBundlePretty, ParseErrorBundle)
type Parser = Parsec Void [LToken]
2024-12-29 08:29:25 -06:00
data TricuAST
= SVar String
| SInt Int
| SStr String
2024-12-29 08:29:25 -06:00
| SList [TricuAST]
| SFunc String [String] TricuAST
| SApp TricuAST TricuAST
| TLeaf
2024-12-29 08:29:25 -06:00
| TStem TricuAST
| TFork TricuAST TricuAST
| SLambda [String] TricuAST
deriving (Show, Eq, Ord)
2024-12-29 08:29:25 -06:00
parseTricu :: String -> [TricuAST]
parseTricu input =
let nonEmptyLines = filter (not . null) (lines input)
in map parseSingle nonEmptyLines
2024-12-29 08:29:25 -06:00
parseSingle :: String -> TricuAST
parseSingle input = case runParser parseExpression "" (lexTricu input) of
Left err -> error $ handleParseError err
Right ast -> ast
scnParser :: Parser ()
scnParser = skipMany (satisfy isNewline)
2024-12-29 08:29:25 -06:00
parseExpression :: Parser TricuAST
parseExpression = choice
[ try parseFunction
2024-12-27 08:17:06 -06:00
, try parseLambda
, try parseLambdaExpression
2024-12-27 08:17:06 -06:00
, try parseListLiteral
, try parseApplication
2024-12-27 08:17:06 -06:00
, try parseTreeTerm
, parseLiteral
]
2024-12-29 08:29:25 -06:00
parseFunction :: Parser TricuAST
parseFunction = do
LIdentifier name <- satisfy isIdentifier
args <- many (satisfy isIdentifier)
satisfy (== LAssign)
body <- parseExpression
return (SFunc name (map getIdentifier args) body)
2024-12-29 08:29:25 -06:00
parseAtomicBase :: Parser TricuAST
parseAtomicBase = choice
[ try parseVarWithoutAssignment
, parseTreeLeaf
, parseGrouped
]
2024-12-29 08:29:25 -06:00
parseVarWithoutAssignment :: Parser TricuAST
parseVarWithoutAssignment = do
LIdentifier name <- satisfy isIdentifier
if (name == "t" || name == "__result")
then fail $ "Reserved keyword: " ++ name ++ " cannot be assigned."
else notFollowedBy (satisfy (== LAssign)) *> return (SVar name)
2024-12-29 08:29:25 -06:00
parseLambda :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseLambda = between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) $ do
satisfy (== LBackslash)
2024-12-28 07:24:19 -06:00
param <- satisfy isIdentifier
rest <- many (satisfy isIdentifier)
2024-12-27 08:17:06 -06:00
satisfy (== LColon)
2024-12-28 07:24:19 -06:00
body <- parseLambdaExpression
2024-12-27 08:17:06 -06:00
let nestedLambda = foldr (\v acc -> SLambda [v] acc) body (map getIdentifier rest)
return (SLambda [getIdentifier param] nestedLambda)
2024-12-29 08:29:25 -06:00
parseLambdaExpression :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseLambdaExpression = choice
[ try parseLambdaApplication
, parseAtomicLambda
]
2024-12-29 08:29:25 -06:00
parseAtomicLambda :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseAtomicLambda = choice
[ parseVar
, parseTreeLeaf
, parseLiteral
, parseListLiteral
, try parseLambda
2024-12-27 08:17:06 -06:00
, between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseLambdaExpression
]
2024-12-29 08:29:25 -06:00
parseApplication :: Parser TricuAST
parseApplication = do
func <- parseAtomicBase
2024-12-27 08:17:06 -06:00
args <- many parseAtomic
return $ foldl (\acc arg -> SApp acc arg) func args
2024-12-29 08:29:25 -06:00
parseLambdaApplication :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseLambdaApplication = do
func <- parseAtomicLambda
args <- many parseAtomicLambda
return $ foldl (\acc arg -> SApp acc arg) func args
2024-12-29 08:29:25 -06:00
isTreeTerm :: TricuAST -> Bool
isTreeTerm TLeaf = True
isTreeTerm (TStem _) = True
isTreeTerm (TFork _ _) = True
isTreeTerm _ = False
2024-12-29 08:29:25 -06:00
parseTreeLeaf :: Parser TricuAST
parseTreeLeaf = satisfy isKeywordT *> notFollowedBy (satisfy (== LAssign)) *> pure TLeaf
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = error "Expected identifier"
2024-12-29 08:29:25 -06:00
parseTreeTerm :: Parser TricuAST
parseTreeTerm = do
base <- parseTreeLeafOrParenthesized
rest <- many parseTreeLeafOrParenthesized
pure $ foldl combine base rest
where
combine acc next = case acc of
TLeaf -> TStem next
TStem t -> TFork t next
TFork _ _ -> TFork acc next
2024-12-29 08:29:25 -06:00
parseTreeLeafOrParenthesized :: Parser TricuAST
parseTreeLeafOrParenthesized = choice
[ between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseTreeTerm
, parseTreeLeaf
]
2024-12-29 08:29:25 -06:00
foldTree :: [TricuAST] -> TricuAST
foldTree [] = TLeaf
foldTree [x] = x
foldTree (x:y:rest) = TFork x (foldTree (y:rest))
2024-12-29 08:29:25 -06:00
parseAtomic :: Parser TricuAST
parseAtomic = choice
[ parseVar
2024-12-27 08:17:06 -06:00
, parseTreeLeaf
, parseListLiteral
2024-12-27 08:17:06 -06:00
, parseGrouped
, parseLiteral
]
2024-12-29 08:29:25 -06:00
parseGrouped :: Parser TricuAST
2024-12-27 08:17:06 -06:00
parseGrouped = between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseExpression
2024-12-29 08:29:25 -06:00
parseLiteral :: Parser TricuAST
parseLiteral = choice
[ parseIntLiteral
, parseStrLiteral
]
2024-12-29 08:29:25 -06:00
parens :: Parser TricuAST -> Parser TricuAST
parens p = do
satisfy (== LOpenParen)
result <- p
satisfy (== LCloseParen)
return result
2024-12-29 08:29:25 -06:00
parseListLiteral :: Parser TricuAST
parseListLiteral = do
satisfy (== LOpenBracket)
elements <- many parseListItem
satisfy (== LCloseBracket)
return (SList elements)
2024-12-29 08:29:25 -06:00
parseListItem :: Parser TricuAST
parseListItem = choice
[ parseGroupedItem
, parseListLiteral
, parseSingleItem
]
2024-12-29 08:29:25 -06:00
parseGroupedItem :: Parser TricuAST
parseGroupedItem = do
satisfy (== LOpenParen)
inner <- parseExpression
satisfy (== LCloseParen)
return inner
2024-12-29 08:29:25 -06:00
parseSingleItem :: Parser TricuAST
parseSingleItem = do
token <- satisfy isListItem
case token of
LIdentifier name -> return (SVar name)
LKeywordT -> return TLeaf
_ -> fail "Unexpected token in list item"
isListItem :: LToken -> Bool
isListItem (LIdentifier _) = True
isListItem LKeywordT = True
isListItem _ = False
2024-12-29 08:29:25 -06:00
parseVar :: Parser TricuAST
parseVar = do
LIdentifier name <- satisfy isIdentifier
if (name == "t" || name == "__result")
then fail $ "Reserved keyword: " ++ name ++ " cannot be assigned."
else return (SVar name)
2024-12-29 08:29:25 -06:00
parseIntLiteral :: Parser TricuAST
parseIntLiteral = do
LIntegerLiteral value <- satisfy isIntegerLiteral
return (SInt value)
2024-12-29 08:29:25 -06:00
parseStrLiteral :: Parser TricuAST
parseStrLiteral = do
LStringLiteral value <- satisfy isStringLiteral
return (SStr value)
-- Boolean Helpers
isKeywordT (LKeywordT) = True
isKeywordT _ = False
isIdentifier (LIdentifier _) = True
isIdentifier _ = False
isIntegerLiteral (LIntegerLiteral _) = True
isIntegerLiteral _ = False
isStringLiteral (LStringLiteral _) = True
isStringLiteral _ = False
isLiteral (LIntegerLiteral _) = True
isLiteral (LStringLiteral _) = True
isLiteral _ = False
isNewline (LNewline) = True
isNewline _ = False
-- Error Handling
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
let errors = bundleErrors bundle
errorList = toList errors
formattedErrors = map showError errorList
in unlines ("Parse error(s) encountered:" : formattedErrors)
showError :: ParseError [LToken] Void -> String
showError (TrivialError offset (Just (Tokens tokenStream)) expected) =
"Parse error at offset " ++ show offset ++ ": unexpected token "
++ show tokenStream ++ ", expected one of " ++ show (Set.toList expected)
showError (FancyError offset fancy) =
"Parse error at offset " ++ show offset ++ ":\n " ++ unlines (map show (Set.toList fancy))
showError (TrivialError offset Nothing expected) =
"Parse error at offset " ++ show offset ++ ": expected one of "
++ show (Set.toList expected)