tricu/src/Parser.hs

233 lines
6.5 KiB
Haskell
Raw Normal View History

module Parser where
import Lexer
import Research hiding (toList)
import Control.Exception (throw)
import Data.List.NonEmpty (toList)
import qualified Data.Set as Set
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Error (errorBundlePretty, ParseErrorBundle)
type Parser = Parsec Void [LToken]
data SaplingAST
= SVar String
| SInt Int
| SStr String
| SList [SaplingAST]
| SFunc String [String] SaplingAST
| SApp SaplingAST [SaplingAST]
| TLeaf
| TStem SaplingAST
| TFork SaplingAST SaplingAST
deriving (Show, Eq, Ord)
parseSapling :: String -> [SaplingAST]
parseSapling input =
let nonEmptyLines = filter (not . null) (lines input)
in map parseSingle nonEmptyLines
parseSingle :: String -> SaplingAST
parseSingle "" = error "Empty input provided to parseSingle"
parseSingle input = case runParser parseExpression "" (lexSapling input) of
Left err -> error $ handleParseError err
Right ast -> ast
scnParser :: Parser ()
scnParser = skipMany (satisfy isNewline)
parseExpression :: Parser SaplingAST
parseExpression = choice
[ try parseFunction
, try parseApplication
, parseTreeTerm
, parseLiteral
, parseListLiteral
]
parseFunction :: Parser SaplingAST
parseFunction = do
LIdentifier name <- satisfy isIdentifier
args <- many (satisfy isIdentifier)
satisfy (== LAssign)
body <- parseExpression
return (SFunc name (map getIdentifier args) body)
parseApplication :: Parser SaplingAST
parseApplication = do
func <- parseAtomicBase
args <- many parseAtomicApplication
case func of
TLeaf | not (null args) && all isTreeTerm args -> fail "Defer to Tree Calculus"
_ -> return (SApp func args)
isTreeTerm :: SaplingAST -> Bool
isTreeTerm TLeaf = True
isTreeTerm (TStem _) = True
isTreeTerm (TFork _ _) = True
isTreeTerm _ = False
parseAtomicBase :: Parser SaplingAST
parseAtomicBase = choice
[ parseVar
, parseTreeLeaf
]
parseTreeLeaf :: Parser SaplingAST
parseTreeLeaf = satisfy isKeywordT *> notFollowedBy (satisfy (== LAssign)) *> pure TLeaf
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = error "Expected identifier"
parseTreeTerm :: Parser SaplingAST
parseTreeTerm = do
base <- parseTreeLeafOrParenthesized
rest <- many parseTreeLeafOrParenthesized
pure $ foldl combine base rest
where
combine acc next = case acc of
TLeaf -> TStem next
TStem t -> TFork t next
TFork _ _ -> TFork acc next
parseTreeLeafOrParenthesized :: Parser SaplingAST
parseTreeLeafOrParenthesized = choice
[ between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseTreeTerm
, parseTreeLeaf
]
foldTree :: [SaplingAST] -> SaplingAST
foldTree [] = TLeaf
foldTree [x] = x
foldTree (x:y:rest) = TFork x (foldTree (y:rest))
parseAtomic :: Parser SaplingAST
parseAtomic = choice
[ parseVar
, parseTreeLeafOrParenthesized
, parseLiteral
, parseListLiteral
, between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseExpression
]
parseAtomicApplication :: Parser SaplingAST
parseAtomicApplication = do
token <- anySingle
case token of
LAssign -> fail
"Unexpected `=` character in application context. \
\ This is usually caused by an incomplete definition such as: \
\ `function a b =`"
LIdentifier name -> return (SVar name)
LKeywordT -> return TLeaf
LIntegerLiteral value -> return (SInt value)
LStringLiteral value -> return (SStr value)
LOpenBracket -> parseListLiteral
LOpenParen -> between (satisfy (== LOpenParen)) (satisfy (== LCloseParen)) parseExpression
_ -> fail "Invalid token while parsing attempted function application"
parseLiteral :: Parser SaplingAST
parseLiteral = choice
[ parseIntLiteral
, parseStrLiteral
]
parens :: Parser SaplingAST -> Parser SaplingAST
parens p = do
satisfy (== LOpenParen)
result <- p
satisfy (== LCloseParen)
return result
parseListLiteral :: Parser SaplingAST
parseListLiteral = do
satisfy (== LOpenBracket)
elements <- many parseListItem
satisfy (== LCloseBracket)
return (SList elements)
parseListItem :: Parser SaplingAST
parseListItem = choice
[ parseGroupedItem
, parseListLiteral
, parseSingleItem
]
parseGroupedItem :: Parser SaplingAST
parseGroupedItem = do
satisfy (== LOpenParen)
inner <- parseExpression
satisfy (== LCloseParen)
return inner
parseSingleItem :: Parser SaplingAST
parseSingleItem = do
token <- satisfy isListItem
case token of
LIdentifier name -> return (SVar name)
LKeywordT -> return TLeaf
_ -> fail "Unexpected token in list item"
isListItem :: LToken -> Bool
isListItem (LIdentifier _) = True
isListItem LKeywordT = True
isListItem _ = False
parseVar :: Parser SaplingAST
parseVar = do
LIdentifier name <- satisfy isIdentifier
if (name == "t" || name == "__result")
then fail $ "Reserved keyword: " ++ name ++ " cannot be assigned."
else return (SVar name)
parseIntLiteral :: Parser SaplingAST
parseIntLiteral = do
LIntegerLiteral value <- satisfy isIntegerLiteral
return (SInt value)
parseStrLiteral :: Parser SaplingAST
parseStrLiteral = do
LStringLiteral value <- satisfy isStringLiteral
return (SStr value)
-- Boolean Helpers
2024-12-18 19:15:56 -06:00
isKeywordT (LKeywordT) = True
isKeywordT _ = False
2024-12-18 19:15:56 -06:00
isIdentifier (LIdentifier _) = True
isIdentifier _ = False
isIntegerLiteral (LIntegerLiteral _) = True
isIntegerLiteral _ = False
isStringLiteral (LStringLiteral _) = True
isStringLiteral _ = False
isLiteral (LIntegerLiteral _) = True
isLiteral (LStringLiteral _) = True
isLiteral _ = False
esNewline (LNewline) = True
isNewline _ = False
-- Error Handling
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
let errors = bundleErrors bundle
errorList = toList errors
formattedErrors = map showError errorList
in unlines ("Parse error(s) encountered:" : formattedErrors)
showError :: ParseError [LToken] Void -> String
showError (TrivialError offset (Just (Tokens tokenStream)) expected) =
"Parse error at offset " ++ show offset ++ ": unexpected token "
++ show tokenStream ++ ", expected one of " ++ show (Set.toList expected)
showError (FancyError offset fancy) =
"Parse error at offset " ++ show offset ++ ":\n " ++ unlines (map show (Set.toList fancy))
showError (TrivialError offset Nothing expected) =
"Parse error at offset " ++ show offset ++ ": expected one of "
++ show (Set.toList expected)