tricu/src/Parser.hs
James Eversole 87aed72ab2
All checks were successful
Test, Build, and Release / test (push) Successful in 1m43s
Test, Build, and Release / build (push) Successful in 1m16s
# Modules
Basic implementation of a module system including tests.
2025-01-27 16:04:04 -06:00

317 lines
9.0 KiB
Haskell

module Parser where
import Lexer
import Research
import Control.Monad (void)
import Control.Monad.State
import Data.List.NonEmpty (toList)
import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty)
import qualified Data.Set as Set
data PState = PState
{ parenDepth :: Int
, bracketDepth :: Int
} deriving (Show)
type ParserM = StateT PState (Parsec Void [LToken])
satisfyM :: (LToken -> Bool) -> ParserM LToken
satisfyM f = do
token <- lift (satisfy f)
modify' (updateDepth token)
return token
updateDepth :: LToken -> PState -> PState
updateDepth LOpenParen st = st { parenDepth = parenDepth st + 1 }
updateDepth LOpenBracket st = st { bracketDepth = bracketDepth st + 1 }
updateDepth LCloseParen st = st { parenDepth = parenDepth st - 1 }
updateDepth LCloseBracket st = st { bracketDepth = bracketDepth st - 1 }
updateDepth _ st = st
topLevelNewline :: ParserM ()
topLevelNewline = do
st <- get
if parenDepth st == 0 && bracketDepth st == 0
then void (satisfyM (== LNewline))
else fail "Top-level exit in nested context (paren or bracket)"
parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST]
parseProgram tokens =
runParser (evalStateT (parseProgramM <* finalizeDepth <* eof) (PState 0 0)) "" tokens
parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST
parseSingleExpr tokens =
runParser (evalStateT (scnParserM *> parseExpressionM <* finalizeDepth <* eof) (PState 0 0)) "" tokens
finalizeDepth :: ParserM ()
finalizeDepth = do
st <- get
case (parenDepth st, bracketDepth st) of
(0, 0) -> pure ()
(p, b) -> fail $ "Unmatched tokens: " ++ show (p, b)
parseTricu :: String -> [TricuAST]
parseTricu input =
case lexTricu input of
[] -> []
toks ->
case parseProgram toks of
Left err -> errorWithoutStackTrace (handleParseError err)
Right asts -> asts
parseSingle :: String -> TricuAST
parseSingle input =
case lexTricu input of
[] -> SEmpty
toks ->
case parseSingleExpr toks of
Left err -> errorWithoutStackTrace (handleParseError err)
Right ast -> ast
parseProgramM :: ParserM [TricuAST]
parseProgramM = do
skipMany topLevelNewline
moduleNode <- optional parseModuleM
skipMany topLevelNewline
importNodes <- many (do
node <- parseImportM
skipMany topLevelNewline
return node)
skipMany topLevelNewline
exprs <- sepEndBy parseOneExpression (some topLevelNewline)
skipMany topLevelNewline
return (maybe [] (: []) moduleNode ++ importNodes ++ exprs)
parseModuleM :: ParserM TricuAST
parseModuleM = do
LModule moduleName <- satisfyM isModule
pure (SModule moduleName)
where
isModule (LModule _) = True
isModule _ = False
parseImportM :: ParserM TricuAST
parseImportM = do
LImport filePath moduleName <- satisfyM isImport
pure (SImport filePath moduleName)
where
isImport (LImport _ _) = True
isImport _ = False
parseOneExpression :: ParserM TricuAST
parseOneExpression = scnParserM *> parseExpressionM
scnParserM :: ParserM ()
scnParserM = skipMany $ do
t <- lookAhead anySingle
st <- get
if | (parenDepth st > 0 || bracketDepth st > 0) && (t == LNewline) ->
void $ satisfyM (== LNewline)
| otherwise ->
fail "In nested context or no space token" <|> empty
eofM :: ParserM ()
eofM = lift eof
parseExpressionM :: ParserM TricuAST
parseExpressionM = choice
[ try parseFunctionM
, try parseLambdaM
, try parseLambdaExpressionM
, try parseListLiteralM
, try parseApplicationM
, try parseTreeTermM
, parseLiteralM
]
parseFunctionM :: ParserM TricuAST
parseFunctionM = do
let ident = (\case LIdentifier _ -> True; _ -> False)
LIdentifier name <- satisfyM ident
args <- many $ satisfyM ident
_ <- satisfyM (== LAssign)
scnParserM
body <- parseExpressionM
pure (SDef name (map getIdentifier args) body)
parseLambdaM :: ParserM TricuAST
parseLambdaM = do
let ident = (\case LIdentifier _ -> True; _ -> False)
_ <- satisfyM (== LBackslash)
params <- some (satisfyM ident)
_ <- satisfyM (== LColon)
scnParserM
body <- parseLambdaExpressionM
pure $ foldr (\param acc -> SLambda [getIdentifier param] acc) body params
parseLambdaExpressionM :: ParserM TricuAST
parseLambdaExpressionM = choice
[ try parseLambdaApplicationM
, parseAtomicLambdaM
]
parseAtomicLambdaM :: ParserM TricuAST
parseAtomicLambdaM = choice
[ parseVarM
, parseTreeLeafM
, parseLiteralM
, parseListLiteralM
, try parseLambdaM
, between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM
]
parseApplicationM :: ParserM TricuAST
parseApplicationM = do
func <- parseAtomicBaseM
scnParserM
args <- many $ do
scnParserM
arg <- parseAtomicM
return arg
return $ foldl SApp func args
parseLambdaApplicationM :: ParserM TricuAST
parseLambdaApplicationM = do
func <- parseAtomicLambdaM
scnParserM
args <- many $ do
arg <- parseAtomicLambdaM
scnParserM
pure arg
pure $ foldl SApp func args
parseAtomicBaseM :: ParserM TricuAST
parseAtomicBaseM = choice
[ parseTreeLeafM
, parseGroupedM
]
parseTreeLeafM :: ParserM TricuAST
parseTreeLeafM = do
let keyword = (\case LKeywordT -> True; _ -> False)
_ <- satisfyM keyword
notFollowedBy $ lift $ satisfy (== LAssign)
pure TLeaf
parseTreeTermM :: ParserM TricuAST
parseTreeTermM = do
base <- parseTreeLeafOrParenthesizedM
rest <- many parseTreeLeafOrParenthesizedM
pure (foldl combine base rest)
where
combine acc next
| TLeaf <- acc = TStem next
| TStem t <- acc = TFork t next
| TFork _ _ <- acc = TFork acc next
parseTreeLeafOrParenthesizedM :: ParserM TricuAST
parseTreeLeafOrParenthesizedM = choice
[ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM
, parseTreeLeafM
]
parseAtomicM :: ParserM TricuAST
parseAtomicM = choice
[ parseVarM
, parseTreeLeafM
, parseListLiteralM
, parseGroupedM
, parseLiteralM
]
parseGroupedM :: ParserM TricuAST
parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $
scnParserM *> parseExpressionM <* scnParserM
parseLiteralM :: ParserM TricuAST
parseLiteralM = choice
[ parseIntLiteralM
, parseStrLiteralM
]
parseListLiteralM :: ParserM TricuAST
parseListLiteralM = do
_ <- satisfyM (== LOpenBracket)
elements <- many $ do
scnParserM
parseListItemM
scnParserM
_ <- satisfyM (== LCloseBracket)
pure (SList elements)
parseListItemM :: ParserM TricuAST
parseListItemM = choice
[ parseGroupedItemM
, parseListLiteralM
, parseSingleItemM
]
parseGroupedItemM :: ParserM TricuAST
parseGroupedItemM = do
_ <- satisfyM (== LOpenParen)
inner <- parseExpressionM
_ <- satisfyM (== LCloseParen)
pure inner
parseSingleItemM :: ParserM TricuAST
parseSingleItemM = do
token <- satisfyM (\case LIdentifier _ -> True; LKeywordT -> True; _ -> False)
if | LIdentifier name <- token -> pure (SVar name)
| token == LKeywordT -> pure TLeaf
| otherwise -> fail "Unexpected token in list item"
parseVarM :: ParserM TricuAST
parseVarM = do
satisfyM (\case LIdentifier _ -> True; _ -> False) >>= \case
LIdentifier name
| name == "t" || name == "!result" ->
fail ("Reserved keyword: " ++ name ++ " cannot be assigned.")
| otherwise ->
pure (SVar name)
_ -> fail "Unexpected token while parsing variable"
parseIntLiteralM :: ParserM TricuAST
parseIntLiteralM = do
let intL = (\case LIntegerLiteral _ -> True; _ -> False)
token <- satisfyM intL
if | LIntegerLiteral value <- token ->
pure (SInt value)
| otherwise ->
fail "Unexpected token while parsing integer literal"
parseStrLiteralM :: ParserM TricuAST
parseStrLiteralM = do
let strL = (\case LStringLiteral _ -> True; _ -> False)
token <- satisfyM strL
if | LStringLiteral value <- token ->
pure (SStr value)
| otherwise ->
fail "Unexpected token while parsing string literal"
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = errorWithoutStackTrace "Expected identifier"
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
let errors = bundleErrors bundle
formattedErrors = map formatError (Data.List.NonEmpty.toList errors)
in unlines ("Parse error(s) encountered:" : formattedErrors)
formatError :: ParseError [LToken] Void -> String
formatError (TrivialError offset unexpected expected) =
let unexpectedMsg = case unexpected of
Just x -> "unexpected token " ++ show x
Nothing -> "unexpected end of input"
expectedMsg = if null expected
then ""
else "expected " ++ show (Set.toList expected)
in "Parse error at offset " ++ show offset ++ ": " ++ unexpectedMsg ++
if null expectedMsg then "" else " " ++ expectedMsg
formatError (FancyError offset _) =
"Parse error at offset " ++ show offset ++ ": unexpected FancyError"