Adds support for REPL namespacing, primarily to avoid `main` collisions. Also adds a library function for an ergonomic pattern matching function that I've been noodling on. I might explore ways to make list syntax less annoying specifically for pattern matching like this.
313 lines
8.9 KiB
Haskell
313 lines
8.9 KiB
Haskell
module Parser where
|
|
|
|
import Lexer
|
|
import Research
|
|
|
|
import Control.Monad (void)
|
|
import Control.Monad.State
|
|
import Data.List.NonEmpty (toList)
|
|
import Data.Void (Void)
|
|
import Text.Megaparsec
|
|
import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty)
|
|
import qualified Data.Set as Set
|
|
|
|
data PState = PState
|
|
{ parenDepth :: Int
|
|
, bracketDepth :: Int
|
|
} deriving (Show)
|
|
|
|
type ParserM = StateT PState (Parsec Void [LToken])
|
|
|
|
satisfyM :: (LToken -> Bool) -> ParserM LToken
|
|
satisfyM f = do
|
|
token <- lift (satisfy f)
|
|
modify' (updateDepth token)
|
|
return token
|
|
|
|
updateDepth :: LToken -> PState -> PState
|
|
updateDepth LOpenParen st = st { parenDepth = parenDepth st + 1 }
|
|
updateDepth LOpenBracket st = st { bracketDepth = bracketDepth st + 1 }
|
|
updateDepth LCloseParen st = st { parenDepth = parenDepth st - 1 }
|
|
updateDepth LCloseBracket st = st { bracketDepth = bracketDepth st - 1 }
|
|
updateDepth _ st = st
|
|
|
|
topLevelNewline :: ParserM ()
|
|
topLevelNewline = do
|
|
st <- get
|
|
if parenDepth st == 0 && bracketDepth st == 0
|
|
then void (satisfyM (== LNewline))
|
|
else fail "Top-level exit in nested context (paren or bracket)"
|
|
|
|
parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST]
|
|
parseProgram tokens =
|
|
runParser (evalStateT (parseProgramM <* finalizeDepth <* eof) (PState 0 0)) "" tokens
|
|
|
|
parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST
|
|
parseSingleExpr tokens =
|
|
runParser (evalStateT (scnParserM *> parseExpressionM <* finalizeDepth <* eof) (PState 0 0)) "" tokens
|
|
|
|
finalizeDepth :: ParserM ()
|
|
finalizeDepth = do
|
|
st <- get
|
|
case (parenDepth st, bracketDepth st) of
|
|
(0, 0) -> pure ()
|
|
(p, b) -> fail $ "Unmatched tokens: " ++ show (p, b)
|
|
|
|
parseTricu :: String -> [TricuAST]
|
|
parseTricu input =
|
|
case lexTricu input of
|
|
[] -> []
|
|
toks ->
|
|
case parseProgram toks of
|
|
Left err -> errorWithoutStackTrace (handleParseError err)
|
|
Right asts -> asts
|
|
|
|
parseSingle :: String -> TricuAST
|
|
parseSingle input =
|
|
case lexTricu input of
|
|
[] -> SEmpty
|
|
toks ->
|
|
case parseSingleExpr toks of
|
|
Left err -> errorWithoutStackTrace (handleParseError err)
|
|
Right ast -> ast
|
|
|
|
parseProgramM :: ParserM [TricuAST]
|
|
parseProgramM = do
|
|
skipMany topLevelNewline
|
|
importNodes <- many (do
|
|
node <- parseImportM
|
|
skipMany topLevelNewline
|
|
return node)
|
|
skipMany topLevelNewline
|
|
exprs <- sepEndBy parseOneExpression (some topLevelNewline)
|
|
skipMany topLevelNewline
|
|
return (importNodes ++ exprs)
|
|
|
|
parseImportM :: ParserM TricuAST
|
|
parseImportM = do
|
|
LImport filePath moduleName <- satisfyM isImport
|
|
pure (SImport filePath moduleName)
|
|
where
|
|
isImport (LImport _ _) = True
|
|
isImport _ = False
|
|
|
|
parseOneExpression :: ParserM TricuAST
|
|
parseOneExpression = scnParserM *> parseExpressionM
|
|
|
|
scnParserM :: ParserM ()
|
|
scnParserM = skipMany $ do
|
|
t <- lookAhead anySingle
|
|
st <- get
|
|
if | (parenDepth st > 0 || bracketDepth st > 0) && (t == LNewline) ->
|
|
void $ satisfyM (== LNewline)
|
|
| otherwise ->
|
|
fail "In nested context or no space token" <|> empty
|
|
|
|
eofM :: ParserM ()
|
|
eofM = lift eof
|
|
|
|
parseExpressionM :: ParserM TricuAST
|
|
parseExpressionM = choice
|
|
[ try parseFunctionM
|
|
, try parseLambdaM
|
|
, try parseLambdaExpressionM
|
|
, try parseListLiteralM
|
|
, try parseApplicationM
|
|
, try parseTreeTermM
|
|
, parseLiteralM
|
|
]
|
|
|
|
parseFunctionM :: ParserM TricuAST
|
|
parseFunctionM = do
|
|
let ident = (\case LIdentifier _ -> True; _ -> False)
|
|
LIdentifier name <- satisfyM ident
|
|
args <- many $ satisfyM ident
|
|
_ <- satisfyM (== LAssign)
|
|
scnParserM
|
|
body <- parseExpressionM
|
|
pure (SDef name (map getIdentifier args) body)
|
|
|
|
parseLambdaM :: ParserM TricuAST
|
|
parseLambdaM = do
|
|
let ident = (\case LIdentifier _ -> True; _ -> False)
|
|
_ <- satisfyM (== LBackslash)
|
|
params <- some (satisfyM ident)
|
|
_ <- satisfyM (== LColon)
|
|
scnParserM
|
|
body <- parseLambdaExpressionM
|
|
pure $ foldr (\param acc -> SLambda [getIdentifier param] acc) body params
|
|
|
|
parseLambdaExpressionM :: ParserM TricuAST
|
|
parseLambdaExpressionM = choice
|
|
[ try parseLambdaApplicationM
|
|
, parseAtomicLambdaM
|
|
]
|
|
|
|
parseAtomicLambdaM :: ParserM TricuAST
|
|
parseAtomicLambdaM = choice
|
|
[ parseVarM
|
|
, parseTreeLeafM
|
|
, parseLiteralM
|
|
, parseListLiteralM
|
|
, try parseLambdaM
|
|
, between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM
|
|
]
|
|
|
|
parseApplicationM :: ParserM TricuAST
|
|
parseApplicationM = do
|
|
func <- parseAtomicBaseM
|
|
scnParserM
|
|
args <- many $ do
|
|
scnParserM
|
|
arg <- parseAtomicM
|
|
return arg
|
|
return $ foldl SApp func args
|
|
|
|
parseLambdaApplicationM :: ParserM TricuAST
|
|
parseLambdaApplicationM = do
|
|
func <- parseAtomicLambdaM
|
|
scnParserM
|
|
args <- many $ do
|
|
arg <- parseAtomicLambdaM
|
|
scnParserM
|
|
pure arg
|
|
pure $ foldl SApp func args
|
|
|
|
parseAtomicBaseM :: ParserM TricuAST
|
|
parseAtomicBaseM = choice
|
|
[ parseTreeLeafM
|
|
, parseGroupedM
|
|
]
|
|
|
|
parseTreeLeafM :: ParserM TricuAST
|
|
parseTreeLeafM = do
|
|
let keyword = (\case LKeywordT -> True; _ -> False)
|
|
_ <- satisfyM keyword
|
|
notFollowedBy $ lift $ satisfy (== LAssign)
|
|
pure TLeaf
|
|
|
|
parseTreeTermM :: ParserM TricuAST
|
|
parseTreeTermM = do
|
|
base <- parseTreeLeafOrParenthesizedM
|
|
rest <- many parseTreeLeafOrParenthesizedM
|
|
pure (foldl combine base rest)
|
|
where
|
|
combine acc next
|
|
| TLeaf <- acc = TStem next
|
|
| TStem t <- acc = TFork t next
|
|
| TFork _ _ <- acc = TFork acc next
|
|
|
|
parseTreeLeafOrParenthesizedM :: ParserM TricuAST
|
|
parseTreeLeafOrParenthesizedM = choice
|
|
[ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM
|
|
, parseTreeLeafM
|
|
]
|
|
|
|
parseAtomicM :: ParserM TricuAST
|
|
parseAtomicM = choice
|
|
[ parseVarM
|
|
, parseTreeLeafM
|
|
, parseListLiteralM
|
|
, parseGroupedM
|
|
, parseLiteralM
|
|
]
|
|
|
|
parseGroupedM :: ParserM TricuAST
|
|
parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $
|
|
scnParserM *> parseExpressionM <* scnParserM
|
|
|
|
parseLiteralM :: ParserM TricuAST
|
|
parseLiteralM = choice
|
|
[ parseIntLiteralM
|
|
, parseStrLiteralM
|
|
]
|
|
|
|
parseListLiteralM :: ParserM TricuAST
|
|
parseListLiteralM = do
|
|
_ <- satisfyM (== LOpenBracket)
|
|
elements <- many $ do
|
|
scnParserM
|
|
parseListItemM
|
|
scnParserM
|
|
_ <- satisfyM (== LCloseBracket)
|
|
pure (SList elements)
|
|
|
|
parseListItemM :: ParserM TricuAST
|
|
parseListItemM = choice
|
|
[ parseGroupedItemM
|
|
, parseListLiteralM
|
|
, parseSingleItemM
|
|
]
|
|
|
|
parseGroupedItemM :: ParserM TricuAST
|
|
parseGroupedItemM = do
|
|
_ <- satisfyM (== LOpenParen)
|
|
inner <- parseExpressionM
|
|
_ <- satisfyM (== LCloseParen)
|
|
pure inner
|
|
|
|
parseSingleItemM :: ParserM TricuAST
|
|
parseSingleItemM = do
|
|
token <- satisfyM (\case LIdentifier _ -> True; LKeywordT -> True; _ -> False)
|
|
if | LIdentifier name <- token -> pure (SVar name)
|
|
| token == LKeywordT -> pure TLeaf
|
|
| otherwise -> fail "Unexpected token in list item"
|
|
|
|
parseVarM :: ParserM TricuAST
|
|
parseVarM = do
|
|
token <- satisfyM (\case
|
|
LNamespace _ -> True
|
|
LIdentifier _ -> True
|
|
_ -> False)
|
|
case token of
|
|
LNamespace ns -> do
|
|
_ <- satisfyM (== LDot)
|
|
LIdentifier name <- satisfyM (\case LIdentifier _ -> True; _ -> False)
|
|
pure $ SVar (ns ++ "." ++ name)
|
|
LIdentifier name
|
|
| name == "t" || name == "!result" ->
|
|
fail ("Reserved keyword: " ++ name ++ " cannot be assigned.")
|
|
| otherwise -> pure (SVar name)
|
|
_ -> fail "Unexpected token while parsing variable"
|
|
|
|
parseIntLiteralM :: ParserM TricuAST
|
|
parseIntLiteralM = do
|
|
let intL = (\case LIntegerLiteral _ -> True; _ -> False)
|
|
token <- satisfyM intL
|
|
if | LIntegerLiteral value <- token ->
|
|
pure (SInt value)
|
|
| otherwise ->
|
|
fail "Unexpected token while parsing integer literal"
|
|
|
|
parseStrLiteralM :: ParserM TricuAST
|
|
parseStrLiteralM = do
|
|
let strL = (\case LStringLiteral _ -> True; _ -> False)
|
|
token <- satisfyM strL
|
|
if | LStringLiteral value <- token ->
|
|
pure (SStr value)
|
|
| otherwise ->
|
|
fail "Unexpected token while parsing string literal"
|
|
|
|
getIdentifier :: LToken -> String
|
|
getIdentifier (LIdentifier name) = name
|
|
getIdentifier _ = errorWithoutStackTrace "Expected identifier"
|
|
|
|
handleParseError :: ParseErrorBundle [LToken] Void -> String
|
|
handleParseError bundle =
|
|
let errors = bundleErrors bundle
|
|
formattedErrors = map formatError (Data.List.NonEmpty.toList errors)
|
|
in unlines ("Parse error(s) encountered:" : formattedErrors)
|
|
|
|
formatError :: ParseError [LToken] Void -> String
|
|
formatError (TrivialError offset unexpected expected) =
|
|
let unexpectedMsg = case unexpected of
|
|
Just x -> "unexpected token " ++ show x
|
|
Nothing -> "unexpected end of input"
|
|
expectedMsg = if null expected
|
|
then ""
|
|
else "expected " ++ show (Set.toList expected)
|
|
in "Parse error at offset " ++ show offset ++ ": " ++ unexpectedMsg ++
|
|
if null expectedMsg then "" else " " ++ expectedMsg
|
|
formatError (FancyError offset _) =
|
|
"Parse error at offset " ++ show offset ++ ": unexpected FancyError"
|