tricu/src/Parser.hs

293 lines
8.4 KiB
Haskell
Raw Normal View History

module Parser where
import Lexer
2025-01-20 19:20:29 -06:00
import Research
2024-12-29 20:29:41 -06:00
2025-01-20 19:20:29 -06:00
import Control.Monad (void)
import Control.Monad.State
import Data.List.NonEmpty (toList)
2024-12-29 20:29:41 -06:00
import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Error (ParseErrorBundle, errorBundlePretty)
2024-12-29 20:29:41 -06:00
import qualified Data.Set as Set
2025-01-20 19:20:29 -06:00
data PState = PState
{ parenDepth :: Int
, bracketDepth :: Int
2025-01-20 19:20:29 -06:00
} deriving (Show)
type ParserM = StateT PState (Parsec Void [LToken])
satisfyM :: (LToken -> Bool) -> ParserM LToken
satisfyM f = do
token <- lift (satisfy f)
modify' (updateDepth token)
return token
updateDepth :: LToken -> PState -> PState
2025-01-21 16:16:04 -06:00
updateDepth LOpenParen st = st { parenDepth = parenDepth st + 1 }
updateDepth LOpenBracket st = st { bracketDepth = bracketDepth st + 1 }
updateDepth LCloseParen st = st { parenDepth = parenDepth st - 1 }
updateDepth LCloseBracket st = st { bracketDepth = bracketDepth st - 1 }
updateDepth _ st = st
2025-01-20 19:20:29 -06:00
topLevelNewline :: ParserM ()
topLevelNewline = do
st <- get
if parenDepth st == 0 && bracketDepth st == 0
2025-01-20 19:20:29 -06:00
then void (satisfyM (== LNewline))
else fail "Top-level exit in nested context (paren or bracket)"
2025-01-20 19:20:29 -06:00
parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST]
parseProgram tokens =
runParser (evalStateT (parseProgramM <* finalizeDepth <* eof) (PState 0 0)) "" tokens
2025-01-20 19:20:29 -06:00
parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST
parseSingleExpr tokens =
runParser (evalStateT (scnParserM *> parseExpressionM <* finalizeDepth <* eof) (PState 0 0)) "" tokens
finalizeDepth :: ParserM ()
finalizeDepth = do
st <- get
case (parenDepth st, bracketDepth st) of
(0, 0) -> pure ()
(p, b) -> fail $ "Unmatched tokens: " ++ show (p, b)
2024-12-29 08:29:25 -06:00
parseTricu :: String -> [TricuAST]
2025-01-20 19:20:29 -06:00
parseTricu input =
case lexTricu input of
[] -> []
toks ->
case parseProgram toks of
Left err -> errorWithoutStackTrace (handleParseError err)
2025-01-20 19:20:29 -06:00
Right asts -> asts
2024-12-29 08:29:25 -06:00
parseSingle :: String -> TricuAST
2025-01-20 19:20:29 -06:00
parseSingle input =
case lexTricu input of
[] -> SEmpty
toks ->
case parseSingleExpr toks of
Left err -> errorWithoutStackTrace (handleParseError err)
2025-01-20 19:20:29 -06:00
Right ast -> ast
parseProgramM :: ParserM [TricuAST]
parseProgramM = do
skipMany topLevelNewline
exprs <- sepEndBy parseOneExpression (some topLevelNewline)
skipMany topLevelNewline
return exprs
parseOneExpression :: ParserM TricuAST
parseOneExpression = scnParserM *> parseExpressionM
scnParserM :: ParserM ()
scnParserM = skipMany $ do
2025-01-21 14:21:47 -06:00
t <- lookAhead anySingle
2025-01-20 19:20:29 -06:00
st <- get
if | (parenDepth st > 0 || bracketDepth st > 0) && (t == LNewline) ->
void $ satisfyM (== LNewline)
| otherwise ->
fail "In nested context or no space token" <|> empty
2025-01-20 19:20:29 -06:00
eofM :: ParserM ()
eofM = lift eof
parseExpressionM :: ParserM TricuAST
parseExpressionM = choice
[ try parseFunctionM
, try parseLambdaM
, try parseLambdaExpressionM
, try parseListLiteralM
, try parseApplicationM
, try parseTreeTermM
, parseLiteralM
]
2025-01-20 19:20:29 -06:00
parseFunctionM :: ParserM TricuAST
parseFunctionM = do
let ident = (\case LIdentifier _ -> True; _ -> False)
LIdentifier name <- satisfyM ident
args <- many $ satisfyM ident
2025-01-20 19:20:29 -06:00
_ <- satisfyM (== LAssign)
scnParserM
body <- parseExpressionM
pure (SDef name (map getIdentifier args) body)
2025-01-20 19:20:29 -06:00
parseLambdaM :: ParserM TricuAST
parseLambdaM = do
let ident = (\case LIdentifier _ -> True; _ -> False)
_ <- satisfyM (== LBackslash)
params <- some (satisfyM ident)
_ <- satisfyM (== LColon)
scnParserM
body <- parseLambdaExpressionM
pure $ foldr (\param acc -> SLambda [getIdentifier param] acc) body params
2025-01-20 19:20:29 -06:00
parseLambdaExpressionM :: ParserM TricuAST
parseLambdaExpressionM = choice
[ try parseLambdaApplicationM
, parseAtomicLambdaM
2024-12-27 08:17:06 -06:00
]
2025-01-20 19:20:29 -06:00
parseAtomicLambdaM :: ParserM TricuAST
parseAtomicLambdaM = choice
[ parseVarM
, parseTreeLeafM
, parseLiteralM
, parseListLiteralM
, try parseLambdaM
, between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseLambdaExpressionM
2024-12-27 08:17:06 -06:00
]
2025-01-20 19:20:29 -06:00
parseApplicationM :: ParserM TricuAST
parseApplicationM = do
func <- parseAtomicBaseM
scnParserM
args <- many $ do
scnParserM
arg <- parseAtomicM
return arg
return $ foldl SApp func args
parseLambdaApplicationM :: ParserM TricuAST
parseLambdaApplicationM = do
func <- parseAtomicLambdaM
scnParserM
args <- many $ do
arg <- parseAtomicLambdaM
scnParserM
pure arg
pure $ foldl SApp func args
parseAtomicBaseM :: ParserM TricuAST
parseAtomicBaseM = choice
[ parseTreeLeafM
, parseGroupedM
]
2025-01-20 19:20:29 -06:00
parseTreeLeafM :: ParserM TricuAST
parseTreeLeafM = do
let keyword = (\case LKeywordT -> True; _ -> False)
_ <- satisfyM keyword
2025-01-21 14:21:47 -06:00
notFollowedBy $ lift $ satisfy (== LAssign)
2025-01-20 19:20:29 -06:00
pure TLeaf
parseTreeTermM :: ParserM TricuAST
parseTreeTermM = do
base <- parseTreeLeafOrParenthesizedM
rest <- many parseTreeLeafOrParenthesizedM
pure (foldl combine base rest)
where
combine acc next
| TLeaf <- acc = TStem next
| TStem t <- acc = TFork t next
| TFork _ _ <- acc = TFork acc next
2025-01-20 19:20:29 -06:00
parseTreeLeafOrParenthesizedM :: ParserM TricuAST
parseTreeLeafOrParenthesizedM = choice
[ between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) parseTreeTermM
, parseTreeLeafM
]
2025-01-20 19:20:29 -06:00
parseAtomicM :: ParserM TricuAST
parseAtomicM = choice
[ parseVarM
, parseTreeLeafM
, parseListLiteralM
, parseGroupedM
, parseLiteralM
]
2025-01-20 19:20:29 -06:00
parseGroupedM :: ParserM TricuAST
parseGroupedM = between (satisfyM (== LOpenParen)) (satisfyM (== LCloseParen)) $
scnParserM *> parseExpressionM <* scnParserM
2025-01-20 19:20:29 -06:00
parseLiteralM :: ParserM TricuAST
parseLiteralM = choice
[ parseIntLiteralM
, parseStrLiteralM
]
2025-01-20 19:20:29 -06:00
parseListLiteralM :: ParserM TricuAST
parseListLiteralM = do
_ <- satisfyM (== LOpenBracket)
elements <- many $ do
scnParserM
parseListItemM
scnParserM
2025-01-20 19:20:29 -06:00
_ <- satisfyM (== LCloseBracket)
pure (SList elements)
parseListItemM :: ParserM TricuAST
parseListItemM = choice
[ parseGroupedItemM
, parseListLiteralM
, parseSingleItemM
]
2025-01-20 19:20:29 -06:00
parseGroupedItemM :: ParserM TricuAST
parseGroupedItemM = do
_ <- satisfyM (== LOpenParen)
inner <- parseExpressionM
_ <- satisfyM (== LCloseParen)
pure inner
2025-01-20 19:20:29 -06:00
parseSingleItemM :: ParserM TricuAST
parseSingleItemM = do
token <- satisfyM (\case LIdentifier _ -> True; LKeywordT -> True; _ -> False)
if | LIdentifier name <- token -> pure (SVar name)
| token == LKeywordT -> pure TLeaf
| otherwise -> fail "Unexpected token in list item"
2025-01-20 19:20:29 -06:00
parseVarM :: ParserM TricuAST
parseVarM = do
satisfyM (\case LIdentifier _ -> True; _ -> False) >>= \case
LIdentifier name
| name == "t" || name == "__result" ->
fail ("Reserved keyword: " ++ name ++ " cannot be assigned.")
| otherwise ->
pure (SVar name)
_ -> fail "Unexpected token while parsing variable"
2025-01-20 19:20:29 -06:00
parseIntLiteralM :: ParserM TricuAST
parseIntLiteralM = do
let intL = (\case LIntegerLiteral _ -> True; _ -> False)
token <- satisfyM intL
if | LIntegerLiteral value <- token ->
pure (SInt value)
| otherwise ->
fail "Unexpected token while parsing integer literal"
2025-01-20 19:20:29 -06:00
parseStrLiteralM :: ParserM TricuAST
parseStrLiteralM = do
let strL = (\case LStringLiteral _ -> True; _ -> False)
token <- satisfyM strL
if | LStringLiteral value <- token ->
pure (SStr value)
| otherwise ->
fail "Unexpected token while parsing string literal"
2025-01-20 19:20:29 -06:00
getIdentifier :: LToken -> String
getIdentifier (LIdentifier name) = name
getIdentifier _ = errorWithoutStackTrace "Expected identifier"
handleParseError :: ParseErrorBundle [LToken] Void -> String
handleParseError bundle =
let errors = bundleErrors bundle
formattedErrors = map formatError (Data.List.NonEmpty.toList errors)
in unlines ("Parse error(s) encountered:" : formattedErrors)
formatError :: ParseError [LToken] Void -> String
formatError (TrivialError offset unexpected expected) =
let unexpectedMsg = case unexpected of
Just x -> "unexpected token " ++ show x
Nothing -> "unexpected end of input"
expectedMsg = if null expected
then ""
else "expected " ++ show (Set.toList expected)
in "Parse error at offset " ++ show offset ++ ": " ++ unexpectedMsg ++
if null expectedMsg then "" else " " ++ expectedMsg
formatError (FancyError offset _) =
"Parse error at offset " ++ show offset ++ ": unexpected FancyError"