Files
tricu/src/Parser.hs
2026-05-16 14:59:52 -05:00

430 lines
12 KiB
Haskell

module Parser where
import Lexer
import Research
import Control.Monad (void)
import Data.Void (Void)
import Text.Megaparsec
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as Set
type TokParser = Parsec Void [LToken]
data Context = Top | Nested
deriving (Eq, Show)
reservedNames :: Set.Set String
reservedNames = Set.fromList ["t", "!result"]
parseTricu :: String -> [TricuAST]
parseTricu input =
let toks = lexTricu input
in case runParser programP "" toks of
Left err -> errorWithoutStackTrace (handleParseError toks err)
Right asts -> asts
parseSingle :: String -> TricuAST
parseSingle input =
let toks = lexTricu input
in case parseSingleExpr toks of
Left err -> errorWithoutStackTrace (handleParseError toks err)
Right ast -> ast
parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST]
parseProgram = runParser programP ""
parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST
parseSingleExpr = runParser singleP ""
programP :: TokParser [TricuAST]
programP = do
skipTopNewlines
imports <- many (importP <* skipTopNewlines)
items <- manyItemsP
eof
pure (imports ++ items)
singleP :: TokParser TricuAST
singleP = do
skipTopNewlines
item <- topItemP
skipTopNewlines
eof
pure item
manyItemsP :: TokParser [TricuAST]
manyItemsP = do
skipTopNewlines
done <- atEndP
if done
then pure []
else do
item <- topItemP
skipTopNewlines
rest <- manyItemsP
pure (item : rest)
topItemP :: TokParser TricuAST
topItemP = do
toks <- getInput
case toks of
LIdentifier _ : LAssign : _ -> definitionP
_ -> exprTopP
definitionP :: TokParser TricuAST
definitionP = do
name <- identifierNameP
void (tok (== LAssign) "=")
skipNestedNewlines
body <- exprTopP
pure (SDef name [] body)
importP :: TokParser TricuAST
importP = do
t <- tok isImport "import"
case t of
LImport path ns -> pure (SImport path ns)
_ -> fail "internal parser error: expected import token"
where
isImport (LImport _ _) = True
isImport _ = False
exprTopP :: TokParser TricuAST
exprTopP = do
toks <- getInput
case lambdaHeadTop toks of
Just params -> lambdaP Top params
Nothing -> pipeTopP
exprNestedP :: TokParser TricuAST
exprNestedP = do
skipNestedNewlines
toks <- getInput
case lambdaHeadNested toks of
Just params -> lambdaP Nested params
Nothing -> pipeNestedP
lambdaP :: Context -> [String] -> TokParser TricuAST
lambdaP ctx params = do
consumeLambdaHead ctx params
body <- case ctx of
Top -> exprTopP
Nested -> exprNestedP
pure (foldr (\p acc -> SLambda [p] acc) body params)
lambdaHeadTop :: [LToken] -> Maybe [String]
lambdaHeadTop toks =
case collectIdentifiersNoNewlines toks of
(params@(_:_), LColon : _) -> Just params
_ -> Nothing
lambdaHeadNested :: [LToken] -> Maybe [String]
lambdaHeadNested toks =
case collectIdentifiersWithNewlines (dropNewlines toks) of
(params@(_:_), rest) ->
case dropNewlines rest of
LColon : _ -> Just params
_ -> Nothing
_ -> Nothing
collectIdentifiersNoNewlines :: [LToken] -> ([String], [LToken])
collectIdentifiersNoNewlines (LIdentifier name : rest) =
let (names, final) = collectIdentifiersNoNewlines rest
in (name : names, final)
collectIdentifiersNoNewlines rest = ([], rest)
collectIdentifiersWithNewlines :: [LToken] -> ([String], [LToken])
collectIdentifiersWithNewlines (LIdentifier name : rest) =
let (names, final) = collectIdentifiersWithNewlines (dropNewlines rest)
in (name : names, final)
collectIdentifiersWithNewlines rest = ([], rest)
consumeLambdaHead :: Context -> [String] -> TokParser ()
consumeLambdaHead ctx params = do
case ctx of
Top -> pure ()
Nested -> skipNestedNewlines
mapM_ consumeParam params
case ctx of
Top -> pure ()
Nested -> skipNestedNewlines
void (tok (== LColon) ":")
skipNestedNewlines
where
consumeParam _ = do
void identifierNameP
case ctx of
Top -> pure ()
Nested -> skipNestedNewlines
data PipeOp = PipeBackward | PipeForward
deriving (Eq, Show)
applyPipe :: TricuAST -> (PipeOp, TricuAST) -> TricuAST
applyPipe acc (PipeBackward, rhs) =
SApp acc rhs
applyPipe acc (PipeForward, rhs) =
SApp rhs acc
pipeTopP :: TokParser TricuAST
pipeTopP =
pipeChainP appTopP appNestedP
pipeNestedP :: TokParser TricuAST
pipeNestedP =
pipeChainP appNestedP appNestedP
pipeChainP :: TokParser TricuAST -> TokParser TricuAST -> TokParser TricuAST
pipeChainP parseFirst parseOperand = do
first <- parseFirst
rest <- many (try pipeSegmentP)
pure (foldl applyPipe first rest)
where
pipeSegmentP = do
skipNestedNewlines
op <- pipeOpP
skipNestedNewlines
rhs <- parseOperand
pure (op, rhs)
pipeOpP :: TokParser PipeOp
pipeOpP =
(tok (== LArrowLeft) "<|" *> pure PipeBackward)
<|> (tok (== LArrowRight) "|>" *> pure PipeForward)
appTopP :: TokParser TricuAST
appTopP = do
first <- atomTopP
appRestTopP first
appRestTopP :: TricuAST -> TokParser TricuAST
appRestTopP acc = do
mt <- peekP
case mt of
Just t | startsAtom t -> do
arg <- atomTopP
appRestTopP (SApp acc arg)
_ -> pure acc
appNestedP :: TokParser TricuAST
appNestedP = do
first <- atomNestedP
appRestNestedP first
appRestNestedP :: TricuAST -> TokParser TricuAST
appRestNestedP acc = do
skipNestedNewlines
mt <- peekP
case mt of
Just t | startsAtom t -> do
arg <- atomNestedP
appRestNestedP (SApp acc arg)
_ -> pure acc
startsAtom :: LToken -> Bool
startsAtom LOpenParen = True
startsAtom LOpenBracket = True
startsAtom (LIdentifier _) = True
startsAtom (LIdentifierWithHash _ _) = True
startsAtom (LNamespace _) = True
startsAtom LKeywordT = True
startsAtom (LIntegerLiteral _) = True
startsAtom (LStringLiteral _) = True
startsAtom _ = False
atomTopP :: TokParser TricuAST
atomTopP = do
toks <- getInput
case toks of
LOpenParen : _ -> groupedP
LOpenBracket : _ -> listP
LNamespace _ : LDot : _ -> namespacedVarP
LIdentifier _ : _ -> plainVarP
LIdentifierWithHash _ _ : _ -> plainVarP
LKeywordT : _ -> leafP
LIntegerLiteral _ : _ -> intP
LStringLiteral _ : _ -> strP
_ -> fail "expected expression atom"
atomNestedP :: TokParser TricuAST
atomNestedP = skipNestedNewlines *> atomTopP
groupedP :: TokParser TricuAST
groupedP = do
void (tok (== LOpenParen) "(")
skipNestedNewlines
expr <- exprNestedP
skipNestedNewlines
void (tok (== LCloseParen) ")")
pure expr
listP :: TokParser TricuAST
listP = do
void (tok (== LOpenBracket) "[")
skipNestedNewlines
xs <- listElementsP
skipNestedNewlines
void (tok (== LCloseBracket) "]")
pure (SList xs)
listElementsP :: TokParser [TricuAST]
listElementsP = do
skipNestedNewlines
mt <- peekP
case mt of
Just LCloseBracket -> pure []
Just t | startsAtom t -> do
x <- listElementP
xs <- listElementsP
pure (x : xs)
_ -> pure []
listElementP :: TokParser TricuAST
listElementP = do
toks <- getInput
case toks of
LOpenParen : _ -> groupedP
LOpenBracket : _ -> listP
LNamespace _ : LDot : _ -> namespacedVarP
LIdentifier _ : _ -> plainVarP
LIdentifierWithHash _ _ : _ -> plainVarP
LKeywordT : _ -> leafP
LIntegerLiteral _ : _ -> intP
LStringLiteral _ : _ -> strP
_ -> fail "expected list element"
leafP :: TokParser TricuAST
leafP = tok (== LKeywordT) "t" *> pure TLeaf
plainVarP :: TokParser TricuAST
plainVarP = do
t <- tok isVar "identifier"
case t of
LIdentifier name -> pure (SVar name Nothing)
LIdentifierWithHash name hash -> pure (SVar name (Just hash))
_ -> fail "internal parser error: expected identifier"
where
isVar (LIdentifier _) = True
isVar (LIdentifierWithHash _ _) = True
isVar _ = False
namespacedVarP :: TokParser TricuAST
namespacedVarP = do
nsTok <- tok isNamespace "namespace"
void (tok (== LDot) ".")
nameTok <- tok isVar "identifier"
case (nsTok, nameTok) of
(LNamespace ns, LIdentifier name) ->
pure (SVar (ns ++ "." ++ name) Nothing)
(LNamespace ns, LIdentifierWithHash name hash) ->
pure (SVar (ns ++ "." ++ name) (Just hash))
_ -> fail "internal parser error: expected namespaced identifier"
where
isNamespace (LNamespace _) = True
isNamespace _ = False
isVar (LIdentifier _) = True
isVar (LIdentifierWithHash _ _) = True
isVar _ = False
intP :: TokParser TricuAST
intP = do
t <- tok isInt "integer"
case t of
LIntegerLiteral n -> pure (SInt (fromIntegral n))
_ -> fail "internal parser error: expected integer"
where
isInt (LIntegerLiteral _) = True
isInt _ = False
strP :: TokParser TricuAST
strP = do
t <- tok isStr "string"
case t of
LStringLiteral s -> pure (SStr s)
_ -> fail "internal parser error: expected string"
where
isStr (LStringLiteral _) = True
isStr _ = False
identifierNameP :: TokParser String
identifierNameP = do
t <- tok isIdentifier "identifier"
case t of
LIdentifier name
| name `Set.member` reservedNames ->
fail ("reserved name cannot be used as identifier: " ++ name)
| otherwise ->
pure name
_ -> fail "internal parser error: expected identifier"
where
isIdentifier (LIdentifier _) = True
isIdentifier _ = False
tok :: (LToken -> Bool) -> String -> TokParser LToken
tok predicate expected = satisfy predicate <?> expected
peekP :: TokParser (Maybe LToken)
peekP = do
toks <- getInput
pure $ case toks of
[] -> Nothing
x : _ -> Just x
atEndP :: TokParser Bool
atEndP = null <$> getInput
skipTopNewlines :: TokParser ()
skipTopNewlines = skipMany (tok (== LNewline) "newline")
skipNestedNewlines :: TokParser ()
skipNestedNewlines = skipMany (tok (== LNewline) "newline")
dropNewlines :: [LToken] -> [LToken]
dropNewlines (LNewline : rest) = dropNewlines rest
dropNewlines rest = rest
handleParseError :: [LToken] -> ParseErrorBundle [LToken] Void -> String
handleParseError toks bundle =
unlines
( "Parse error(s) encountered:"
: map (formatError toks) (NE.toList (bundleErrors bundle))
)
formatError :: [LToken] -> ParseError [LToken] Void -> String
formatError toks err =
case err of
TrivialError offset unexpected expected ->
let unexpectedMsg =
case unexpected of
Nothing -> "unexpected end of input"
Just x -> "unexpected " ++ show x
expectedMsg =
if Set.null expected
then ""
else "; expected one of " ++ show (Set.toList expected)
in
"Parse error at token offset " ++ show offset ++ ": " ++ unexpectedMsg ++ expectedMsg
++ "\nToken context:\n" ++ tokenContext toks offset
FancyError offset fancy ->
"Parse error at token offset " ++ show offset ++ ": " ++ show (Set.toList fancy)
++ "\nToken context:\n" ++ tokenContext toks offset
tokenContext :: [LToken] -> Int -> String
tokenContext toks off =
let start = max 0 (off - 5)
end = min (length toks) (off + 6)
rows = zip [start ..] (take (end - start) (drop start toks))
in unlines (map render rows)
where
render (i, token)
| i == off = ">>> " ++ show i ++ ": " ++ show token
| otherwise = " " ++ show i ++ ": " ++ show token