430 lines
12 KiB
Haskell
430 lines
12 KiB
Haskell
module Parser where
|
|
|
|
import Lexer
|
|
import Research
|
|
|
|
import Control.Monad (void)
|
|
import Data.Void (Void)
|
|
import Text.Megaparsec
|
|
|
|
import qualified Data.List.NonEmpty as NE
|
|
import qualified Data.Set as Set
|
|
|
|
type TokParser = Parsec Void [LToken]
|
|
|
|
data Context = Top | Nested
|
|
deriving (Eq, Show)
|
|
|
|
reservedNames :: Set.Set String
|
|
reservedNames = Set.fromList ["t", "!result"]
|
|
|
|
parseTricu :: String -> [TricuAST]
|
|
parseTricu input =
|
|
let toks = lexTricu input
|
|
in case runParser programP "" toks of
|
|
Left err -> errorWithoutStackTrace (handleParseError toks err)
|
|
Right asts -> asts
|
|
|
|
parseSingle :: String -> TricuAST
|
|
parseSingle input =
|
|
let toks = lexTricu input
|
|
in case parseSingleExpr toks of
|
|
Left err -> errorWithoutStackTrace (handleParseError toks err)
|
|
Right ast -> ast
|
|
|
|
parseProgram :: [LToken] -> Either (ParseErrorBundle [LToken] Void) [TricuAST]
|
|
parseProgram = runParser programP ""
|
|
|
|
parseSingleExpr :: [LToken] -> Either (ParseErrorBundle [LToken] Void) TricuAST
|
|
parseSingleExpr = runParser singleP ""
|
|
|
|
programP :: TokParser [TricuAST]
|
|
programP = do
|
|
skipTopNewlines
|
|
imports <- many (importP <* skipTopNewlines)
|
|
items <- manyItemsP
|
|
eof
|
|
pure (imports ++ items)
|
|
|
|
singleP :: TokParser TricuAST
|
|
singleP = do
|
|
skipTopNewlines
|
|
item <- topItemP
|
|
skipTopNewlines
|
|
eof
|
|
pure item
|
|
|
|
manyItemsP :: TokParser [TricuAST]
|
|
manyItemsP = do
|
|
skipTopNewlines
|
|
done <- atEndP
|
|
if done
|
|
then pure []
|
|
else do
|
|
item <- topItemP
|
|
skipTopNewlines
|
|
rest <- manyItemsP
|
|
pure (item : rest)
|
|
|
|
topItemP :: TokParser TricuAST
|
|
topItemP = do
|
|
toks <- getInput
|
|
case toks of
|
|
LIdentifier _ : LAssign : _ -> definitionP
|
|
_ -> exprTopP
|
|
|
|
definitionP :: TokParser TricuAST
|
|
definitionP = do
|
|
name <- identifierNameP
|
|
void (tok (== LAssign) "=")
|
|
skipNestedNewlines
|
|
body <- exprTopP
|
|
pure (SDef name [] body)
|
|
|
|
importP :: TokParser TricuAST
|
|
importP = do
|
|
t <- tok isImport "import"
|
|
case t of
|
|
LImport path ns -> pure (SImport path ns)
|
|
_ -> fail "internal parser error: expected import token"
|
|
where
|
|
isImport (LImport _ _) = True
|
|
isImport _ = False
|
|
|
|
exprTopP :: TokParser TricuAST
|
|
exprTopP = do
|
|
toks <- getInput
|
|
case lambdaHeadTop toks of
|
|
Just params -> lambdaP Top params
|
|
Nothing -> pipeTopP
|
|
|
|
exprNestedP :: TokParser TricuAST
|
|
exprNestedP = do
|
|
skipNestedNewlines
|
|
toks <- getInput
|
|
case lambdaHeadNested toks of
|
|
Just params -> lambdaP Nested params
|
|
Nothing -> pipeNestedP
|
|
|
|
lambdaP :: Context -> [String] -> TokParser TricuAST
|
|
lambdaP ctx params = do
|
|
consumeLambdaHead ctx params
|
|
body <- case ctx of
|
|
Top -> exprTopP
|
|
Nested -> exprNestedP
|
|
pure (foldr (\p acc -> SLambda [p] acc) body params)
|
|
|
|
lambdaHeadTop :: [LToken] -> Maybe [String]
|
|
lambdaHeadTop toks =
|
|
case collectIdentifiersNoNewlines toks of
|
|
(params@(_:_), LColon : _) -> Just params
|
|
_ -> Nothing
|
|
|
|
lambdaHeadNested :: [LToken] -> Maybe [String]
|
|
lambdaHeadNested toks =
|
|
case collectIdentifiersWithNewlines (dropNewlines toks) of
|
|
(params@(_:_), rest) ->
|
|
case dropNewlines rest of
|
|
LColon : _ -> Just params
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
|
|
collectIdentifiersNoNewlines :: [LToken] -> ([String], [LToken])
|
|
collectIdentifiersNoNewlines (LIdentifier name : rest) =
|
|
let (names, final) = collectIdentifiersNoNewlines rest
|
|
in (name : names, final)
|
|
collectIdentifiersNoNewlines rest = ([], rest)
|
|
|
|
collectIdentifiersWithNewlines :: [LToken] -> ([String], [LToken])
|
|
collectIdentifiersWithNewlines (LIdentifier name : rest) =
|
|
let (names, final) = collectIdentifiersWithNewlines (dropNewlines rest)
|
|
in (name : names, final)
|
|
collectIdentifiersWithNewlines rest = ([], rest)
|
|
|
|
consumeLambdaHead :: Context -> [String] -> TokParser ()
|
|
consumeLambdaHead ctx params = do
|
|
case ctx of
|
|
Top -> pure ()
|
|
Nested -> skipNestedNewlines
|
|
|
|
mapM_ consumeParam params
|
|
|
|
case ctx of
|
|
Top -> pure ()
|
|
Nested -> skipNestedNewlines
|
|
|
|
void (tok (== LColon) ":")
|
|
skipNestedNewlines
|
|
where
|
|
consumeParam _ = do
|
|
void identifierNameP
|
|
case ctx of
|
|
Top -> pure ()
|
|
Nested -> skipNestedNewlines
|
|
|
|
data PipeOp = PipeBackward | PipeForward
|
|
deriving (Eq, Show)
|
|
|
|
applyPipe :: TricuAST -> (PipeOp, TricuAST) -> TricuAST
|
|
applyPipe acc (PipeBackward, rhs) =
|
|
SApp acc rhs
|
|
|
|
applyPipe acc (PipeForward, rhs) =
|
|
SApp rhs acc
|
|
|
|
pipeTopP :: TokParser TricuAST
|
|
pipeTopP =
|
|
pipeChainP appTopP appNestedP
|
|
|
|
pipeNestedP :: TokParser TricuAST
|
|
pipeNestedP =
|
|
pipeChainP appNestedP appNestedP
|
|
|
|
pipeChainP :: TokParser TricuAST -> TokParser TricuAST -> TokParser TricuAST
|
|
pipeChainP parseFirst parseOperand = do
|
|
first <- parseFirst
|
|
rest <- many (try pipeSegmentP)
|
|
pure (foldl applyPipe first rest)
|
|
where
|
|
pipeSegmentP = do
|
|
skipNestedNewlines
|
|
op <- pipeOpP
|
|
skipNestedNewlines
|
|
rhs <- parseOperand
|
|
pure (op, rhs)
|
|
|
|
pipeOpP :: TokParser PipeOp
|
|
pipeOpP =
|
|
(tok (== LArrowLeft) "<|" *> pure PipeBackward)
|
|
<|> (tok (== LArrowRight) "|>" *> pure PipeForward)
|
|
|
|
appTopP :: TokParser TricuAST
|
|
appTopP = do
|
|
first <- atomTopP
|
|
appRestTopP first
|
|
|
|
appRestTopP :: TricuAST -> TokParser TricuAST
|
|
appRestTopP acc = do
|
|
mt <- peekP
|
|
case mt of
|
|
Just t | startsAtom t -> do
|
|
arg <- atomTopP
|
|
appRestTopP (SApp acc arg)
|
|
_ -> pure acc
|
|
|
|
appNestedP :: TokParser TricuAST
|
|
appNestedP = do
|
|
first <- atomNestedP
|
|
appRestNestedP first
|
|
|
|
appRestNestedP :: TricuAST -> TokParser TricuAST
|
|
appRestNestedP acc = do
|
|
skipNestedNewlines
|
|
mt <- peekP
|
|
case mt of
|
|
Just t | startsAtom t -> do
|
|
arg <- atomNestedP
|
|
appRestNestedP (SApp acc arg)
|
|
_ -> pure acc
|
|
|
|
startsAtom :: LToken -> Bool
|
|
startsAtom LOpenParen = True
|
|
startsAtom LOpenBracket = True
|
|
startsAtom (LIdentifier _) = True
|
|
startsAtom (LIdentifierWithHash _ _) = True
|
|
startsAtom (LNamespace _) = True
|
|
startsAtom LKeywordT = True
|
|
startsAtom (LIntegerLiteral _) = True
|
|
startsAtom (LStringLiteral _) = True
|
|
startsAtom _ = False
|
|
|
|
atomTopP :: TokParser TricuAST
|
|
atomTopP = do
|
|
toks <- getInput
|
|
case toks of
|
|
LOpenParen : _ -> groupedP
|
|
LOpenBracket : _ -> listP
|
|
LNamespace _ : LDot : _ -> namespacedVarP
|
|
LIdentifier _ : _ -> plainVarP
|
|
LIdentifierWithHash _ _ : _ -> plainVarP
|
|
LKeywordT : _ -> leafP
|
|
LIntegerLiteral _ : _ -> intP
|
|
LStringLiteral _ : _ -> strP
|
|
_ -> fail "expected expression atom"
|
|
|
|
atomNestedP :: TokParser TricuAST
|
|
atomNestedP = skipNestedNewlines *> atomTopP
|
|
|
|
groupedP :: TokParser TricuAST
|
|
groupedP = do
|
|
void (tok (== LOpenParen) "(")
|
|
skipNestedNewlines
|
|
expr <- exprNestedP
|
|
skipNestedNewlines
|
|
void (tok (== LCloseParen) ")")
|
|
pure expr
|
|
|
|
listP :: TokParser TricuAST
|
|
listP = do
|
|
void (tok (== LOpenBracket) "[")
|
|
skipNestedNewlines
|
|
xs <- listElementsP
|
|
skipNestedNewlines
|
|
void (tok (== LCloseBracket) "]")
|
|
pure (SList xs)
|
|
|
|
listElementsP :: TokParser [TricuAST]
|
|
listElementsP = do
|
|
skipNestedNewlines
|
|
mt <- peekP
|
|
case mt of
|
|
Just LCloseBracket -> pure []
|
|
Just t | startsAtom t -> do
|
|
x <- listElementP
|
|
xs <- listElementsP
|
|
pure (x : xs)
|
|
_ -> pure []
|
|
|
|
listElementP :: TokParser TricuAST
|
|
listElementP = do
|
|
toks <- getInput
|
|
case toks of
|
|
LOpenParen : _ -> groupedP
|
|
LOpenBracket : _ -> listP
|
|
LNamespace _ : LDot : _ -> namespacedVarP
|
|
LIdentifier _ : _ -> plainVarP
|
|
LIdentifierWithHash _ _ : _ -> plainVarP
|
|
LKeywordT : _ -> leafP
|
|
LIntegerLiteral _ : _ -> intP
|
|
LStringLiteral _ : _ -> strP
|
|
_ -> fail "expected list element"
|
|
|
|
leafP :: TokParser TricuAST
|
|
leafP = tok (== LKeywordT) "t" *> pure TLeaf
|
|
|
|
plainVarP :: TokParser TricuAST
|
|
plainVarP = do
|
|
t <- tok isVar "identifier"
|
|
case t of
|
|
LIdentifier name -> pure (SVar name Nothing)
|
|
LIdentifierWithHash name hash -> pure (SVar name (Just hash))
|
|
_ -> fail "internal parser error: expected identifier"
|
|
where
|
|
isVar (LIdentifier _) = True
|
|
isVar (LIdentifierWithHash _ _) = True
|
|
isVar _ = False
|
|
|
|
namespacedVarP :: TokParser TricuAST
|
|
namespacedVarP = do
|
|
nsTok <- tok isNamespace "namespace"
|
|
void (tok (== LDot) ".")
|
|
nameTok <- tok isVar "identifier"
|
|
case (nsTok, nameTok) of
|
|
(LNamespace ns, LIdentifier name) ->
|
|
pure (SVar (ns ++ "." ++ name) Nothing)
|
|
(LNamespace ns, LIdentifierWithHash name hash) ->
|
|
pure (SVar (ns ++ "." ++ name) (Just hash))
|
|
_ -> fail "internal parser error: expected namespaced identifier"
|
|
where
|
|
isNamespace (LNamespace _) = True
|
|
isNamespace _ = False
|
|
|
|
isVar (LIdentifier _) = True
|
|
isVar (LIdentifierWithHash _ _) = True
|
|
isVar _ = False
|
|
|
|
intP :: TokParser TricuAST
|
|
intP = do
|
|
t <- tok isInt "integer"
|
|
case t of
|
|
LIntegerLiteral n -> pure (SInt (fromIntegral n))
|
|
_ -> fail "internal parser error: expected integer"
|
|
where
|
|
isInt (LIntegerLiteral _) = True
|
|
isInt _ = False
|
|
|
|
strP :: TokParser TricuAST
|
|
strP = do
|
|
t <- tok isStr "string"
|
|
case t of
|
|
LStringLiteral s -> pure (SStr s)
|
|
_ -> fail "internal parser error: expected string"
|
|
where
|
|
isStr (LStringLiteral _) = True
|
|
isStr _ = False
|
|
|
|
identifierNameP :: TokParser String
|
|
identifierNameP = do
|
|
t <- tok isIdentifier "identifier"
|
|
case t of
|
|
LIdentifier name
|
|
| name `Set.member` reservedNames ->
|
|
fail ("reserved name cannot be used as identifier: " ++ name)
|
|
| otherwise ->
|
|
pure name
|
|
_ -> fail "internal parser error: expected identifier"
|
|
where
|
|
isIdentifier (LIdentifier _) = True
|
|
isIdentifier _ = False
|
|
|
|
tok :: (LToken -> Bool) -> String -> TokParser LToken
|
|
tok predicate expected = satisfy predicate <?> expected
|
|
|
|
peekP :: TokParser (Maybe LToken)
|
|
peekP = do
|
|
toks <- getInput
|
|
pure $ case toks of
|
|
[] -> Nothing
|
|
x : _ -> Just x
|
|
|
|
atEndP :: TokParser Bool
|
|
atEndP = null <$> getInput
|
|
|
|
skipTopNewlines :: TokParser ()
|
|
skipTopNewlines = skipMany (tok (== LNewline) "newline")
|
|
|
|
skipNestedNewlines :: TokParser ()
|
|
skipNestedNewlines = skipMany (tok (== LNewline) "newline")
|
|
|
|
dropNewlines :: [LToken] -> [LToken]
|
|
dropNewlines (LNewline : rest) = dropNewlines rest
|
|
dropNewlines rest = rest
|
|
|
|
handleParseError :: [LToken] -> ParseErrorBundle [LToken] Void -> String
|
|
handleParseError toks bundle =
|
|
unlines
|
|
( "Parse error(s) encountered:"
|
|
: map (formatError toks) (NE.toList (bundleErrors bundle))
|
|
)
|
|
|
|
formatError :: [LToken] -> ParseError [LToken] Void -> String
|
|
formatError toks err =
|
|
case err of
|
|
TrivialError offset unexpected expected ->
|
|
let unexpectedMsg =
|
|
case unexpected of
|
|
Nothing -> "unexpected end of input"
|
|
Just x -> "unexpected " ++ show x
|
|
expectedMsg =
|
|
if Set.null expected
|
|
then ""
|
|
else "; expected one of " ++ show (Set.toList expected)
|
|
in
|
|
"Parse error at token offset " ++ show offset ++ ": " ++ unexpectedMsg ++ expectedMsg
|
|
++ "\nToken context:\n" ++ tokenContext toks offset
|
|
|
|
FancyError offset fancy ->
|
|
"Parse error at token offset " ++ show offset ++ ": " ++ show (Set.toList fancy)
|
|
++ "\nToken context:\n" ++ tokenContext toks offset
|
|
|
|
tokenContext :: [LToken] -> Int -> String
|
|
tokenContext toks off =
|
|
let start = max 0 (off - 5)
|
|
end = min (length toks) (off + 6)
|
|
rows = zip [start ..] (take (end - start) (drop start toks))
|
|
in unlines (map render rows)
|
|
where
|
|
render (i, token)
|
|
| i == off = ">>> " ++ show i ++ ": " ++ show token
|
|
| otherwise = " " ++ show i ++ ": " ++ show token
|