module Lexer where import Research import Text.Megaparsec import Text.Megaparsec.Char import Data.Void import qualified Data.Set as Set type Lexer = Parsec Void String data LToken = LKeywordT | LIdentifier String | LIntegerLiteral Int | LStringLiteral String | LAssign | LColon | LBackslash | LOpenParen | LCloseParen | LOpenBracket | LCloseBracket | LNewline deriving (Show, Eq, Ord) keywordT :: Lexer LToken keywordT = string "t" *> notFollowedBy alphaNumChar *> pure LKeywordT identifier :: Lexer LToken identifier = do name <- some (letterChar <|> char '_' <|> char '-') if (name == "t" || name == "__result") then fail "Keywords (`t`, `__result`) cannot be used as an identifier" else return (LIdentifier name) integerLiteral :: Lexer LToken integerLiteral = do num <- some digitChar return (LIntegerLiteral (read num)) stringLiteral :: Lexer LToken stringLiteral = do char '"' content <- many (noneOf ['"']) if null content then fail "Empty string literals are not allowed" else do char '"' --" return (LStringLiteral content) assign :: Lexer LToken assign = char '=' *> pure LAssign colon :: Lexer LToken colon = char ':' *> pure LColon backslash :: Lexer LToken backslash = char '\\' *> pure LBackslash openParen :: Lexer LToken openParen = char '(' *> pure LOpenParen closeParen :: Lexer LToken closeParen = char ')' *> pure LCloseParen openBracket :: Lexer LToken openBracket = char '[' *> pure LOpenBracket closeBracket :: Lexer LToken closeBracket = char ']' *> pure LCloseBracket lnewline :: Lexer LToken lnewline = char '\n' *> pure LNewline sc :: Lexer () sc = skipMany (char ' ' <|> char '\t') saplingLexer :: Lexer [LToken] saplingLexer = many (sc *> choice [ try identifier , try keywordT , try integerLiteral , try stringLiteral , assign , colon , backslash , openParen , closeParen , openBracket , closeBracket , lnewline ] <* sc) <* eof lexSapling :: String -> [LToken] lexSapling input = case runParser saplingLexer "" input of Left err -> error $ "Lexical error:\n" ++ errorBundlePretty err Right tokens -> tokens