module Lexer where import Research import Control.Monad (void) import Data.Functor (($>)) import Data.Void import Text.Megaparsec import Text.Megaparsec.Char hiding (space) import Text.Megaparsec.Char.Lexer import qualified Data.Set as Set type Lexer = Parsec Void String tricuLexer :: Lexer [LToken] tricuLexer = do sc header <- many $ do tok <- choice [ try lImport , lnewline ] sc pure tok tokens <- many $ do tok <- choice tricuLexer' sc pure tok sc eof pure (header ++ tokens) where tricuLexer' = [ try lnewline , try namespace , try dot , try identifier , try keywordT , try integerLiteral , try stringLiteral , assign , colon , openParen , closeParen , openBracket , closeBracket ] lexTricu :: String -> [LToken] lexTricu input = case runParser tricuLexer "" input of Left err -> errorWithoutStackTrace $ "Lexical error:\n" ++ errorBundlePretty err Right tokens -> tokens keywordT :: Lexer LToken keywordT = string "t" *> notFollowedBy alphaNumChar $> LKeywordT identifier :: Lexer LToken identifier = do first <- lowerChar <|> char '_' rest <- many $ letterChar <|> digitChar <|> char '_' <|> char '-' <|> char '?' <|> char '$' <|> char '#' <|> char '@' <|> char '%' let name = first : rest if name == "t" || name == "!result" then fail "Keywords (`t`, `!result`) cannot be used as an identifier" else return (LIdentifier name) namespace :: Lexer LToken namespace = do name <- try (string "!Local") <|> do first <- upperChar rest <- many (letterChar <|> digitChar) return (first:rest) return (LNamespace name) dot :: Lexer LToken dot = char '.' $> LDot lImport :: Lexer LToken lImport = do _ <- string "!import" space1 LStringLiteral path <- stringLiteral space1 LNamespace name <- namespace return (LImport path name) assign :: Lexer LToken assign = char '=' $> LAssign colon :: Lexer LToken colon = char ':' $> LColon openParen :: Lexer LToken openParen = char '(' $> LOpenParen closeParen :: Lexer LToken closeParen = char ')' $> LCloseParen openBracket :: Lexer LToken openBracket = char '[' $> LOpenBracket closeBracket :: Lexer LToken closeBracket = char ']' $> LCloseBracket lnewline :: Lexer LToken lnewline = char '\n' $> LNewline sc :: Lexer () sc = space (void $ takeWhile1P (Just "space") (\c -> c == ' ' || c == '\t')) (skipLineComment "--") (skipBlockComment "|-" "-|") integerLiteral :: Lexer LToken integerLiteral = do num <- some digitChar return (LIntegerLiteral (read num)) stringLiteral :: Lexer LToken stringLiteral = do char '"' content <- manyTill Lexer.charLiteral (char '"') return (LStringLiteral content) charLiteral :: Lexer Char charLiteral = escapedChar <|> normalChar where normalChar = noneOf ['"', '\\'] escapedChar = do void $ char '\\' c <- oneOf ['n', 't', 'r', 'f', 'b', '\\', '"', '\''] return $ case c of 'n' -> '\n' 't' -> '\t' 'r' -> '\r' 'f' -> '\f' 'b' -> '\b' '\\' -> '\\' '"' -> '"' '\'' -> '\''