module Lexer where import Research import Control.Monad (void) import Data.Functor (($>)) import Data.Set () import Data.Void import Text.Megaparsec import Text.Megaparsec.Char hiding (space) import Text.Megaparsec.Char.Lexer type Lexer = Parsec Void String tricuLexer :: Lexer [LToken] tricuLexer = do sc header <- many $ do tok <- choice [ try lImport , lnewline ] sc pure tok toks <- many $ do tok <- choice tricuLexer' sc pure tok sc eof pure (header ++ toks) where tricuLexer' = [ try lnewline , try namespace , try dot , try identifierWithHash , try identifier , try keywordT , try integerLiteral , try stringLiteral , assign , colon , openParen , closeParen , openBracket , closeBracket , try arrowLeft , try arrowRight ] lexTricu :: String -> [LToken] lexTricu input = case runParser tricuLexer "" input of Left err -> errorWithoutStackTrace $ "Lexical error:\n" ++ errorBundlePretty err Right toks -> toks keywordT :: Lexer LToken keywordT = string "t" *> notFollowedBy alphaNumChar $> LKeywordT identifierWithHash :: Lexer LToken identifierWithHash = do first <- lowerChar <|> char '_' rest <- many $ letterChar <|> digitChar <|> char '_' <|> char '-' <|> char '?' <|> char '$' <|> char '@' <|> char '%' <|> char '\'' _ <- char '#' -- Consume '#' hashString <- some (alphaNumChar <|> char '-') -- Ensures at least one char for hash "hash characters (alphanumeric or hyphen)" let name = first : rest let hashLen = length hashString if name == "t" || name == "!result" then fail "Keywords (`t`, `!result`) cannot be used with a hash suffix." else if hashLen < 16 then fail $ "Hash suffix for '" ++ name ++ "' must be at least 16 characters long. Got " ++ show hashLen ++ " ('" ++ hashString ++ "')." else if hashLen > 64 then -- Assuming SHA256, max 64 fail $ "Hash suffix for '" ++ name ++ "' cannot be longer than 64 characters (SHA256). Got " ++ show hashLen ++ " ('" ++ hashString ++ "')." else return (LIdentifierWithHash name hashString) identifier :: Lexer LToken identifier = do first <- lowerChar <|> char '_' rest <- many $ letterChar <|> digitChar <|> char '_' <|> char '-' <|> char '?' <|> char '$' <|> char '@' <|> char '%' <|> char '\'' let name = first : rest if name == "t" || name == "!result" then fail "Keywords (`t`, `!result`) cannot be used as an identifier" else return (LIdentifier name) namespace :: Lexer LToken namespace = do name <- try (string "!Local") <|> do first <- upperChar rest <- many (letterChar <|> digitChar) return (first:rest) return (LNamespace name) dot :: Lexer LToken dot = char '.' $> LDot lImport :: Lexer LToken lImport = do _ <- string "!import" space1 LStringLiteral path <- stringLiteral space1 LNamespace name <- namespace return (LImport path name) assign :: Lexer LToken assign = char '=' $> LAssign colon :: Lexer LToken colon = char ':' $> LColon openParen :: Lexer LToken openParen = char '(' $> LOpenParen closeParen :: Lexer LToken closeParen = char ')' $> LCloseParen openBracket :: Lexer LToken openBracket = char '[' $> LOpenBracket closeBracket :: Lexer LToken closeBracket = char ']' $> LCloseBracket arrowLeft :: Lexer LToken arrowLeft = string "<|" $> LArrowLeft arrowRight :: Lexer LToken arrowRight = string "|>" $> LArrowRight lnewline :: Lexer LToken lnewline = char '\n' $> LNewline sc :: Lexer () sc = space (void $ takeWhile1P (Just "space") (\c -> c == ' ' || c == '\t')) (skipLineComment "--") (skipBlockComment "|-" "-|") integerLiteral :: Lexer LToken integerLiteral = do num <- some digitChar return (LIntegerLiteral (read num)) stringLiteral :: Lexer LToken stringLiteral = do void (char '"') content <- manyTill Lexer.charLiteral (void (char '"')) return (LStringLiteral content) charLiteral :: Lexer Char charLiteral = escapedChar <|> normalChar where normalChar = noneOf ['"', '\\'] escapedChar = do void $ char '\\' c <- oneOf ['n', 't', 'r', 'f', 'b', '\\', '"', '\''] return $ case c of 'n' -> '\n' 't' -> '\t' 'r' -> '\r' 'f' -> '\f' 'b' -> '\b' '\\' -> '\\' '"' -> '"' '\'' -> '\'' _ -> c