module Lexer where import Research import Control.Monad (void) import Data.Void import Text.Megaparsec import Text.Megaparsec.Char hiding (space) import Text.Megaparsec.Char.Lexer import qualified Data.Set as Set type Lexer = Parsec Void String keywordT :: Lexer LToken keywordT = string "t" *> notFollowedBy alphaNumChar *> pure LKeywordT identifier :: Lexer LToken identifier = do first <- letterChar <|> char '_' rest <- many $ letterChar <|> digitChar <|> char '_' <|> char '-' <|> char '?' <|> char '.' <|> char '$' <|> char '#' <|> char '@' <|> char '%' let name = first : rest if (name == "t" || name == "!result") then fail "Keywords (`t`, `!result`) cannot be used as an identifier" else return (LIdentifier name) integerLiteral :: Lexer LToken integerLiteral = do num <- some digitChar return (LIntegerLiteral (read num)) stringLiteral :: Lexer LToken stringLiteral = do char '"' content <- many (noneOf ['"']) char '"' --" return (LStringLiteral content) lModule :: Lexer LToken lModule = do _ <- string "!module" space1 LIdentifier moduleName <- identifier return (LModule moduleName) lImport :: Lexer LToken lImport = do _ <- string "!import" space1 LStringLiteral path <- stringLiteral space1 LIdentifier name <- identifier return (LImport path name) assign :: Lexer LToken assign = char '=' *> pure LAssign colon :: Lexer LToken colon = char ':' *> pure LColon backslash :: Lexer LToken backslash = char '\\' *> pure LBackslash openParen :: Lexer LToken openParen = char '(' *> pure LOpenParen closeParen :: Lexer LToken closeParen = char ')' *> pure LCloseParen openBracket :: Lexer LToken openBracket = char '[' *> pure LOpenBracket closeBracket :: Lexer LToken closeBracket = char ']' *> pure LCloseBracket lnewline :: Lexer LToken lnewline = char '\n' *> pure LNewline sc :: Lexer () sc = space (void $ takeWhile1P (Just "space") (\c -> c == ' ' || c == '\t')) (skipLineComment "--") (skipBlockComment "|-" "-|") tricuLexer :: Lexer [LToken] tricuLexer = do sc header <- many $ do tok <- choice [ try lModule , try lImport , lnewline ] sc pure tok tokens <- many $ do tok <- choice tricuLexer' sc pure tok sc eof pure (header ++ tokens) where tricuLexer' = [ try lnewline , try identifier , try keywordT , try integerLiteral , try stringLiteral , assign , colon , backslash , openParen , closeParen , openBracket , closeBracket ] lexTricu :: String -> [LToken] lexTricu input = case runParser tricuLexer "" input of Left err -> errorWithoutStackTrace $ "Lexical error:\n" ++ errorBundlePretty err Right tokens -> tokens