Files
tricu/src/Lexer.hs

144 lines
3.2 KiB
Haskell

module Lexer where
import Research
import Control.Monad (void)
import Data.Functor (($>))
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char hiding (space)
import Text.Megaparsec.Char.Lexer
import qualified Data.Set as Set
type Lexer = Parsec Void String
tricuLexer :: Lexer [LToken]
tricuLexer = do
sc
header <- many $ do
tok <- choice
[ try lImport
, lnewline
]
sc
pure tok
tokens <- many $ do
tok <- choice tricuLexer'
sc
pure tok
sc
eof
pure (header ++ tokens)
where
tricuLexer' =
[ try lnewline
, try namespace
, try dot
, try identifier
, try keywordT
, try integerLiteral
, try stringLiteral
, assign
, colon
, openParen
, closeParen
, openBracket
, closeBracket
]
lexTricu :: String -> [LToken]
lexTricu input = case runParser tricuLexer "" input of
Left err -> errorWithoutStackTrace $ "Lexical error:\n" ++ errorBundlePretty err
Right tokens -> tokens
keywordT :: Lexer LToken
keywordT = string "t" *> notFollowedBy alphaNumChar $> LKeywordT
identifier :: Lexer LToken
identifier = do
first <- lowerChar <|> char '_'
rest <- many $ letterChar
<|> digitChar <|> char '_' <|> char '-' <|> char '?'
<|> char '$' <|> char '#' <|> char '@' <|> char '%'
let name = first : rest
if name == "t" || name == "!result"
then fail "Keywords (`t`, `!result`) cannot be used as an identifier"
else return (LIdentifier name)
namespace :: Lexer LToken
namespace = do
name <- try (string "!Local") <|> do
first <- upperChar
rest <- many (letterChar <|> digitChar)
return (first:rest)
return (LNamespace name)
dot :: Lexer LToken
dot = char '.' $> LDot
lImport :: Lexer LToken
lImport = do
_ <- string "!import"
space1
LStringLiteral path <- stringLiteral
space1
LNamespace name <- namespace
return (LImport path name)
assign :: Lexer LToken
assign = char '=' $> LAssign
colon :: Lexer LToken
colon = char ':' $> LColon
openParen :: Lexer LToken
openParen = char '(' $> LOpenParen
closeParen :: Lexer LToken
closeParen = char ')' $> LCloseParen
openBracket :: Lexer LToken
openBracket = char '[' $> LOpenBracket
closeBracket :: Lexer LToken
closeBracket = char ']' $> LCloseBracket
lnewline :: Lexer LToken
lnewline = char '\n' $> LNewline
sc :: Lexer ()
sc = space
(void $ takeWhile1P (Just "space") (\c -> c == ' ' || c == '\t'))
(skipLineComment "--")
(skipBlockComment "|-" "-|")
integerLiteral :: Lexer LToken
integerLiteral = do
num <- some digitChar
return (LIntegerLiteral (read num))
stringLiteral :: Lexer LToken
stringLiteral = do
char '"'
content <- manyTill Lexer.charLiteral (char '"')
return (LStringLiteral content)
charLiteral :: Lexer Char
charLiteral = escapedChar <|> normalChar
where
normalChar = noneOf ['"', '\\']
escapedChar = do
void $ char '\\'
c <- oneOf ['n', 't', 'r', 'f', 'b', '\\', '"', '\'']
return $ case c of
'n' -> '\n'
't' -> '\t'
'r' -> '\r'
'f' -> '\f'
'b' -> '\b'
'\\' -> '\\'
'"' -> '"'
'\'' -> '\''