144 lines
3.2 KiB
Haskell
144 lines
3.2 KiB
Haskell
module Lexer where
|
|
|
|
import Research
|
|
|
|
import Control.Monad (void)
|
|
import Data.Functor (($>))
|
|
import Data.Void
|
|
import Text.Megaparsec
|
|
import Text.Megaparsec.Char hiding (space)
|
|
import Text.Megaparsec.Char.Lexer
|
|
|
|
import qualified Data.Set as Set
|
|
|
|
type Lexer = Parsec Void String
|
|
|
|
tricuLexer :: Lexer [LToken]
|
|
tricuLexer = do
|
|
sc
|
|
header <- many $ do
|
|
tok <- choice
|
|
[ try lImport
|
|
, lnewline
|
|
]
|
|
sc
|
|
pure tok
|
|
tokens <- many $ do
|
|
tok <- choice tricuLexer'
|
|
sc
|
|
pure tok
|
|
sc
|
|
eof
|
|
pure (header ++ tokens)
|
|
where
|
|
tricuLexer' =
|
|
[ try lnewline
|
|
, try namespace
|
|
, try dot
|
|
, try identifier
|
|
, try keywordT
|
|
, try integerLiteral
|
|
, try stringLiteral
|
|
, assign
|
|
, colon
|
|
, openParen
|
|
, closeParen
|
|
, openBracket
|
|
, closeBracket
|
|
]
|
|
|
|
lexTricu :: String -> [LToken]
|
|
lexTricu input = case runParser tricuLexer "" input of
|
|
Left err -> errorWithoutStackTrace $ "Lexical error:\n" ++ errorBundlePretty err
|
|
Right tokens -> tokens
|
|
|
|
|
|
keywordT :: Lexer LToken
|
|
keywordT = string "t" *> notFollowedBy alphaNumChar $> LKeywordT
|
|
|
|
identifier :: Lexer LToken
|
|
identifier = do
|
|
first <- lowerChar <|> char '_'
|
|
rest <- many $ letterChar
|
|
<|> digitChar <|> char '_' <|> char '-' <|> char '?'
|
|
<|> char '$' <|> char '#' <|> char '@' <|> char '%'
|
|
let name = first : rest
|
|
if name == "t" || name == "!result"
|
|
then fail "Keywords (`t`, `!result`) cannot be used as an identifier"
|
|
else return (LIdentifier name)
|
|
|
|
namespace :: Lexer LToken
|
|
namespace = do
|
|
name <- try (string "!Local") <|> do
|
|
first <- upperChar
|
|
rest <- many (letterChar <|> digitChar)
|
|
return (first:rest)
|
|
return (LNamespace name)
|
|
|
|
dot :: Lexer LToken
|
|
dot = char '.' $> LDot
|
|
|
|
lImport :: Lexer LToken
|
|
lImport = do
|
|
_ <- string "!import"
|
|
space1
|
|
LStringLiteral path <- stringLiteral
|
|
space1
|
|
LNamespace name <- namespace
|
|
return (LImport path name)
|
|
|
|
assign :: Lexer LToken
|
|
assign = char '=' $> LAssign
|
|
|
|
colon :: Lexer LToken
|
|
colon = char ':' $> LColon
|
|
|
|
openParen :: Lexer LToken
|
|
openParen = char '(' $> LOpenParen
|
|
|
|
closeParen :: Lexer LToken
|
|
closeParen = char ')' $> LCloseParen
|
|
|
|
openBracket :: Lexer LToken
|
|
openBracket = char '[' $> LOpenBracket
|
|
|
|
closeBracket :: Lexer LToken
|
|
closeBracket = char ']' $> LCloseBracket
|
|
|
|
lnewline :: Lexer LToken
|
|
lnewline = char '\n' $> LNewline
|
|
|
|
sc :: Lexer ()
|
|
sc = space
|
|
(void $ takeWhile1P (Just "space") (\c -> c == ' ' || c == '\t'))
|
|
(skipLineComment "--")
|
|
(skipBlockComment "|-" "-|")
|
|
|
|
integerLiteral :: Lexer LToken
|
|
integerLiteral = do
|
|
num <- some digitChar
|
|
return (LIntegerLiteral (read num))
|
|
|
|
stringLiteral :: Lexer LToken
|
|
stringLiteral = do
|
|
char '"'
|
|
content <- manyTill Lexer.charLiteral (char '"')
|
|
return (LStringLiteral content)
|
|
|
|
charLiteral :: Lexer Char
|
|
charLiteral = escapedChar <|> normalChar
|
|
where
|
|
normalChar = noneOf ['"', '\\']
|
|
escapedChar = do
|
|
void $ char '\\'
|
|
c <- oneOf ['n', 't', 'r', 'f', 'b', '\\', '"', '\'']
|
|
return $ case c of
|
|
'n' -> '\n'
|
|
't' -> '\t'
|
|
'r' -> '\r'
|
|
'f' -> '\f'
|
|
'b' -> '\b'
|
|
'\\' -> '\\'
|
|
'"' -> '"'
|
|
'\'' -> '\''
|