
This includes a naive implementation of a module system: imported files have their own imports resolved recursively, the module/import AST nodes are stripped, and everything is then evaluated into a flat environment using namespace prefixes such as "Module.function".
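
As a rough sketch of that flattening step (not the actual tricu implementation; the Env type and flattenModule name are made up for illustration), each module's definitions could be prefixed with the module name before being merged into one environment:

import qualified Data.Map as Map

-- Hypothetical sketch: qualify every definition with its module name so all
-- modules can be merged into one flat "Module.function" -> value environment.
type Env v = Map.Map String v

flattenModule :: String -> [(String, v)] -> Env v
flattenModule moduleName defs =
  Map.fromList [ (moduleName ++ "." ++ name, val) | (name, val) <- defs ]

For example, flattenModule "List" [("map", f)] produces an environment keyed by "List.map".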
module Lexer where

import Research

import Control.Monad (void)
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char hiding (space)
import Text.Megaparsec.Char.Lexer

import qualified Data.Set as Set

type Lexer = Parsec Void String

-- Lex the keyword `t`, rejecting identifiers that merely start with `t`.
keywordT :: Lexer LToken
keywordT = string "t" *> notFollowedBy alphaNumChar *> pure LKeywordT

identifier :: Lexer LToken
identifier = do
  first <- letterChar <|> char '_'
  rest <- many $ letterChar
             <|> digitChar
             <|> char '_' <|> char '-' <|> char '?' <|> char '.'
             <|> char '$' <|> char '#' <|> char '@' <|> char '%'
  let name = first : rest
  if (name == "t" || name == "!result")
    then fail "Keywords (`t`, `!result`) cannot be used as an identifier"
    else return (LIdentifier name)

integerLiteral :: Lexer LToken
integerLiteral = do
  num <- some digitChar
  return (LIntegerLiteral (read num))

stringLiteral :: Lexer LToken
stringLiteral = do
  char '"'
  content <- many (noneOf ['"'])
  char '"' --"
  return (LStringLiteral content)

lModule :: Lexer LToken
lModule = do
  _ <- string "!module"
  space1
  LIdentifier moduleName <- identifier
  return (LModule moduleName)

lImport :: Lexer LToken
lImport = do
  _ <- string "!import"
  space1
  LStringLiteral path <- stringLiteral
  space1
  LIdentifier name <- identifier
  return (LImport path name)

assign :: Lexer LToken
assign = char '=' *> pure LAssign

colon :: Lexer LToken
colon = char ':' *> pure LColon

backslash :: Lexer LToken
backslash = char '\\' *> pure LBackslash

openParen :: Lexer LToken
openParen = char '(' *> pure LOpenParen

closeParen :: Lexer LToken
closeParen = char ')' *> pure LCloseParen

openBracket :: Lexer LToken
openBracket = char '[' *> pure LOpenBracket

closeBracket :: Lexer LToken
closeBracket = char ']' *> pure LCloseBracket

lnewline :: Lexer LToken
lnewline = char '\n' *> pure LNewline

-- Space consumer: skips spaces and tabs, line comments ("--"), and block
-- comments ("|- ... -|"), but not newlines, which are significant tokens.
sc :: Lexer ()
sc = space
  (void $ takeWhile1P (Just "space") (\c -> c == ' ' || c == '\t'))
  (skipLineComment "--")
  (skipBlockComment "|-" "-|")

-- Lex an optional header of module/import declarations, then the body tokens.
tricuLexer :: Lexer [LToken]
tricuLexer = do
  sc
  header <- many $ do
    tok <- choice
      [ try lModule
      , try lImport
      , lnewline
      ]
    sc
    pure tok
  tokens <- many $ do
    tok <- choice tricuLexer'
    sc
    pure tok
  sc
  eof
  pure (header ++ tokens)
  where
    tricuLexer' =
      [ try lnewline
      , try identifier
      , try keywordT
      , try integerLiteral
      , try stringLiteral
      , assign
      , colon
      , backslash
      , openParen
      , closeParen
      , openBracket
      , closeBracket
      ]

-- Run the lexer over an input string, aborting with a pretty-printed error on failure.
lexTricu :: String -> [LToken]
lexTricu input = case runParser tricuLexer "" input of
  Left err -> errorWithoutStackTrace $ "Lexical error:\n" ++ errorBundlePretty err
  Right tokens -> tokens