tricu/src/Lexer.hs
James Eversole 63504ba939 Rough draft of modules
This includes a naive implementation of a module system: imported
files have their imports handled recursively, the module/import AST
nodes are stripped, and everything is then evaluated into a flat
environment using namespace prefixes like "Module.function".
2025-01-27 12:24:30 -06:00
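A minimal sketch of the flattening idea described above, assuming the environment is a simple association list; `qualify` and `flattenModule` are illustrative names only and do not appear in this repository:

-- Illustrative only: prefix each definition with its module's name, so that
-- ("function", v) defined in module "Module" becomes ("Module.function", v).
qualify :: String -> String -> String
qualify moduleName name = moduleName ++ "." ++ name

flattenModule :: String -> [(String, v)] -> [(String, v)]
flattenModule moduleName defs =
  [ (qualify moduleName name, v) | (name, v) <- defs ]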


module Lexer where

import Research

import Control.Monad (void)
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char hiding (space)
import Text.Megaparsec.Char.Lexer

import qualified Data.Set as Set

type Lexer = Parsec Void String

keywordT :: Lexer LToken
keywordT = string "t" *> notFollowedBy alphaNumChar *> pure LKeywordT

identifier :: Lexer LToken
identifier = do
  first <- letterChar <|> char '_'
  rest  <- many $ letterChar
              <|> digitChar
              <|> char '_' <|> char '-' <|> char '?' <|> char '.'
              <|> char '$' <|> char '#' <|> char '@' <|> char '%'
  let name = first : rest
  if name == "t" || name == "!result"
    then fail "Keywords (`t`, `!result`) cannot be used as identifiers"
    else return (LIdentifier name)

integerLiteral :: Lexer LToken
integerLiteral = do
  num <- some digitChar
  return (LIntegerLiteral (read num))

stringLiteral :: Lexer LToken
stringLiteral = do
  _ <- char '"'
  content <- many (noneOf ['"'])
  _ <- char '"'
  return (LStringLiteral content)

lModule :: Lexer LToken
lModule = do
  _ <- string "!module"
  space1
  -- The pattern match is total here: identifier only ever returns LIdentifier.
  LIdentifier moduleName <- identifier
  return (LModule moduleName)

lImport :: Lexer LToken
lImport = do
  _ <- string "!import"
  space1
  -- Likewise, stringLiteral and identifier only return these constructors.
  LStringLiteral path <- stringLiteral
  space1
  LIdentifier name <- identifier
  return (LImport path name)

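-- Illustrative header lines that the two parsers above accept; the module
-- name and import path are made-up examples, not files in this repository:
--
--   !module Math
--   !import "lib/list.tri" List
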
assign :: Lexer LToken
assign = char '=' *> pure LAssign

colon :: Lexer LToken
colon = char ':' *> pure LColon

backslash :: Lexer LToken
backslash = char '\\' *> pure LBackslash

openParen :: Lexer LToken
openParen = char '(' *> pure LOpenParen

closeParen :: Lexer LToken
closeParen = char ')' *> pure LCloseParen

openBracket :: Lexer LToken
openBracket = char '[' *> pure LOpenBracket

closeBracket :: Lexer LToken
closeBracket = char ']' *> pure LCloseBracket

lnewline :: Lexer LToken
lnewline = char '\n' *> pure LNewline

-- Whitespace consumer: skips spaces and tabs (newlines stay significant),
-- "--" line comments, and "|- ... -|" block comments.
sc :: Lexer ()
sc = space
  (void $ takeWhile1P (Just "space") (\c -> c == ' ' || c == '\t'))
  (skipLineComment "--")
  (skipBlockComment "|-" "-|")

tricuLexer :: Lexer [LToken]
tricuLexer = do
  sc
  -- Module and import declarations are only recognized in the file header.
  header <- many $ do
    tok <- choice
      [ try lModule
      , try lImport
      , lnewline
      ]
    sc
    pure tok
  tokens <- many $ do
    tok <- choice tricuLexer'
    sc
    pure tok
  sc
  eof
  pure (header ++ tokens)
  where
    tricuLexer' =
      [ try lnewline
      , try identifier
      , try keywordT
      , try integerLiteral
      , try stringLiteral
      , assign
      , colon
      , backslash
      , openParen
      , closeParen
      , openBracket
      , closeBracket
      ]

lexTricu :: String -> [LToken]
lexTricu input = case runParser tricuLexer "" input of
  Left err     -> errorWithoutStackTrace $ "Lexical error:\n" ++ errorBundlePretty err
  Right tokens -> tokens

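A usage sketch, not part of the file: assuming the LToken constructors used above have a derived Show instance in Research, a GHCi session might look like this (the module name and definition are made-up examples):

ghci> lexTricu "!module Math\nmain = t\n"
[LModule "Math",LNewline,LIdentifier "main",LAssign,LKeywordT,LNewline]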