2024-12-18 18:55:51 -06:00
|
|
|
module Lexer where
|
|
|
|
|
|
|
|
import Research
|
|
|
|
import Text.Megaparsec
|
|
|
|
import Text.Megaparsec.Char
|
|
|
|
import Data.Void
|
2024-12-20 11:38:09 -06:00
|
|
|
import qualified Data.Set as Set
|
2024-12-18 18:55:51 -06:00
|
|
|
|
|
|
|
type Lexer = Parsec Void String
|
2024-12-27 12:27:00 -06:00
|
|
|
|
2024-12-18 18:55:51 -06:00
|
|
|
data LToken
|
|
|
|
= LKeywordT
|
|
|
|
| LIdentifier String
|
|
|
|
| LIntegerLiteral Int
|
|
|
|
| LStringLiteral String
|
|
|
|
| LAssign
|
2024-12-27 08:17:06 -06:00
|
|
|
| LColon
|
|
|
|
| LBackslash
|
2024-12-18 18:55:51 -06:00
|
|
|
| LOpenParen
|
|
|
|
| LCloseParen
|
|
|
|
| LOpenBracket
|
|
|
|
| LCloseBracket
|
|
|
|
| LNewline
|
|
|
|
deriving (Show, Eq, Ord)
|
|
|
|
|
|
|
|
keywordT :: Lexer LToken
|
|
|
|
keywordT = string "t" *> notFollowedBy alphaNumChar *> pure LKeywordT
|
|
|
|
|
|
|
|
identifier :: Lexer LToken
|
|
|
|
identifier = do
|
|
|
|
name <- some (letterChar <|> char '_' <|> char '-')
|
2024-12-19 21:13:57 -06:00
|
|
|
if (name == "t" || name == "__result")
|
|
|
|
then fail "Keywords (`t`, `__result`) cannot be used as an identifier"
|
2024-12-18 18:55:51 -06:00
|
|
|
else return (LIdentifier name)
|
|
|
|
|
|
|
|
integerLiteral :: Lexer LToken
|
|
|
|
integerLiteral = do
|
|
|
|
num <- some digitChar
|
|
|
|
return (LIntegerLiteral (read num))
|
|
|
|
|
|
|
|
stringLiteral :: Lexer LToken
|
|
|
|
stringLiteral = do
|
|
|
|
char '"'
|
|
|
|
content <- many (noneOf ['"'])
|
2024-12-20 11:38:09 -06:00
|
|
|
if null content
|
|
|
|
then fail "Empty string literals are not allowed"
|
|
|
|
else do
|
2024-12-27 12:27:00 -06:00
|
|
|
char '"'
|
2024-12-20 11:38:09 -06:00
|
|
|
return (LStringLiteral content)
|
2024-12-18 18:55:51 -06:00
|
|
|
|
|
|
|
assign :: Lexer LToken
|
|
|
|
assign = char '=' *> pure LAssign
|
|
|
|
|
2024-12-27 08:17:06 -06:00
|
|
|
colon :: Lexer LToken
|
|
|
|
colon = char ':' *> pure LColon
|
|
|
|
|
|
|
|
backslash :: Lexer LToken
|
|
|
|
backslash = char '\\' *> pure LBackslash
|
|
|
|
|
2024-12-18 18:55:51 -06:00
|
|
|
openParen :: Lexer LToken
|
|
|
|
openParen = char '(' *> pure LOpenParen
|
|
|
|
|
|
|
|
closeParen :: Lexer LToken
|
|
|
|
closeParen = char ')' *> pure LCloseParen
|
|
|
|
|
|
|
|
openBracket :: Lexer LToken
|
|
|
|
openBracket = char '[' *> pure LOpenBracket
|
|
|
|
|
|
|
|
closeBracket :: Lexer LToken
|
|
|
|
closeBracket = char ']' *> pure LCloseBracket
|
|
|
|
|
|
|
|
lnewline :: Lexer LToken
|
|
|
|
lnewline = char '\n' *> pure LNewline
|
|
|
|
|
|
|
|
sc :: Lexer ()
|
|
|
|
sc = skipMany (char ' ' <|> char '\t')
|
|
|
|
|
|
|
|
saplingLexer :: Lexer [LToken]
|
|
|
|
saplingLexer = many (sc *> choice
|
2024-12-20 11:38:09 -06:00
|
|
|
[ try identifier
|
|
|
|
, try keywordT
|
2024-12-18 18:55:51 -06:00
|
|
|
, try integerLiteral
|
|
|
|
, try stringLiteral
|
|
|
|
, assign
|
2024-12-27 08:17:06 -06:00
|
|
|
, colon
|
|
|
|
, backslash
|
2024-12-18 18:55:51 -06:00
|
|
|
, openParen
|
|
|
|
, closeParen
|
|
|
|
, openBracket
|
|
|
|
, closeBracket
|
|
|
|
, lnewline
|
2024-12-20 11:38:09 -06:00
|
|
|
] <* sc) <* eof
|
2024-12-19 18:57:57 -06:00
|
|
|
|
2024-12-20 11:38:09 -06:00
|
|
|
lexSapling :: String -> [LToken]
|
2024-12-19 18:57:57 -06:00
|
|
|
lexSapling input = case runParser saplingLexer "" input of
|
2024-12-27 12:27:00 -06:00
|
|
|
Left err -> error $ "Lexical error:\n" ++ errorBundlePretty err
|
2024-12-19 18:57:57 -06:00
|
|
|
Right tokens -> tokens
|