
Adds support for several special characters in identifiers. Adds a demo for converting values to source code and another for checking equality. Updates the existing demo and tests to reflect new names for functions returning booleans.
102 lines
2.3 KiB
Haskell
102 lines
2.3 KiB
Haskell
module Lexer where
|
|
|
|
import Research
|
|
|
|
import Control.Monad (void)
|
|
import Data.Void
|
|
import Text.Megaparsec
|
|
import Text.Megaparsec.Char hiding (space)
|
|
import Text.Megaparsec.Char.Lexer
|
|
|
|
import qualified Data.Set as Set
|
|
|
|
type Lexer = Parsec Void String
|
|
|
|
keywordT :: Lexer LToken
|
|
keywordT = string "t" *> notFollowedBy alphaNumChar *> pure LKeywordT
|
|
|
|
identifier :: Lexer LToken
|
|
identifier = do
|
|
first <- letterChar <|> char '_'
|
|
rest <- many $ letterChar
|
|
<|> digitChar
|
|
<|> char '_' <|> char '-' <|> char '?' <|> char '!'
|
|
<|> char '$' <|> char '#' <|> char '@' <|> char '%'
|
|
let name = first : rest
|
|
if (name == "t" || name == "__result")
|
|
then fail "Keywords (`t`, `__result`) cannot be used as an identifier"
|
|
else return (LIdentifier name)
|
|
|
|
integerLiteral :: Lexer LToken
|
|
integerLiteral = do
|
|
num <- some digitChar
|
|
return (LIntegerLiteral (read num))
|
|
|
|
stringLiteral :: Lexer LToken
|
|
stringLiteral = do
|
|
char '"'
|
|
content <- many (noneOf ['"'])
|
|
char '"' --"
|
|
return (LStringLiteral content)
|
|
|
|
assign :: Lexer LToken
|
|
assign = char '=' *> pure LAssign
|
|
|
|
colon :: Lexer LToken
|
|
colon = char ':' *> pure LColon
|
|
|
|
backslash :: Lexer LToken
|
|
backslash = char '\\' *> pure LBackslash
|
|
|
|
openParen :: Lexer LToken
|
|
openParen = char '(' *> pure LOpenParen
|
|
|
|
closeParen :: Lexer LToken
|
|
closeParen = char ')' *> pure LCloseParen
|
|
|
|
openBracket :: Lexer LToken
|
|
openBracket = char '[' *> pure LOpenBracket
|
|
|
|
closeBracket :: Lexer LToken
|
|
closeBracket = char ']' *> pure LCloseBracket
|
|
|
|
lnewline :: Lexer LToken
|
|
lnewline = char '\n' *> pure LNewline
|
|
|
|
sc :: Lexer ()
|
|
sc = space
|
|
(void $ takeWhile1P (Just "space") (\c -> c == ' ' || c == '\t'))
|
|
(skipLineComment "--")
|
|
(skipBlockComment "|-" "-|")
|
|
|
|
tricuLexer :: Lexer [LToken]
|
|
tricuLexer = do
|
|
sc
|
|
tokens <- many $ do
|
|
tok <- choice tricuLexer'
|
|
sc
|
|
pure tok
|
|
sc
|
|
eof
|
|
pure tokens
|
|
where
|
|
tricuLexer' =
|
|
[ try lnewline
|
|
, try identifier
|
|
, try keywordT
|
|
, try integerLiteral
|
|
, try stringLiteral
|
|
, assign
|
|
, colon
|
|
, backslash
|
|
, openParen
|
|
, closeParen
|
|
, openBracket
|
|
, closeBracket
|
|
]
|
|
|
|
lexTricu :: String -> [LToken]
|
|
lexTricu input = case runParser tricuLexer "" input of
|
|
Left err -> errorWithoutStackTrace $ "Lexical error:\n" ++ errorBundlePretty err
|
|
Right tokens -> tokens
|