+ let bindings + where bindings + do notation I explored enough of the alternative language design space and decided that we should commit fully to Lambda style. That means no more highly tacit/concatenative point-free/partial programs as default. We'll keep taking advantage of those capabilities when it makes sense, but the library will continue to see massive overhauls.
350 lines
12 KiB
Haskell
350 lines
12 KiB
Haskell
module Research where
|
|
|
|
import Crypto.Hash (hash, SHA256, Digest)
|
|
import Data.ByteArray (convert)
|
|
import Data.ByteString.Base16 (decode, encode)
|
|
import Data.List (intercalate)
|
|
import Data.Map ()
|
|
import Data.Text (Text, replace)
|
|
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
|
|
import Data.Word (Word8)
|
|
import qualified Data.ByteString as BS
|
|
import qualified Data.Map as Map
|
|
import qualified Data.Set as Set
|
|
import qualified Data.Text as T
|
|
|
|
-- Tree Calculus Types
|
|
data T = Leaf | Stem T | Fork T T
|
|
deriving (Show, Eq, Ord)
|
|
|
|
-- Abstract Syntax Tree for tricu
|
|
data TricuAST
|
|
= SVar String (Maybe String)
|
|
| SInt Integer
|
|
| SStr String
|
|
| SList [TricuAST]
|
|
| SDef String [String] TricuAST
|
|
| SApp TricuAST TricuAST
|
|
| TLeaf
|
|
| TStem TricuAST
|
|
| TFork TricuAST TricuAST
|
|
| SLambda [String] TricuAST
|
|
| SEmpty
|
|
| SImport String String
|
|
deriving (Show, Eq, Ord)
|
|
|
|
-- Lexer Tokens
|
|
data LToken
|
|
= LIdentifier String
|
|
| LIdentifierWithHash String String
|
|
| LKeywordT
|
|
| LNamespace String
|
|
| LImport String String
|
|
| LAssign
|
|
| LColon
|
|
| LDot
|
|
| LOpenParen
|
|
| LCloseParen
|
|
| LOpenBracket
|
|
| LCloseBracket
|
|
| LStringLiteral String
|
|
| LIntegerLiteral Int
|
|
| LArrowLeft
|
|
| LArrowRight
|
|
| LBindArrow
|
|
| LNewline
|
|
| LIndent Int
|
|
deriving (Eq, Show, Ord)
|
|
|
|
-- Output formats
|
|
data EvaluatedForm = Tree | FSL | AST | Ternary | Ascii | Decode | Number | StringLit
|
|
deriving (Show)
|
|
|
|
-- Environment containing previously evaluated TC terms
|
|
type Env = Map.Map String T
|
|
|
|
-- Merkle DAG Node types
|
|
-- Each Tree Calculus node becomes a content-addressed object.
|
|
|
|
type MerkleHash = Text
|
|
|
|
data Node
|
|
= NLeaf
|
|
| NStem MerkleHash
|
|
| NFork MerkleHash MerkleHash
|
|
deriving (Show, Eq, Ord)
|
|
|
|
-- | Canonical serialization of a Node for hashing.
|
|
-- Leaf: 0x00
|
|
-- Stem: 0x01 || child_hash (32 bytes)
|
|
-- Fork: 0x02 || left_hash (32 bytes) || right_hash (32 bytes)
|
|
serializeNode :: Node -> BS.ByteString
|
|
serializeNode NLeaf = BS.pack [0x00]
|
|
serializeNode (NStem h) = BS.pack [0x01] <> go (decode (encodeUtf8 h))
|
|
where go (Left _) = error "Research.serializeNode: invalid hex hash"
|
|
go (Right bs) = bs
|
|
serializeNode (NFork l r) = BS.pack [0x02] <> go (decode (encodeUtf8 l)) <> go (decode (encodeUtf8 r))
|
|
where go (Left _) = error "Research.serializeNode: invalid hex hash"
|
|
go (Right bs) = bs
|
|
|
|
-- | Hash a node per the Merkle content-addressing spec.
|
|
-- hash = SHA256( "arboricx.merkle.node.v1" <> 0x00 <> node_payload )
|
|
nodeHash :: Node -> MerkleHash
|
|
nodeHash node = decodeUtf8 (encode (sha256WithPrefix (serializeNode node)))
|
|
where sha256WithPrefix payload =
|
|
convert . (hash :: BS.ByteString -> Digest SHA256) $ utf8Tag <> BS.pack [0x00] <> payload
|
|
utf8Tag = BS.pack $ map fromIntegral $ BS.unpack "arboricx.merkle.node.v1"
|
|
|
|
-- | Deserialize a Node from canonical bytes.
|
|
deserializeNode :: BS.ByteString -> Node
|
|
deserializeNode bs =
|
|
case BS.uncons bs of
|
|
Just (0x00, rest)
|
|
| BS.null rest -> NLeaf
|
|
|
|
Just (0x01, rest)
|
|
| BS.length rest == 32 ->
|
|
NStem $ decodeUtf8 (encode rest)
|
|
|
|
Just (0x02, rest)
|
|
| BS.length rest == 64 ->
|
|
let (l, r) = BS.splitAt 32 rest
|
|
in NFork (decodeUtf8 (encode l)) (decodeUtf8 (encode r))
|
|
|
|
_ -> errorWithoutStackTrace "invalid merkle node payload"
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- ByteString / bytestream marshalling via existing Tree Calculus conventions
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
-- | Encode a single byte (Word8) as a Tree Calculus number (0..255).
|
|
ofByte :: Word8 -> T
|
|
ofByte = ofNumber . fromIntegral
|
|
|
|
-- | Decode a Tree Calculus number as a single byte (Word8).
|
|
-- Rejects values outside the range 0..255.
|
|
toByte :: T -> Either String Word8
|
|
toByte t = case toNumber t of
|
|
Left err -> Left err
|
|
Right n
|
|
| n >= 0 && n <= 255 -> Right (fromIntegral n)
|
|
| otherwise -> Left ("Byte value out of range: " ++ show n)
|
|
|
|
-- | Encode a ByteString as a Tree Calculus list of Byte trees.
|
|
ofBytes :: BS.ByteString -> T
|
|
ofBytes = ofList . map ofByte . BS.unpack
|
|
|
|
-- | Decode a Tree Calculus list of Byte trees as a ByteString.
|
|
-- Rejects non-list trees and elements that are not valid byte values (0..255).
|
|
toBytes :: T -> Either String BS.ByteString
|
|
toBytes t = case toList t of
|
|
Left err -> Left err
|
|
Right bs -> BS.pack <$> mapM toByte bs
|
|
|
|
-- | Convert a canonical Arboricx node payload (ByteString) to a Tree
|
|
-- representation (a list of Byte trees).
|
|
nodePayloadToTreeBytes :: BS.ByteString -> T
|
|
nodePayloadToTreeBytes = ofBytes
|
|
|
|
-- | Convert a Tree representation of a node payload back to ByteString.
|
|
treeBytesToNodePayload :: T -> Either String BS.ByteString
|
|
treeBytesToNodePayload = toBytes
|
|
|
|
-- | Convert a MerkleHash (hex-encoded) to a Tree of its 32 raw bytes.
|
|
hashToTreeBytes :: MerkleHash -> Either String T
|
|
hashToTreeBytes h = case decode (encodeUtf8 h) of
|
|
Left _ -> Left "Invalid hex MerkleHash"
|
|
Right raw
|
|
| BS.length raw == 32 -> Right (ofBytes raw)
|
|
| otherwise -> Left "Hash raw bytes must be 32 bytes"
|
|
|
|
-- | Convert a Tree of 32 Byte trees back to a MerkleHash (hex string).
|
|
treeBytesToHash :: T -> Either String MerkleHash
|
|
treeBytesToHash t = case toList t of
|
|
Left err -> Left err
|
|
Right bytes
|
|
| length bytes == 32 -> do
|
|
raw <- BS.pack <$> mapM toByte bytes
|
|
Right $ decodeUtf8 (encode raw)
|
|
| otherwise -> Left "Expected exactly 32 byte elements for hash"
|
|
|
|
-- | Build a Merkle DAG from a Tree Calculus term.
|
|
buildMerkle :: T -> Node
|
|
buildMerkle Leaf = NLeaf
|
|
buildMerkle (Stem t) = NStem (nodeHash child)
|
|
where child = buildMerkle t
|
|
buildMerkle (Fork l r) = NFork (nodeHash left) (nodeHash right)
|
|
where
|
|
left = buildMerkle l
|
|
right = buildMerkle r
|
|
|
|
-- Tree Calculus Reduction Rules
|
|
{-
|
|
The t operator is left associative.
|
|
1. t t a b -> a
|
|
2. t (t a) b c -> a c (b c)
|
|
3a. t (t a b) c t -> a
|
|
3b. t (t a b) c (t u) -> b u
|
|
3c. t (t a b) c (t u v) -> c u v
|
|
-}
|
|
apply :: T -> T -> T
|
|
apply (Fork Leaf a) _ = a
|
|
apply (Fork (Stem a) b) c = apply (apply a c) (apply b c)
|
|
apply (Fork (Fork _a _b) _c) Leaf = _a
|
|
apply (Fork (Fork _a _b) _c) (Stem u) = apply _b u
|
|
apply (Fork (Fork _a _b) _c) (Fork u v) = apply (apply _c u) v
|
|
-- Left associative `t`
|
|
apply Leaf b = Stem b
|
|
apply (Stem a) b = Fork a b
|
|
|
|
-- Booleans
|
|
_false :: T
|
|
_false = Leaf
|
|
|
|
_true :: T
|
|
_true = Stem Leaf
|
|
|
|
_not :: T
|
|
_not = Fork (Fork _true (Fork Leaf _false)) Leaf
|
|
|
|
-- Marshalling
|
|
ofString :: String -> T
|
|
ofString str = ofList $ map (ofNumber . toInteger . fromEnum) str
|
|
|
|
ofNumber :: Integer -> T
|
|
ofNumber 0 = Leaf
|
|
ofNumber n =
|
|
Fork
|
|
(if odd n then Stem Leaf else Leaf)
|
|
(ofNumber (n `div` 2))
|
|
|
|
ofList :: [T] -> T
|
|
ofList = foldr Fork Leaf
|
|
|
|
toNumber :: T -> Either String Integer
|
|
toNumber Leaf = Right 0
|
|
toNumber (Fork Leaf rest) = case toNumber rest of
|
|
Right n -> Right (2 * n)
|
|
Left err -> Left err
|
|
toNumber (Fork (Stem Leaf) rest) = case toNumber rest of
|
|
Right n -> Right (1 + 2 * n)
|
|
Left err -> Left err
|
|
toNumber _ = Left "Invalid Tree Calculus number"
|
|
|
|
toChar :: Integer -> Either String Char
|
|
toChar n
|
|
| n < 0 = Left "Negative character code"
|
|
| n > 0x10FFFF = Left "Character code out of Unicode range"
|
|
| n >= 0xD800 && n <= 0xDFFF = Left "Surrogate character code not allowed"
|
|
| otherwise = Right (toEnum (fromInteger n))
|
|
|
|
toString :: T -> Either String String
|
|
toString tc = do
|
|
list <- toList tc
|
|
nums <- mapM toNumber list
|
|
mapM toChar nums
|
|
|
|
toList :: T -> Either String [T]
|
|
toList Leaf = Right []
|
|
toList (Fork x rest) = case toList rest of
|
|
Right xs -> Right (x : xs)
|
|
Left err -> Left err
|
|
toList _ = Left "Invalid Tree Calculus list"
|
|
|
|
-- Outputs
|
|
formatT :: EvaluatedForm -> T -> String
|
|
formatT Tree = toSimpleT . show
|
|
formatT FSL = show
|
|
formatT AST = show . toAST
|
|
formatT Ternary = toTernaryString
|
|
formatT Ascii = toAscii
|
|
formatT Decode = decodeResult
|
|
formatT Number = either (\e -> "<not-number: " ++ e ++ ">") show . toNumber
|
|
formatT StringLit = either (\e -> "<not-string: " ++ e ++ ">") show . toString
|
|
|
|
toSimpleT :: String -> String
|
|
toSimpleT s = T.unpack
|
|
$ replace "Fork" "t"
|
|
$ replace "Stem" "t"
|
|
$ replace "Leaf" "t"
|
|
$ T.pack s
|
|
|
|
toTernaryString :: T -> String
|
|
toTernaryString Leaf = "0"
|
|
toTernaryString (Stem t) = "1" ++ toTernaryString t
|
|
toTernaryString (Fork t1 t2) = "2" ++ toTernaryString t1 ++ toTernaryString t2
|
|
|
|
toAST :: T -> TricuAST
|
|
toAST Leaf = TLeaf
|
|
toAST (Stem a) = TStem (toAST a)
|
|
toAST (Fork a b) = TFork (toAST a) (toAST b)
|
|
|
|
toAscii :: T -> String
|
|
toAscii tree = go tree "" True
|
|
where
|
|
go :: T -> String -> Bool -> String
|
|
go Leaf prefix isLast =
|
|
prefix ++ (if isLast then "`-- " else "|-- ") ++ "Leaf\n"
|
|
go (Stem t) prefix isLast =
|
|
prefix ++ (if isLast then "`-- " else "|-- ") ++ "Stem\n"
|
|
++ go t (prefix ++ (if isLast then " " else "| ")) True
|
|
go (Fork left right) prefix isLast =
|
|
prefix ++ (if isLast then "`-- " else "|-- ") ++ "Fork\n"
|
|
++ go left (prefix ++ (if isLast then " " else "| ")) False
|
|
++ go right (prefix ++ (if isLast then " " else "| ")) True
|
|
|
|
decodeResult :: T -> String
|
|
decodeResult Leaf = "t"
|
|
decodeResult tc =
|
|
case (toString tc, toList tc, toNumber tc) of
|
|
(Right s, _, _) | all isCommonChar s -> "\"" ++ s ++ "\""
|
|
(_, _, Right n) -> show n
|
|
(_, Right xs@(_:_), _) -> "[" ++ intercalate ", " (map decodeResult xs) ++ "]"
|
|
(_, Right [], _) -> "[]"
|
|
_ -> formatT Tree tc
|
|
where
|
|
isCommonChar c =
|
|
let n = fromEnum c
|
|
in (n >= 32 && n <= 126)
|
|
|| n == 9
|
|
|| n == 10
|
|
|| n == 13
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- DAG node-table export (for host-language kernel embedding)
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
-- | Export a term's Merkle DAG as a topologically-sorted node table.
|
|
-- Children appear before parents so all index references are forward.
|
|
-- Returns (root index, list of (tag, [child_indices])).
|
|
exportDag :: T -> (Int, [(String, [Int])])
|
|
exportDag term =
|
|
let (root, acc, _) = collectDag term [] Set.empty
|
|
-- acc is in reverse post-order (children first, root last)
|
|
ordered = reverse acc
|
|
idxMap = Map.fromList [(h, i) | (i, (h, _)) <- zip [0..] ordered]
|
|
rootIdx = idxMap Map.! root
|
|
lines_ = map (formatNode idxMap . snd) ordered
|
|
in (rootIdx, lines_)
|
|
where
|
|
collectDag :: T -> [(MerkleHash, Node)] -> Set.Set MerkleHash -> (MerkleHash, [(MerkleHash, Node)], Set.Set MerkleHash)
|
|
collectDag Leaf acc seen =
|
|
let h = nodeHash NLeaf
|
|
in if Set.member h seen then (h, acc, seen) else (h, (h, NLeaf) : acc, Set.insert h seen)
|
|
collectDag (Stem t) acc seen =
|
|
let (ch, acc', seen') = collectDag t acc seen
|
|
node = NStem ch
|
|
h = nodeHash node
|
|
in if Set.member h seen' then (h, acc', seen') else (h, (h, node) : acc', Set.insert h seen')
|
|
collectDag (Fork l r) acc seen =
|
|
let (lh, acc', seen') = collectDag l acc seen
|
|
(rh, acc'', seen'') = collectDag r acc' seen'
|
|
node = NFork lh rh
|
|
h = nodeHash node
|
|
in if Set.member h seen'' then (h, acc'', seen'') else (h, (h, node) : acc'', Set.insert h seen'')
|
|
|
|
formatNode :: Map.Map MerkleHash Int -> Node -> (String, [Int])
|
|
formatNode _ NLeaf = ("leaf", [])
|
|
formatNode idxMap (NStem ch) = ("stem", [idxMap Map.! ch])
|
|
formatNode idxMap (NFork l r) = ("fork", [idxMap Map.! l, idxMap Map.! r])
|