module Research where import Crypto.Hash (hash, SHA256, Digest) import Data.ByteArray (convert) import Data.ByteString.Base16 (decode, encode) import Data.List (intercalate) import Data.Map () import Data.Text (Text, replace) import Data.Text.Encoding (decodeUtf8, encodeUtf8) import Data.Word (Word8) import qualified Data.ByteString as BS import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.Text as T -- Tree Calculus Types data T = Leaf | Stem T | Fork T T deriving (Show, Eq, Ord) -- Abstract Syntax Tree for tricu data TricuAST = SVar String (Maybe String) | SInt Integer | SStr String | SList [TricuAST] | SDef String [String] TricuAST | SApp TricuAST TricuAST | TLeaf | TStem TricuAST | TFork TricuAST TricuAST | SLambda [String] TricuAST | SEmpty | SImport String String deriving (Show, Eq, Ord) -- Lexer Tokens data LToken = LIdentifier String | LIdentifierWithHash String String | LKeywordT | LNamespace String | LImport String String | LAssign | LColon | LDot | LOpenParen | LCloseParen | LOpenBracket | LCloseBracket | LStringLiteral String | LIntegerLiteral Int | LArrowLeft | LArrowRight | LBindArrow | LNewline | LIndent Int deriving (Eq, Show, Ord) -- Output formats data EvaluatedForm = Tree | FSL | AST | Ternary | Ascii | Decode | Number | StringLit deriving (Show) -- Environment containing previously evaluated TC terms type Env = Map.Map String T -- Merkle DAG Node types -- Each Tree Calculus node becomes a content-addressed object. type MerkleHash = Text data Node = NLeaf | NStem MerkleHash | NFork MerkleHash MerkleHash deriving (Show, Eq, Ord) -- | Canonical serialization of a Node for hashing. -- Leaf: 0x00 -- Stem: 0x01 || child_hash (32 bytes) -- Fork: 0x02 || left_hash (32 bytes) || right_hash (32 bytes) serializeNode :: Node -> BS.ByteString serializeNode NLeaf = BS.pack [0x00] serializeNode (NStem h) = BS.pack [0x01] <> go (decode (encodeUtf8 h)) where go (Left _) = error "Research.serializeNode: invalid hex hash" go (Right bs) = bs serializeNode (NFork l r) = BS.pack [0x02] <> go (decode (encodeUtf8 l)) <> go (decode (encodeUtf8 r)) where go (Left _) = error "Research.serializeNode: invalid hex hash" go (Right bs) = bs -- | Hash a node per the Merkle content-addressing spec. -- hash = SHA256( "arboricx.merkle.node.v1" <> 0x00 <> node_payload ) nodeHash :: Node -> MerkleHash nodeHash node = decodeUtf8 (encode (sha256WithPrefix (serializeNode node))) where sha256WithPrefix payload = convert . (hash :: BS.ByteString -> Digest SHA256) $ utf8Tag <> BS.pack [0x00] <> payload utf8Tag = BS.pack $ map fromIntegral $ BS.unpack "arboricx.merkle.node.v1" -- | Deserialize a Node from canonical bytes. deserializeNode :: BS.ByteString -> Node deserializeNode bs = case BS.uncons bs of Just (0x00, rest) | BS.null rest -> NLeaf Just (0x01, rest) | BS.length rest == 32 -> NStem $ decodeUtf8 (encode rest) Just (0x02, rest) | BS.length rest == 64 -> let (l, r) = BS.splitAt 32 rest in NFork (decodeUtf8 (encode l)) (decodeUtf8 (encode r)) _ -> errorWithoutStackTrace "invalid merkle node payload" -- --------------------------------------------------------------------------- -- ByteString / bytestream marshalling via existing Tree Calculus conventions -- --------------------------------------------------------------------------- -- | Encode a single byte (Word8) as a Tree Calculus number (0..255). ofByte :: Word8 -> T ofByte = ofNumber . fromIntegral -- | Decode a Tree Calculus number as a single byte (Word8). -- Rejects values outside the range 0..255. toByte :: T -> Either String Word8 toByte t = case toNumber t of Left err -> Left err Right n | n >= 0 && n <= 255 -> Right (fromIntegral n) | otherwise -> Left ("Byte value out of range: " ++ show n) -- | Encode a ByteString as a Tree Calculus list of Byte trees. ofBytes :: BS.ByteString -> T ofBytes = ofList . map ofByte . BS.unpack -- | Decode a Tree Calculus list of Byte trees as a ByteString. -- Rejects non-list trees and elements that are not valid byte values (0..255). toBytes :: T -> Either String BS.ByteString toBytes t = case toList t of Left err -> Left err Right bs -> BS.pack <$> mapM toByte bs -- | Convert a canonical Arboricx node payload (ByteString) to a Tree -- representation (a list of Byte trees). nodePayloadToTreeBytes :: BS.ByteString -> T nodePayloadToTreeBytes = ofBytes -- | Convert a Tree representation of a node payload back to ByteString. treeBytesToNodePayload :: T -> Either String BS.ByteString treeBytesToNodePayload = toBytes -- | Convert a MerkleHash (hex-encoded) to a Tree of its 32 raw bytes. hashToTreeBytes :: MerkleHash -> Either String T hashToTreeBytes h = case decode (encodeUtf8 h) of Left _ -> Left "Invalid hex MerkleHash" Right raw | BS.length raw == 32 -> Right (ofBytes raw) | otherwise -> Left "Hash raw bytes must be 32 bytes" -- | Convert a Tree of 32 Byte trees back to a MerkleHash (hex string). treeBytesToHash :: T -> Either String MerkleHash treeBytesToHash t = case toList t of Left err -> Left err Right bytes | length bytes == 32 -> do raw <- BS.pack <$> mapM toByte bytes Right $ decodeUtf8 (encode raw) | otherwise -> Left "Expected exactly 32 byte elements for hash" -- | Build a Merkle DAG from a Tree Calculus term. buildMerkle :: T -> Node buildMerkle Leaf = NLeaf buildMerkle (Stem t) = NStem (nodeHash child) where child = buildMerkle t buildMerkle (Fork l r) = NFork (nodeHash left) (nodeHash right) where left = buildMerkle l right = buildMerkle r -- Tree Calculus Reduction Rules {- The t operator is left associative. 1. t t a b -> a 2. t (t a) b c -> a c (b c) 3a. t (t a b) c t -> a 3b. t (t a b) c (t u) -> b u 3c. t (t a b) c (t u v) -> c u v -} apply :: T -> T -> T apply (Fork Leaf a) _ = a apply (Fork (Stem a) b) c = apply (apply a c) (apply b c) apply (Fork (Fork _a _b) _c) Leaf = _a apply (Fork (Fork _a _b) _c) (Stem u) = apply _b u apply (Fork (Fork _a _b) _c) (Fork u v) = apply (apply _c u) v -- Left associative `t` apply Leaf b = Stem b apply (Stem a) b = Fork a b -- Booleans _false :: T _false = Leaf _true :: T _true = Stem Leaf _not :: T _not = Fork (Fork _true (Fork Leaf _false)) Leaf -- Marshalling ofString :: String -> T ofString str = ofList $ map (ofNumber . toInteger . fromEnum) str ofNumber :: Integer -> T ofNumber 0 = Leaf ofNumber n = Fork (if odd n then Stem Leaf else Leaf) (ofNumber (n `div` 2)) ofList :: [T] -> T ofList = foldr Fork Leaf toNumber :: T -> Either String Integer toNumber Leaf = Right 0 toNumber (Fork Leaf rest) = case toNumber rest of Right n -> Right (2 * n) Left err -> Left err toNumber (Fork (Stem Leaf) rest) = case toNumber rest of Right n -> Right (1 + 2 * n) Left err -> Left err toNumber _ = Left "Invalid Tree Calculus number" toChar :: Integer -> Either String Char toChar n | n < 0 = Left "Negative character code" | n > 0x10FFFF = Left "Character code out of Unicode range" | n >= 0xD800 && n <= 0xDFFF = Left "Surrogate character code not allowed" | otherwise = Right (toEnum (fromInteger n)) toString :: T -> Either String String toString tc = do list <- toList tc nums <- mapM toNumber list mapM toChar nums toList :: T -> Either String [T] toList Leaf = Right [] toList (Fork x rest) = case toList rest of Right xs -> Right (x : xs) Left err -> Left err toList _ = Left "Invalid Tree Calculus list" -- Outputs formatT :: EvaluatedForm -> T -> String formatT Tree = toSimpleT . show formatT FSL = show formatT AST = show . toAST formatT Ternary = toTernaryString formatT Ascii = toAscii formatT Decode = decodeResult formatT Number = either (\e -> "") show . toNumber formatT StringLit = either (\e -> "") show . toString toSimpleT :: String -> String toSimpleT s = T.unpack $ replace "Fork" "t" $ replace "Stem" "t" $ replace "Leaf" "t" $ T.pack s toTernaryString :: T -> String toTernaryString Leaf = "0" toTernaryString (Stem t) = "1" ++ toTernaryString t toTernaryString (Fork t1 t2) = "2" ++ toTernaryString t1 ++ toTernaryString t2 toAST :: T -> TricuAST toAST Leaf = TLeaf toAST (Stem a) = TStem (toAST a) toAST (Fork a b) = TFork (toAST a) (toAST b) toAscii :: T -> String toAscii tree = go tree "" True where go :: T -> String -> Bool -> String go Leaf prefix isLast = prefix ++ (if isLast then "`-- " else "|-- ") ++ "Leaf\n" go (Stem t) prefix isLast = prefix ++ (if isLast then "`-- " else "|-- ") ++ "Stem\n" ++ go t (prefix ++ (if isLast then " " else "| ")) True go (Fork left right) prefix isLast = prefix ++ (if isLast then "`-- " else "|-- ") ++ "Fork\n" ++ go left (prefix ++ (if isLast then " " else "| ")) False ++ go right (prefix ++ (if isLast then " " else "| ")) True decodeResult :: T -> String decodeResult Leaf = "t" decodeResult tc = case (toString tc, toList tc, toNumber tc) of (Right s, _, _) | all isCommonChar s -> "\"" ++ s ++ "\"" (_, _, Right n) -> show n (_, Right xs@(_:_), _) -> "[" ++ intercalate ", " (map decodeResult xs) ++ "]" (_, Right [], _) -> "[]" _ -> formatT Tree tc where isCommonChar c = let n = fromEnum c in (n >= 32 && n <= 126) || n == 9 || n == 10 || n == 13 -- --------------------------------------------------------------------------- -- DAG node-table export (for host-language kernel embedding) -- --------------------------------------------------------------------------- -- | Export a term's Merkle DAG as a topologically-sorted node table. -- Children appear before parents so all index references are forward. -- Returns (root index, list of (tag, [child_indices])). exportDag :: T -> (Int, [(String, [Int])]) exportDag term = let (root, acc, _) = collectDag term [] Set.empty -- acc is in reverse post-order (children first, root last) ordered = reverse acc idxMap = Map.fromList [(h, i) | (i, (h, _)) <- zip [0..] ordered] rootIdx = idxMap Map.! root lines_ = map (formatNode idxMap . snd) ordered in (rootIdx, lines_) where collectDag :: T -> [(MerkleHash, Node)] -> Set.Set MerkleHash -> (MerkleHash, [(MerkleHash, Node)], Set.Set MerkleHash) collectDag Leaf acc seen = let h = nodeHash NLeaf in if Set.member h seen then (h, acc, seen) else (h, (h, NLeaf) : acc, Set.insert h seen) collectDag (Stem t) acc seen = let (ch, acc', seen') = collectDag t acc seen node = NStem ch h = nodeHash node in if Set.member h seen' then (h, acc', seen') else (h, (h, node) : acc', Set.insert h seen') collectDag (Fork l r) acc seen = let (lh, acc', seen') = collectDag l acc seen (rh, acc'', seen'') = collectDag r acc' seen' node = NFork lh rh h = nodeHash node in if Set.member h seen'' then (h, acc'', seen'') else (h, (h, node) : acc'', Set.insert h seen'') formatNode :: Map.Map MerkleHash Int -> Node -> (String, [Int]) formatNode _ NLeaf = ("leaf", []) formatNode idxMap (NStem ch) = ("stem", [idxMap Map.! ch]) formatNode idxMap (NFork l r) = ("fork", [idxMap Map.! l, idxMap Map.! r])