406 lines
13 KiB
Haskell
406 lines
13 KiB
Haskell
{-# LANGUAGE PatternSynonyms #-}
|
|
|
|
module Research where
|
|
|
|
import Crypto.Hash (hash, SHA256, Digest)
|
|
import Data.ByteArray (convert)
|
|
import Data.ByteString.Base16 (decode, encode)
|
|
import Data.List (intercalate)
|
|
import Data.Map ()
|
|
import Data.Text (Text, replace)
|
|
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
|
|
import Data.Word (Word8)
|
|
import qualified Data.ByteString as BS
|
|
import qualified Data.Map as Map
|
|
import qualified Data.Set as Set
|
|
import qualified Data.Text as T
|
|
|
|
-- Tree Calculus Types
|
|
data T = Leaf | Stem T | Fork T T
|
|
deriving (Show, Eq, Ord)
|
|
|
|
-- View Contract source annotations
|
|
data ViewRef
|
|
= ViewRefInt Integer
|
|
| ViewRefText String
|
|
deriving (Show, Eq, Ord)
|
|
|
|
data ViewProvenance
|
|
= ViewChecked
|
|
| ViewTrusted
|
|
| ViewUnchecked
|
|
deriving (Show, Eq, Ord)
|
|
|
|
data ViewType
|
|
= VTName String
|
|
| VTVar Integer
|
|
| VTRefRaw ViewRef
|
|
| VTList ViewType
|
|
| VTMaybe ViewType
|
|
| VTPair ViewType ViewType
|
|
| VTResult ViewType ViewType
|
|
| VTGuarded ViewType T
|
|
| VTForall [Integer] ViewType
|
|
| VTExists [Integer] ViewType
|
|
| VTFn [ViewType] ViewType
|
|
deriving (Show, Eq, Ord)
|
|
|
|
pattern VTRef :: Integer -> ViewType
|
|
pattern VTRef n = VTRefRaw (ViewRefInt n)
|
|
|
|
pattern VTRefText :: String -> ViewType
|
|
pattern VTRefText s = VTRefRaw (ViewRefText s)
|
|
|
|
{-# COMPLETE VTName, VTVar, VTRef, VTRefText, VTList, VTMaybe, VTPair, VTResult, VTGuarded, VTForall, VTExists, VTFn #-}
|
|
|
|
data ViewExpr
|
|
= VEName String
|
|
| VEVar String
|
|
| VEVarId Integer
|
|
| VEInt Integer
|
|
| VEString String
|
|
| VEList [ViewExpr]
|
|
| VEApp ViewExpr ViewExpr
|
|
| VEForall [Integer] ViewExpr
|
|
| VEExists [Integer] ViewExpr
|
|
| VERaw String
|
|
deriving (Show, Eq, Ord)
|
|
|
|
data DefArg
|
|
= DefBinder String (Maybe ViewExpr)
|
|
| DefPhantom ViewExpr
|
|
deriving (Show, Eq, Ord)
|
|
|
|
-- Abstract Syntax Tree for tricu
|
|
data TricuAST
|
|
= SVar String (Maybe String)
|
|
| SInt Integer
|
|
| SStr String
|
|
| SList [TricuAST]
|
|
| SDef String [String] TricuAST
|
|
| SDefAnn String [DefArg] (Maybe ViewExpr) TricuAST
|
|
| SApp TricuAST TricuAST
|
|
| TLeaf
|
|
| TStem TricuAST
|
|
| TFork TricuAST TricuAST
|
|
| SLambda [String] TricuAST
|
|
| SEmpty
|
|
| SImport String String
|
|
deriving (Show, Eq, Ord)
|
|
|
|
-- Lexer Tokens
|
|
data LToken
|
|
= LIdentifier String
|
|
| LIdentifierWithHash String String
|
|
| LKeywordT
|
|
| LNamespace String
|
|
| LImport String String
|
|
| LAssign
|
|
| LAssignAt
|
|
| LAt
|
|
| LColon
|
|
| LDot
|
|
| LOpenParen
|
|
| LCloseParen
|
|
| LOpenBracket
|
|
| LCloseBracket
|
|
| LStringLiteral String
|
|
| LIntegerLiteral Int
|
|
| LArrowLeft
|
|
| LArrowRight
|
|
| LBindArrow
|
|
| LNewline
|
|
| LIndent Int
|
|
deriving (Eq, Show, Ord)
|
|
|
|
-- Output formats
|
|
data EvaluatedForm = Tree | FSL | AST | Ternary | Ascii | Decode | Number | StringLit
|
|
deriving (Show)
|
|
|
|
-- Environment containing previously evaluated TC terms
|
|
type Env = Map.Map String T
|
|
|
|
-- Merkle DAG Node types
|
|
-- Each Tree Calculus node becomes a content-addressed object.
|
|
type MerkleHash = Text
|
|
|
|
data Node
|
|
= NLeaf
|
|
| NStem MerkleHash
|
|
| NFork MerkleHash MerkleHash
|
|
deriving (Show, Eq, Ord)
|
|
|
|
-- | Canonical serialization of a Node for hashing.
|
|
-- Leaf: 0x00
|
|
-- Stem: 0x01 || child_hash (32 bytes)
|
|
-- Fork: 0x02 || left_hash (32 bytes) || right_hash (32 bytes)
|
|
serializeNode :: Node -> BS.ByteString
|
|
serializeNode NLeaf = BS.pack [0x00]
|
|
serializeNode (NStem h) = BS.pack [0x01] <> go (decode (encodeUtf8 h))
|
|
where go (Left _) = error "Research.serializeNode: invalid hex hash"
|
|
go (Right bs) = bs
|
|
serializeNode (NFork l r) = BS.pack [0x02] <> go (decode (encodeUtf8 l)) <> go (decode (encodeUtf8 r))
|
|
where go (Left _) = error "Research.serializeNode: invalid hex hash"
|
|
go (Right bs) = bs
|
|
|
|
-- | Hash a node per the Merkle content-addressing spec.
|
|
-- hash = SHA256( "arboricx.merkle.node.v1" <> 0x00 <> node_payload )
|
|
nodeHash :: Node -> MerkleHash
|
|
nodeHash node = decodeUtf8 (encode (sha256WithPrefix (serializeNode node)))
|
|
where sha256WithPrefix payload =
|
|
convert . (hash :: BS.ByteString -> Digest SHA256) $ utf8Tag <> BS.pack [0x00] <> payload
|
|
utf8Tag = BS.pack $ map fromIntegral $ BS.unpack "arboricx.merkle.node.v1"
|
|
|
|
-- | Deserialize a Node from canonical bytes.
|
|
deserializeNode :: BS.ByteString -> Node
|
|
deserializeNode bs =
|
|
case BS.uncons bs of
|
|
Just (0x00, rest)
|
|
| BS.null rest -> NLeaf
|
|
|
|
Just (0x01, rest)
|
|
| BS.length rest == 32 ->
|
|
NStem $ decodeUtf8 (encode rest)
|
|
|
|
Just (0x02, rest)
|
|
| BS.length rest == 64 ->
|
|
let (l, r) = BS.splitAt 32 rest
|
|
in NFork (decodeUtf8 (encode l)) (decodeUtf8 (encode r))
|
|
|
|
_ -> errorWithoutStackTrace "invalid merkle node payload"
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- ByteString / bytestream marshalling via existing Tree Calculus conventions
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
-- | Encode a single byte (Word8) as a Tree Calculus number (0..255).
|
|
ofByte :: Word8 -> T
|
|
ofByte = ofNumber . fromIntegral
|
|
|
|
-- | Decode a Tree Calculus number as a single byte (Word8).
|
|
-- Rejects values outside the range 0..255.
|
|
toByte :: T -> Either String Word8
|
|
toByte t = case toNumber t of
|
|
Left err -> Left err
|
|
Right n
|
|
| n >= 0 && n <= 255 -> Right (fromIntegral n)
|
|
| otherwise -> Left ("Byte value out of range: " ++ show n)
|
|
|
|
-- | Encode a ByteString as a Tree Calculus list of Byte trees.
|
|
ofBytes :: BS.ByteString -> T
|
|
ofBytes = ofList . map ofByte . BS.unpack
|
|
|
|
-- | Decode a Tree Calculus list of Byte trees as a ByteString.
|
|
-- Rejects non-list trees and elements that are not valid byte values (0..255).
|
|
toBytes :: T -> Either String BS.ByteString
|
|
toBytes t = case toList t of
|
|
Left err -> Left err
|
|
Right bs -> BS.pack <$> mapM toByte bs
|
|
|
|
-- | Convert a canonical Arboricx node payload (ByteString) to a Tree
|
|
-- representation (a list of Byte trees).
|
|
nodePayloadToTreeBytes :: BS.ByteString -> T
|
|
nodePayloadToTreeBytes = ofBytes
|
|
|
|
-- | Convert a Tree representation of a node payload back to ByteString.
|
|
treeBytesToNodePayload :: T -> Either String BS.ByteString
|
|
treeBytesToNodePayload = toBytes
|
|
|
|
-- | Convert a MerkleHash (hex-encoded) to a Tree of its 32 raw bytes.
|
|
hashToTreeBytes :: MerkleHash -> Either String T
|
|
hashToTreeBytes h = case decode (encodeUtf8 h) of
|
|
Left _ -> Left "Invalid hex MerkleHash"
|
|
Right raw
|
|
| BS.length raw == 32 -> Right (ofBytes raw)
|
|
| otherwise -> Left "Hash raw bytes must be 32 bytes"
|
|
|
|
-- | Convert a Tree of 32 Byte trees back to a MerkleHash (hex string).
|
|
treeBytesToHash :: T -> Either String MerkleHash
|
|
treeBytesToHash t = case toList t of
|
|
Left err -> Left err
|
|
Right bytes
|
|
| length bytes == 32 -> do
|
|
raw <- BS.pack <$> mapM toByte bytes
|
|
Right $ decodeUtf8 (encode raw)
|
|
| otherwise -> Left "Expected exactly 32 byte elements for hash"
|
|
|
|
-- | Build a Merkle DAG from a Tree Calculus term.
|
|
buildMerkle :: T -> Node
|
|
buildMerkle Leaf = NLeaf
|
|
buildMerkle (Stem t) = NStem (nodeHash child)
|
|
where child = buildMerkle t
|
|
buildMerkle (Fork l r) = NFork (nodeHash left) (nodeHash right)
|
|
where
|
|
left = buildMerkle l
|
|
right = buildMerkle r
|
|
|
|
-- Tree Calculus Reduction Rules
|
|
{-
|
|
The t operator is left associative.
|
|
1. t t a b -> a
|
|
2. t (t a) b c -> a c (b c)
|
|
3a. t (t a b) c t -> a
|
|
3b. t (t a b) c (t u) -> b u
|
|
3c. t (t a b) c (t u v) -> c u v
|
|
-}
|
|
apply :: T -> T -> T
|
|
apply (Fork Leaf a) _ = a
|
|
apply (Fork (Stem a) b) c = apply (apply a c) (apply b c)
|
|
apply (Fork (Fork _a _b) _c) Leaf = _a
|
|
apply (Fork (Fork _a _b) _c) (Stem u) = apply _b u
|
|
apply (Fork (Fork _a _b) _c) (Fork u v) = apply (apply _c u) v
|
|
-- Left associative `t`
|
|
apply Leaf b = Stem b
|
|
apply (Stem a) b = Fork a b
|
|
|
|
-- Booleans
|
|
_false :: T
|
|
_false = Leaf
|
|
|
|
_true :: T
|
|
_true = Stem Leaf
|
|
|
|
_not :: T
|
|
_not = Fork (Fork _true (Fork Leaf _false)) Leaf
|
|
|
|
-- Marshalling
|
|
ofString :: String -> T
|
|
ofString str = ofList $ map (ofNumber . toInteger . fromEnum) str
|
|
|
|
ofNumber :: Integer -> T
|
|
ofNumber 0 = Leaf
|
|
ofNumber n =
|
|
Fork
|
|
(if odd n then Stem Leaf else Leaf)
|
|
(ofNumber (n `div` 2))
|
|
|
|
ofList :: [T] -> T
|
|
ofList = foldr Fork Leaf
|
|
|
|
toNumber :: T -> Either String Integer
|
|
toNumber Leaf = Right 0
|
|
toNumber (Fork Leaf rest) = case toNumber rest of
|
|
Right n -> Right (2 * n)
|
|
Left err -> Left err
|
|
toNumber (Fork (Stem Leaf) rest) = case toNumber rest of
|
|
Right n -> Right (1 + 2 * n)
|
|
Left err -> Left err
|
|
toNumber _ = Left "Invalid Tree Calculus number"
|
|
|
|
toChar :: Integer -> Either String Char
|
|
toChar n
|
|
| n < 0 = Left "Negative character code"
|
|
| n > 0x10FFFF = Left "Character code out of Unicode range"
|
|
| n >= 0xD800 && n <= 0xDFFF = Left "Surrogate character code not allowed"
|
|
| otherwise = Right (toEnum (fromInteger n))
|
|
|
|
toString :: T -> Either String String
|
|
toString tc = do
|
|
list <- toList tc
|
|
nums <- mapM toNumber list
|
|
mapM toChar nums
|
|
|
|
toList :: T -> Either String [T]
|
|
toList Leaf = Right []
|
|
toList (Fork x rest) = case toList rest of
|
|
Right xs -> Right (x : xs)
|
|
Left err -> Left err
|
|
toList _ = Left "Invalid Tree Calculus list"
|
|
|
|
-- Outputs
|
|
formatT :: EvaluatedForm -> T -> String
|
|
formatT Tree = toSimpleT . show
|
|
formatT FSL = show
|
|
formatT AST = show . toAST
|
|
formatT Ternary = toTernaryString
|
|
formatT Ascii = toAscii
|
|
formatT Decode = decodeResult
|
|
formatT Number = either (\e -> "<not-number: " ++ e ++ ">") show . toNumber
|
|
formatT StringLit = either (\e -> "<not-string: " ++ e ++ ">") show . toString
|
|
|
|
toSimpleT :: String -> String
|
|
toSimpleT s = T.unpack
|
|
$ replace "Fork" "t"
|
|
$ replace "Stem" "t"
|
|
$ replace "Leaf" "t"
|
|
$ T.pack s
|
|
|
|
toTernaryString :: T -> String
|
|
toTernaryString Leaf = "0"
|
|
toTernaryString (Stem t) = "1" ++ toTernaryString t
|
|
toTernaryString (Fork t1 t2) = "2" ++ toTernaryString t1 ++ toTernaryString t2
|
|
|
|
toAST :: T -> TricuAST
|
|
toAST Leaf = TLeaf
|
|
toAST (Stem a) = TStem (toAST a)
|
|
toAST (Fork a b) = TFork (toAST a) (toAST b)
|
|
|
|
toAscii :: T -> String
|
|
toAscii tree = go tree "" True
|
|
where
|
|
go :: T -> String -> Bool -> String
|
|
go Leaf prefix isLast =
|
|
prefix ++ (if isLast then "`-- " else "|-- ") ++ "Leaf\n"
|
|
go (Stem t) prefix isLast =
|
|
prefix ++ (if isLast then "`-- " else "|-- ") ++ "Stem\n"
|
|
++ go t (prefix ++ (if isLast then " " else "| ")) True
|
|
go (Fork left right) prefix isLast =
|
|
prefix ++ (if isLast then "`-- " else "|-- ") ++ "Fork\n"
|
|
++ go left (prefix ++ (if isLast then " " else "| ")) False
|
|
++ go right (prefix ++ (if isLast then " " else "| ")) True
|
|
|
|
decodeResult :: T -> String
|
|
decodeResult Leaf = "t"
|
|
decodeResult tc =
|
|
case (toString tc, toList tc, toNumber tc) of
|
|
(Right s, _, _) | all isCommonChar s -> "\"" ++ s ++ "\""
|
|
(_, _, Right n) -> show n
|
|
(_, Right xs@(_:_), _) -> "[" ++ intercalate ", " (map decodeResult xs) ++ "]"
|
|
(_, Right [], _) -> "[]"
|
|
_ -> formatT Tree tc
|
|
where
|
|
isCommonChar c =
|
|
let n = fromEnum c
|
|
in (n >= 32 && n <= 126)
|
|
|| n == 9
|
|
|| n == 10
|
|
|| n == 13
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- DAG node-table export (for host-language kernel embedding)
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
-- | Export a term's Merkle DAG as a topologically-sorted node table.
|
|
-- Children appear before parents so all index references are forward.
|
|
-- Returns (root index, list of (tag, [child_indices])).
|
|
exportDag :: T -> (Int, [(String, [Int])])
|
|
exportDag term =
|
|
let (root, acc, _) = collectDag term [] Set.empty
|
|
-- acc is in reverse post-order (children first, root last)
|
|
ordered = reverse acc
|
|
idxMap = Map.fromList [(h, i) | (i, (h, _)) <- zip [0..] ordered]
|
|
rootIdx = idxMap Map.! root
|
|
lines_ = map (formatNode idxMap . snd) ordered
|
|
in (rootIdx, lines_)
|
|
where
|
|
collectDag :: T -> [(MerkleHash, Node)] -> Set.Set MerkleHash -> (MerkleHash, [(MerkleHash, Node)], Set.Set MerkleHash)
|
|
collectDag Leaf acc seen =
|
|
let h = nodeHash NLeaf
|
|
in if Set.member h seen then (h, acc, seen) else (h, (h, NLeaf) : acc, Set.insert h seen)
|
|
collectDag (Stem t) acc seen =
|
|
let (ch, acc', seen') = collectDag t acc seen
|
|
node = NStem ch
|
|
h = nodeHash node
|
|
in if Set.member h seen' then (h, acc', seen') else (h, (h, node) : acc', Set.insert h seen')
|
|
collectDag (Fork l r) acc seen =
|
|
let (lh, acc', seen') = collectDag l acc seen
|
|
(rh, acc'', seen'') = collectDag r acc' seen'
|
|
node = NFork lh rh
|
|
h = nodeHash node
|
|
in if Set.member h seen'' then (h, acc'', seen'') else (h, (h, node) : acc'', Set.insert h seen'')
|
|
|
|
formatNode :: Map.Map MerkleHash Int -> Node -> (String, [Int])
|
|
formatNode _ NLeaf = ("leaf", [])
|
|
formatNode idxMap (NStem ch) = ("stem", [idxMap Map.! ch])
|
|
formatNode idxMap (NFork l r) = ("fork", [idxMap Map.! l, idxMap Map.! r])
|