Files
tricu/src/Wire.hs
James Eversole e3117e3ac8 Switch manifest serialization to CBOR
Replace JSON-based bundle manifest with a CBOR-encoded format. The manifest
is now a canonical CBOR map with order-strict key decoding, raw 32-byte hash
payloads (instead of hex-encoded JSON), and compact binary representation.
2026-05-07 21:41:50 -05:00

932 lines
34 KiB
Haskell

{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
module Wire
( Bundle (..)
, BundleManifest (..)
, TreeSpec (..)
, NodeHashSpec (..)
, RuntimeSpec (..)
, BundleRoot (..)
, BundleExport (..)
, BundleMetadata
, ClosureMode (..)
, encodeBundle
, decodeBundle
, verifyBundle
, collectReachableNodes
, exportBundle
, exportNamedBundle
, importBundle
, defaultExportNames
) where
import ContentStore (getNodeMerkle, loadTree, putMerkleNode, storeTerm)
import Research
import Codec.CBOR.Decoding ( Decoder
, decodeString
, decodeBytes
, decodeListLen
, decodeMapLen
)
import Control.Monad (replicateM, forM)
import Codec.CBOR.Encoding ( Encoding
, encodeMapLen
, encodeListLen
, encodeString
, encodeBytes
)
import Codec.CBOR.Write (toLazyByteString)
import Data.Monoid (mconcat)
import Codec.CBOR.Read (deserialiseFromBytes, DeserialiseFailure(..))
import Control.Exception (SomeException, evaluate, try)
import Control.Monad (foldM, unless, when)
import Crypto.Hash (Digest, SHA256, hash)
import Data.Bits ((.&.), (.|.), shiftL, shiftR)
import Data.ByteArray (convert)
import Data.ByteString (ByteString)
import Data.Foldable (traverse_)
import Data.Map (Map)
import Data.Text (Text, unpack)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Word (Word16, Word32, Word64)
import Database.SQLite.Simple (Connection)
import GHC.Generics (Generic)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base16 as Base16
import qualified Data.ByteString.Lazy as BL
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as T
-- | Portable bundle major/minor version supported by this module.
bundleMajorVersion :: Word16
bundleMajorVersion = 1
bundleMinorVersion :: Word16
bundleMinorVersion = 0
-- | Header magic for the portable executable-object container.
bundleMagic :: ByteString
bundleMagic = BS.pack [0x41, 0x52, 0x42, 0x4f, 0x52, 0x49, 0x58, 0x00] -- "ARBORIX\0"
headerLength :: Int
headerLength = 32
sectionEntryLength :: Int
sectionEntryLength = 60
sectionManifest, sectionNodes :: Word32
sectionManifest = 1
sectionNodes = 2
flagCritical :: Word16
flagCritical = 0x0001
compressionNone, digestSha256 :: Word16
compressionNone = 0
digestSha256 = 1
-- ---------------------------------------------------------------------------
-- CBOR encoding helpers
-- ---------------------------------------------------------------------------
-- | Canonical CBOR map length encoder.
cmkLen :: Int -> Encoding
cmkLen n = encodeMapLen (fromIntegral n)
-- | Decode a CBOR array of n elements.
decodeListN :: Decoder s a -> Int -> Decoder s [a]
decodeListN dec n = replicateM n dec
-- | Decode a CBOR map (sequence of key-value pairs).
decodeMapN :: Decoder s a -> Decoder s b -> Int -> Decoder s [(a, b)]
decodeMapN keyDec valDec n = forM [1..n] $ \_ ->
keyDec >>= \k -> valDec >>= \v -> pure (k, v)
decodeKey :: Text -> Decoder s ()
decodeKey expected = do
actual <- decodeString
unless (actual == expected) $
fail $ "expected key " ++ show expected ++ ", got " ++ show actual
-- | Canonical CBOR array length encoder.
cakLen :: Int -> Encoding
cakLen n = encodeListLen (fromIntegral n)
-- | Encode a canonical CBOR map with key-value pairs as flat sequence.
cmkPairs :: [(Text, Encoding)] -> Encoding
cmkPairs [] = cmkLen 0
cmkPairs kvs = cmkLen (length kvs) <> mconcat [encodeString k <> v | (k, v) <- kvs]
-- | Encode a canonical CBOR array.
cakSeq :: [Encoding] -> Encoding
cakSeq [] = cakLen 0
cakSeq xs = cakLen (length xs) <> mconcat xs
-- | Encode a canonical CBOR text string.
encText :: Text -> Encoding
encText = encodeString
-- | Encode a canonical CBOR byte string.
encBytes :: ByteString -> Encoding
encBytes = encodeBytes
-- ---------------------------------------------------------------------------
-- Data types with CBOR instances
-- ---------------------------------------------------------------------------
-- | Closure declaration.
data ClosureMode = ClosureComplete | ClosurePartial
deriving (Show, Eq, Ord, Generic)
toCBORClosure :: ClosureMode -> Encoding
toCBORClosure = encText . \case
ClosureComplete -> "complete"
ClosurePartial -> "partial"
closureFromCBOR :: Decoder s ClosureMode
closureFromCBOR = decodeString >>= \case
"complete" -> pure ClosureComplete
"partial" -> pure ClosurePartial
other -> fail $ "ClosureMode: " ++ show other
-- | Hash specification (algorithm + domain strings).
data NodeHashSpec = NodeHashSpec
{ nodeHashAlgorithm :: Text
, nodeHashDomain :: Text
} deriving (Show, Eq, Ord, Generic)
toCBORNodeHashSpec :: NodeHashSpec -> Encoding
toCBORNodeHashSpec (NodeHashSpec alg dom) =
cmkPairs
[ ("algorithm", encText alg)
, ("domain", encText dom)
]
nodeHashSpecFromCBOR :: Decoder s NodeHashSpec
nodeHashSpecFromCBOR = do
n <- decodeMapLen
unless (n == 2) $ fail "NodeHashSpec: must have exactly 2 entries"
decodeKey "algorithm"
alg <- decodeString
decodeKey "domain"
dom <- decodeString
pure (NodeHashSpec alg dom)
-- | Tree specification.
data TreeSpec = TreeSpec
{ treeCalculus :: Text
, treeNodeHash :: NodeHashSpec
, treeNodePayload :: Text
} deriving (Show, Eq, Ord, Generic)
toCBORTreeSpec :: TreeSpec -> Encoding
toCBORTreeSpec (TreeSpec calc hspec payload) =
cmkPairs
[ ("calculus", encText calc)
, ("nodeHash", toCBORNodeHashSpec hspec)
, ("nodePayload", encText payload)
]
treeSpecFromCBOR :: Decoder s TreeSpec
treeSpecFromCBOR = do
n <- decodeMapLen
unless (n == 3) $ fail "TreeSpec: must have exactly 3 entries"
decodeKey "calculus"
calc <- decodeString
decodeKey "nodeHash"
hspec <- nodeHashSpecFromCBOR
decodeKey "nodePayload"
payload <- decodeString
pure (TreeSpec calc hspec payload)
-- | Runtime specification.
data RuntimeSpec = RuntimeSpec
{ runtimeSemantics :: Text
, runtimeEvaluation :: Text
, runtimeAbi :: Text
, runtimeCapabilities :: [Text]
} deriving (Show, Eq, Ord, Generic)
toCBORRuntimeSpec :: RuntimeSpec -> Encoding
toCBORRuntimeSpec (RuntimeSpec sem eval abi caps) =
cmkPairs
[ ("semantics", encText sem)
, ("evaluation", encText eval)
, ("abi", encText abi)
, ("capabilities", cakSeq (map encText caps))
]
runtimeSpecFromCBOR :: Decoder s RuntimeSpec
runtimeSpecFromCBOR = do
n <- decodeMapLen
unless (n == 4) $ fail "RuntimeSpec: must have exactly 4 entries"
decodeKey "semantics"
sem <- decodeString
decodeKey "evaluation"
eval <- decodeString
decodeKey "abi"
abi <- decodeString
decodeKey "capabilities"
clen <- decodeListLen
caps <- decodeListN decodeString clen
pure (RuntimeSpec sem eval abi caps)
-- | A root hash reference.
data BundleRoot = BundleRoot
{ rootHash :: MerkleHash
, rootRole :: Text
} deriving (Show, Eq, Ord, Generic)
toCBORBundleRoot :: BundleRoot -> Encoding
toCBORBundleRoot (BundleRoot h role) =
cmkPairs
[ ("hash", encBytes (merkleHashToRaw h))
, ("role", encText role)
]
bundleRootFromCBOR :: Decoder s BundleRoot
bundleRootFromCBOR = do
n <- decodeMapLen
unless (n == 2) $ fail "BundleRoot: must have exactly 2 entries"
decodeKey "hash"
hRaw <- decodeBytes
decodeKey "role"
role <- decodeString
pure (BundleRoot (rawToMerkleHash hRaw) role)
-- | An export entry.
data BundleExport = BundleExport
{ exportName :: Text
, exportRoot :: MerkleHash
, exportKind :: Text
, exportAbi :: Text
} deriving (Show, Eq, Ord, Generic)
toCBORBundleExport :: BundleExport -> Encoding
toCBORBundleExport (BundleExport name h kind abi) =
cmkPairs
[ ("name", encText name)
, ("root", encBytes (merkleHashToRaw h))
, ("kind", encText kind)
, ("abi", encText abi)
]
bundleExportFromCBOR :: Decoder s BundleExport
bundleExportFromCBOR = do
n <- decodeMapLen
unless (n == 4) $ fail "BundleExport: must have exactly 4 entries"
decodeKey "name"
name <- decodeString
decodeKey "root"
hRaw <- decodeBytes
decodeKey "kind"
kind <- decodeString
decodeKey "abi"
abi <- decodeString
pure (BundleExport name (rawToMerkleHash hRaw) kind abi)
-- | Optional package metadata.
data BundleMetadata = BundleMetadata
{ metadataPackage :: Maybe Text
, metadataVersion :: Maybe Text
, metadataDescription :: Maybe Text
, metadataLicense :: Maybe Text
, metadataCreatedBy :: Maybe Text
} deriving (Show, Eq, Ord, Generic)
metadataFromCBOR :: Decoder s BundleMetadata
metadataFromCBOR = do
mlen <- decodeMapLen
entries <- decodeMapN decodeString decodeString mlen
let lookupText k = go k entries
go _ [] = Nothing
go k ((k', v):rest)
| k == k' = Just v
| otherwise = go k rest
pure BundleMetadata
{ metadataPackage = lookupText "package"
, metadataVersion = lookupText "version"
, metadataDescription = lookupText "description"
, metadataLicense = lookupText "license"
, metadataCreatedBy = lookupText "createdBy"
}
metadataToCBOR :: BundleMetadata -> Encoding
metadataToCBOR (BundleMetadata pkg ver desc lic by) =
let pairs =
maybe [] (\v -> [("package", encText v)]) pkg
++ maybe [] (\v -> [("version", encText v)]) ver
++ maybe [] (\v -> [("description", encText v)]) desc
++ maybe [] (\v -> [("license", encText v)]) lic
++ maybe [] (\v -> [("createdBy", encText v)]) by
in cmkPairs pairs
-- | The manifest: top-level bundle metadata.
data BundleManifest = BundleManifest
{ manifestSchema :: Text
, manifestBundleType :: Text
, manifestTree :: TreeSpec
, manifestRuntime :: RuntimeSpec
, manifestClosure :: ClosureMode
, manifestRoots :: [BundleRoot]
, manifestExports :: [BundleExport]
, manifestMetadata :: BundleMetadata
} deriving (Show, Eq, Generic)
manifestToCBOR :: BundleManifest -> Encoding
manifestToCBOR m =
cmkPairs
[ ("schema", encText (manifestSchema m))
, ("bundleType", encText (manifestBundleType m))
, ("tree", toCBORTreeSpec (manifestTree m))
, ("runtime", toCBORRuntimeSpec (manifestRuntime m))
, ("closure", toCBORClosure (manifestClosure m))
, ("roots", cakSeq (map toCBORBundleRoot (manifestRoots m)))
, ("exports", cakSeq (map toCBORBundleExport (manifestExports m)))
, ("metadata", metadataToCBOR (manifestMetadata m))
]
manifestFromCBOR :: Decoder s BundleManifest
manifestFromCBOR = do
n <- decodeMapLen
unless (n == 8) $ fail "BundleManifest: must have exactly 8 entries"
decodeKey "schema"
schema <- decodeString
decodeKey "bundleType"
bundleType <- decodeString
decodeKey "tree"
tree <- treeSpecFromCBOR
decodeKey "runtime"
runtime <- runtimeSpecFromCBOR
decodeKey "closure"
closure <- closureFromCBOR
decodeKey "roots"
rlen <- decodeListLen
roots <- decodeListN bundleRootFromCBOR rlen
decodeKey "exports"
elen <- decodeListLen
exports <- decodeListN bundleExportFromCBOR elen
decodeKey "metadata"
metadata <- metadataFromCBOR
pure (BundleManifest schema bundleType tree runtime closure roots exports metadata)
-- | Portable executable-object bundle.
--
-- Merkle node payloads remain the language-neutral executable core:
-- Leaf = 0x00; Stem = 0x01 || child_hash; Fork = 0x02 || left_hash || right_hash.
-- Names, exports, runtime metadata, and package metadata live in the manifest layer.
data Bundle = Bundle
{ bundleVersion :: Word16
, bundleRoots :: [MerkleHash]
, bundleNodes :: Map MerkleHash ByteString
, bundleManifest :: BundleManifest
, bundleManifestBytes :: ByteString
} deriving (Show, Eq)
-- ---------------------------------------------------------------------------
-- CBOR manifest serialization
-- ---------------------------------------------------------------------------
-- | Encode the manifest as canonical CBOR.
encodeManifest :: BundleManifest -> ByteString
encodeManifest m = BL.toStrict (toLazyByteString (manifestToCBOR m))
-- | Decode a manifest from CBOR bytes.
decodeManifest :: ByteString -> Either String BundleManifest
decodeManifest bs =
case deserialiseFromBytes manifestFromCBOR (BL.fromStrict bs) of
Right (rest, m)
| BS.null (BL.toStrict rest) -> Right m
| otherwise -> Left "trailing bytes after manifest CBOR"
Left (DeserialiseFailure _ msg) -> Left msg
-- ---------------------------------------------------------------------------
-- Bundle encoding
-- ---------------------------------------------------------------------------
-- | Encode a Bundle to portable Bundle v1 bytes.
encodeBundle :: Bundle -> ByteString
encodeBundle bundle =
let nodeSection = encodeNodeSection (bundleNodes bundle)
manifestBytes = if BS.null (bundleManifestBytes bundle)
then encodeManifest (bundleManifest bundle)
else bundleManifestBytes bundle
sectionCount = 2
dirOffset = fromIntegral headerLength
sectionDirLength = sectionCount * sectionEntryLength
manifestOffset = fromIntegral (headerLength + sectionDirLength)
nodesOffset = manifestOffset + fromIntegral (BS.length manifestBytes)
manifestEntry = encodeSectionEntry sectionManifest 1 flagCritical compressionNone
manifestOffset (fromIntegral $ BS.length manifestBytes) manifestBytes
nodesEntry = encodeSectionEntry sectionNodes 1 flagCritical compressionNone
nodesOffset (fromIntegral $ BS.length nodeSection) nodeSection
header = encodeHeader bundleMajorVersion bundleMinorVersion
(fromIntegral sectionCount) 0 dirOffset
in header <> manifestEntry <> nodesEntry <> manifestBytes <> nodeSection
-- | Decode portable Bundle v1 bytes.
decodeBundle :: ByteString -> Either String Bundle
decodeBundle bs
| BS.take (BS.length bundleMagic) bs == bundleMagic = decodePortableBundle bs
| otherwise = Left "invalid magic"
-- ---------------------------------------------------------------------------
-- Portable container encoding / decoding
-- ---------------------------------------------------------------------------
data SectionEntry = SectionEntry
{ seType :: Word32
, seVersion :: Word16
, seFlags :: Word16
, seCompression :: Word16
, seDigestAlgorithm :: Word16
, seOffset :: Word64
, seLength :: Word64
, seDigest :: ByteString
} deriving (Show, Eq)
encodeHeader :: Word16 -> Word16 -> Word32 -> Word64 -> Word64 -> ByteString
encodeHeader major minor sectionCount flags dirOffset =
bundleMagic
<> encode16 major
<> encode16 minor
<> encode32 sectionCount
<> encode64 flags
<> encode64 dirOffset
encodeSectionEntry :: Word32 -> Word16 -> Word16 -> Word16 -> Word64 -> Word64 -> ByteString -> ByteString
encodeSectionEntry sectionType sectionVersion sectionFlags compression offset lengthBytes sectionBytes =
encode32 sectionType
<> encode16 sectionVersion
<> encode16 sectionFlags
<> encode16 compression
<> encode16 digestSha256
<> encode64 offset
<> encode64 lengthBytes
<> sha256 sectionBytes
decodePortableBundle :: ByteString -> Either String Bundle
decodePortableBundle bs = do
(major, minor, sectionCount, _flags, dirOffset) <- decodePortableHeader bs
when (major /= bundleMajorVersion) $
Left $ "unsupported bundle major version: " ++ show major
let dirStart = fromIntegral dirOffset
dirBytes = fromIntegral sectionCount * sectionEntryLength
when (BS.length bs < dirStart + dirBytes) $
Left "bundle truncated in section directory"
let dirRaw = BS.take dirBytes $ BS.drop dirStart bs
entries <- decodeSectionEntries sectionCount dirRaw
traverse_ rejectUnknownCritical entries
manifestEntry <- requireSection sectionManifest entries
nodesEntry <- requireSection sectionNodes entries
manifestBytes <- readAndVerifySection bs manifestEntry
nodesBytes <- readAndVerifySection bs nodesEntry
manifest <- decodeManifest manifestBytes
nodes <- decodeNodeSection nodesBytes
let roots = map rootHash (manifestRoots manifest)
return Bundle
{ bundleVersion = major * 1000 + minor
, bundleRoots = roots
, bundleNodes = nodes
, bundleManifest = manifest
, bundleManifestBytes = manifestBytes
}
rejectUnknownCritical :: SectionEntry -> Either String ()
rejectUnknownCritical entry =
let known = seType entry `elem` [sectionManifest, sectionNodes]
critical = seFlags entry .&. flagCritical /= 0
in when (critical && not known) $
Left $ "unknown critical section type: " ++ show (seType entry)
requireSection :: Word32 -> [SectionEntry] -> Either String SectionEntry
requireSection sectionType entries =
case filter ((== sectionType) . seType) entries of
[entry] -> Right entry
[] -> Left $ "missing required section type: " ++ show sectionType
_ -> Left $ "duplicate section type: " ++ show sectionType
readAndVerifySection :: ByteString -> SectionEntry -> Either String ByteString
readAndVerifySection bs entry = do
when (seCompression entry /= compressionNone) $
Left $ "unsupported compression codec in section " ++ show (seType entry)
when (seDigestAlgorithm entry /= digestSha256) $
Left $ "unsupported digest algorithm in section " ++ show (seType entry)
let offset = fromIntegral (seOffset entry)
len = fromIntegral (seLength entry)
when (offset < 0 || len < 0 || BS.length bs < offset + len) $
Left $ "section extends beyond bundle end: " ++ show (seType entry)
let sectionBytes = BS.take len $ BS.drop offset bs
when (sha256 sectionBytes /= seDigest entry) $
Left $ "section digest mismatch: " ++ show (seType entry)
Right sectionBytes
decodePortableHeader :: ByteString -> Either String (Word16, Word16, Word32, Word64, Word64)
decodePortableHeader bs
| BS.length bs < headerLength = Left "bundle too short for header"
| BS.take 8 bs /= bundleMagic = Left "invalid portable bundle magic"
| otherwise = do
(major, r1) <- decode16be "major_version" (BS.drop 8 bs)
(minor, r2) <- decode16be "minor_version" r1
(sectionCount, r3) <- decode32be "section_count" r2
(flags, r4) <- decode64be "flags" r3
(dirOffset, _) <- decode64be "directory_offset" r4
Right (major, minor, sectionCount, flags, dirOffset)
decodeSectionEntries :: Word32 -> ByteString -> Either String [SectionEntry]
decodeSectionEntries count bytes = reverse <$> go count bytes []
where
go 0 _ acc = Right acc
go n bs acc = do
when (BS.length bs < sectionEntryLength) $
Left "section directory truncated"
(sectionType, r1) <- decode32be "section_type" bs
(sectionVersion, r2) <- decode16be "section_version" r1
(sectionFlags, r3) <- decode16be "section_flags" r2
(compression, r4) <- decode16be "compression_codec" r3
(digAlg, r5) <- decode16be "digest_algorithm" r4
(offset, r6) <- decode64be "section_offset" r5
(len, r7) <- decode64be "section_length" r6
let (dig, rest) = BS.splitAt 32 r7
when (BS.length dig /= 32) $ Left "section digest truncated"
let entry = SectionEntry sectionType sectionVersion sectionFlags compression digAlg offset len dig
go (n - 1) rest (entry : acc)
-- ---------------------------------------------------------------------------
-- Manifest construction
-- ---------------------------------------------------------------------------
defaultManifest :: [(Text, MerkleHash)] -> BundleManifest
defaultManifest namedRoots = BundleManifest
{ manifestSchema = "arborix.bundle.manifest.v1"
, manifestBundleType = "tree-calculus-executable-object"
, manifestTree = TreeSpec
{ treeCalculus = "tree-calculus.v1"
, treeNodeHash = NodeHashSpec
{ nodeHashAlgorithm = "sha256"
, nodeHashDomain = "arborix.merkle.node.v1"
}
, treeNodePayload = "arborix.merkle.payload.v1"
}
, manifestRuntime = RuntimeSpec
{ runtimeSemantics = "tree-calculus.v1"
, runtimeEvaluation = "normal-order"
, runtimeAbi = "arborix.abi.tree.v1"
, runtimeCapabilities = []
}
, manifestClosure = ClosureComplete
, manifestRoots = zipWith mkRoot [0 :: Int ..] (map snd namedRoots)
, manifestExports = map mkExport namedRoots
, manifestMetadata = BundleMetadata
{ metadataPackage = Nothing
, metadataVersion = Nothing
, metadataDescription = Nothing
, metadataLicense = Nothing
, metadataCreatedBy = Just "arborix"
}
}
where
mkRoot 0 h = BundleRoot h "default"
mkRoot _ h = BundleRoot h "root"
mkExport (name, h) = BundleExport
{ exportName = name
, exportRoot = h
, exportKind = "term"
, exportAbi = "arborix.abi.tree.v1"
}
-- ---------------------------------------------------------------------------
-- Node section encoding / decoding
-- ---------------------------------------------------------------------------
encodeNodeSection :: Map MerkleHash ByteString -> ByteString
encodeNodeSection nodes =
encode64 (fromIntegral $ Map.size nodes)
<> mconcat (map nodeEntryToBinary $ Map.toAscList nodes)
-- | Encode a single (hash, canonical-payload) node entry.
nodeEntryToBinary :: (MerkleHash, ByteString) -> ByteString
nodeEntryToBinary (h, payload) =
merkleHashToRaw h
<> encode32 (fromIntegral $ BS.length payload)
<> payload
decodeNodeSection :: ByteString -> Either String (Map MerkleHash ByteString)
decodeNodeSection bs = do
(nodeCount, rest) <- decode64be "node_count" bs
decodeNodeEntries nodeCount rest
-- | Decode a sequence of node entries.
decodeNodeEntries :: Word64 -> ByteString -> Either String (Map MerkleHash ByteString)
decodeNodeEntries count bs = go count bs Map.empty
where
go 0 rest acc
| BS.null rest = Right acc
| otherwise = Left "trailing bytes after node section"
go n bytes acc
| BS.length bytes < 36 =
Left "not enough bytes for node entry header (hash + length)"
| otherwise = do
let (hashBytes, rest) = BS.splitAt 32 bytes
(plen, rest') <- decode32be "payload_len" rest
let payloadLen = fromIntegral plen
if BS.length rest' < payloadLen
then Left "payload extends beyond node section end"
else do
let (payload, after) = BS.splitAt payloadLen rest'
h = rawToMerkleHash hashBytes
when (Map.member h acc) $
Left $ "duplicate node entry: " ++ unpack h
go (n - 1) after (Map.insert h payload acc)
-- ---------------------------------------------------------------------------
-- Bundle verification
-- ---------------------------------------------------------------------------
verifyBundle :: Bundle -> Either String ()
verifyBundle bundle
| bundleVersion bundle < 1 = Left $ "unsupported bundle version: " ++ show (bundleVersion bundle)
| Map.null (bundleNodes bundle) = Left "bundle has no nodes"
verifyBundle bundle = do
verifyManifest (bundleManifest bundle)
let nodeMap = bundleNodes bundle
rootSet = Set.fromList (bundleRoots bundle)
manifestRootSet = Set.fromList (map rootHash $ manifestRoots $ bundleManifest bundle)
exportRoots = map exportRoot $ manifestExports $ bundleManifest bundle
unless (rootSet == manifestRootSet) $
Left "bundle root list does not match manifest roots"
traverse_ (requirePresent "root hash missing from bundle") (bundleRoots bundle)
traverse_ (requirePresent "export root hash missing from bundle") exportRoots
decoded <- traverse verifyNodePayload (Map.toList nodeMap)
traverse_ (verifyChildrenPresent nodeMap) decoded
verifyCompleteClosure nodeMap (bundleRoots bundle)
where
requirePresent label h =
unless (Map.member h (bundleNodes bundle)) $
Left $ label ++ ": " ++ unpack h
verifyManifest :: BundleManifest -> Either String ()
verifyManifest manifest = do
when (manifestSchema manifest /= "arborix.bundle.manifest.v1") $
Left $ "unsupported manifest schema: " ++ unpack (manifestSchema manifest)
when (manifestBundleType manifest /= "tree-calculus-executable-object") $
Left $ "unsupported bundle type: " ++ unpack (manifestBundleType manifest)
let treeSpec = manifestTree manifest
hashSpec = treeNodeHash treeSpec
runtimeSpec = manifestRuntime manifest
when (treeCalculus treeSpec /= "tree-calculus.v1") $
Left $ "unsupported calculus: " ++ unpack (treeCalculus treeSpec)
when (nodeHashAlgorithm hashSpec /= "sha256") $
Left $ "unsupported node hash algorithm: " ++ unpack (nodeHashAlgorithm hashSpec)
when (nodeHashDomain hashSpec /= "arborix.merkle.node.v1") $
Left $ "unsupported node hash domain: " ++ unpack (nodeHashDomain hashSpec)
when (treeNodePayload treeSpec /= "arborix.merkle.payload.v1") $
Left $ "unsupported node payload: " ++ unpack (treeNodePayload treeSpec)
when (runtimeSemantics runtimeSpec /= "tree-calculus.v1") $
Left $ "unsupported runtime semantics: " ++ unpack (runtimeSemantics runtimeSpec)
when (runtimeAbi runtimeSpec /= "arborix.abi.tree.v1") $
Left $ "unsupported runtime ABI: " ++ unpack (runtimeAbi runtimeSpec)
when (not (null (runtimeCapabilities runtimeSpec))) $
Left "unsupported runtime capabilities"
when (manifestClosure manifest /= ClosureComplete) $
Left "bundle v1 requires closure = complete"
when (null $ manifestRoots manifest) $
Left "manifest has no roots"
when (null $ manifestExports manifest) $
Left "manifest has no exports"
traverse_ verifyExport (manifestExports manifest)
where
verifyExport exported = do
when (T.null $ exportName exported) $
Left "manifest export has empty name"
when (T.null $ exportRoot exported) $
Left "manifest export has empty root"
verifyNodePayload :: (MerkleHash, ByteString) -> Either String (MerkleHash, Node)
verifyNodePayload (h, payload) = do
node <- safeDeserializeNode payload
let actual = nodeHash node
unless (actual == h) $
Left $ "node hash mismatch for " ++ unpack h ++ "; payload hashes to " ++ unpack actual
Right (h, node)
verifyChildrenPresent :: Map MerkleHash ByteString -> (MerkleHash, Node) -> Either String ()
verifyChildrenPresent nodeMap (h, node) =
case node of
NLeaf -> Right ()
NStem child -> requireChild h child
NFork left right -> requireChild h left >> requireChild h right
where
requireChild parent child =
unless (Map.member child nodeMap) $
Left $ "missing child node referenced by " ++ unpack parent ++ ": " ++ unpack child
verifyCompleteClosure :: Map MerkleHash ByteString -> [MerkleHash] -> Either String ()
verifyCompleteClosure nodeMap roots = do
_ <- foldM visit Set.empty roots
Right ()
where
visit seen h
| Set.member h seen = Right seen
| otherwise = do
payload <- case Map.lookup h nodeMap of
Nothing -> Left $ "closure missing node: " ++ unpack h
Just p -> Right p
node <- safeDeserializeNode payload
let seen' = Set.insert h seen
case node of
NLeaf -> Right seen'
NStem child -> visit seen' child
NFork left right -> visit seen' left >>= \seenL -> visit seenL right
safeDeserializeNode :: ByteString -> Either String Node
safeDeserializeNode payload =
case BS.uncons payload of
Just (0x00, rest)
| BS.null rest -> Right NLeaf
| otherwise -> Left "invalid leaf payload length"
Just (0x01, rest)
| BS.length rest == 32 -> Right $ NStem (rawToMerkleHash rest)
| otherwise -> Left "invalid stem payload length"
Just (0x02, rest)
| BS.length rest == 64 ->
let (left, right) = BS.splitAt 32 rest
in Right $ NFork (rawToMerkleHash left) (rawToMerkleHash right)
| otherwise -> Left "invalid fork payload length"
_ -> Left "invalid merkle node payload"
-- ---------------------------------------------------------------------------
-- Reachability traversal
-- ---------------------------------------------------------------------------
collectReachableNodes :: Connection -> MerkleHash -> IO [(MerkleHash, ByteString)]
collectReachableNodes conn root = do
let go seen current = do
case Map.lookup current seen of
Just _ -> return seen
Nothing -> do
maybeNode <- getNodeMerkle conn current
case maybeNode of
Nothing -> error $ "exportBundle: missing Merkle node: " ++ unpack current
Just node -> do
let payload = serializeNode node
seen' = Map.insert current payload seen
case node of
NLeaf -> return seen'
NStem childHash -> go seen' childHash
NFork lHash rHash -> go seen' lHash >>= \seenL -> go seenL rHash
seen <- go Map.empty root
return $ Map.toAscList seen
-- ---------------------------------------------------------------------------
-- High-level export / import
-- ---------------------------------------------------------------------------
exportBundle :: Connection -> [MerkleHash] -> IO ByteString
exportBundle conn hashes = exportNamedBundle conn (zip (defaultExportNames $ length hashes) hashes)
exportNamedBundle :: Connection -> [(Text, MerkleHash)] -> IO ByteString
exportNamedBundle conn namedHashes = do
let hashes = map snd namedHashes
entries <- concat <$> mapM (collectReachableNodes conn) hashes
let nodeMap = Map.fromList entries
manifest = defaultManifest namedHashes
manifestBytes = encodeManifest manifest
bundle = Bundle
{ bundleVersion = bundleMajorVersion * 1000 + bundleMinorVersion
, bundleRoots = hashes
, bundleNodes = nodeMap
, bundleManifest = manifest
, bundleManifestBytes = manifestBytes
}
return $ encodeBundle bundle
importBundle :: Connection -> ByteString -> IO [MerkleHash]
importBundle conn bs = case decodeBundle bs of
Left err -> error $ "Wire.importBundle: " ++ err
Right bundle -> case verifyBundle bundle of
Left err -> error $ "Wire.importBundle verify: " ++ err
Right () -> do
traverse_ (\payload -> do
node <- deserializeForImport payload
putMerkleNode conn node
)
(Map.elems $ bundleNodes bundle)
registerBundleExports conn bundle
return $ bundleRoots bundle
registerBundleExports :: Connection -> Bundle -> IO ()
registerBundleExports conn bundle =
traverse_ registerExport (manifestExports $ bundleManifest bundle)
where
registerExport exported = do
maybeTree <- loadTree conn (exportRoot exported)
case maybeTree of
Nothing -> error $ "Wire.importBundle: export root missing after node import: " ++ unpack (exportRoot exported)
Just tree -> do
_ <- storeTerm conn [unpack $ exportName exported] tree
return ()
-- ---------------------------------------------------------------------------
-- Primitive binary helpers
-- ---------------------------------------------------------------------------
encode16 :: Word16 -> ByteString
encode16 w = BS.pack
[ fromIntegral (shiftR w 8)
, fromIntegral w
]
encode32 :: Word32 -> ByteString
encode32 w = BS.pack
[ fromIntegral (shiftR w 24)
, fromIntegral (shiftR w 16)
, fromIntegral (shiftR w 8)
, fromIntegral w
]
encode64 :: Word64 -> ByteString
encode64 w = BS.pack
[ fromIntegral (shiftR w 56)
, fromIntegral (shiftR w 48)
, fromIntegral (shiftR w 40)
, fromIntegral (shiftR w 32)
, fromIntegral (shiftR w 24)
, fromIntegral (shiftR w 16)
, fromIntegral (shiftR w 8)
, fromIntegral w
]
decode16be :: String -> ByteString -> Either String (Word16, ByteString)
decode16be label bs
| BS.length bs < 2 = Left (label ++ ": not enough bytes for u16")
| otherwise =
let b0 = fromIntegral (BS.index bs 0) :: Word16
b1 = fromIntegral (BS.index bs 1) :: Word16
in Right ((b0 `shiftL` 8) .|. b1, BS.drop 2 bs)
-- | Decode a big-endian u32 from the head of a ByteString.
decode32be :: String -> ByteString -> Either String (Word32, ByteString)
decode32be label bs
| BS.length bs < 4 = Left (label ++ ": not enough bytes for u32")
| otherwise =
let b0 = fromIntegral (BS.index bs 0) :: Word32
b1 = fromIntegral (BS.index bs 1) :: Word32
b2 = fromIntegral (BS.index bs 2) :: Word32
b3 = fromIntegral (BS.index bs 3) :: Word32
val = (b0 `shiftL` 24) .|. (b1 `shiftL` 16)
.|. (b2 `shiftL` 8) .|. b3
in Right (val, BS.drop 4 bs)
decode64be :: String -> ByteString -> Either String (Word64, ByteString)
decode64be label bs
| BS.length bs < 8 = Left (label ++ ": not enough bytes for u64")
| otherwise =
let byte i = fromIntegral (BS.index bs i) :: Word64
val = (byte 0 `shiftL` 56) .|. (byte 1 `shiftL` 48)
.|. (byte 2 `shiftL` 40) .|. (byte 3 `shiftL` 32)
.|. (byte 4 `shiftL` 24) .|. (byte 5 `shiftL` 16)
.|. (byte 6 `shiftL` 8) .|. byte 7
in Right (val, BS.drop 8 bs)
-- ---------------------------------------------------------------------------
-- Hash conversion
-- ---------------------------------------------------------------------------
-- | Convert a hex MerkleHash to its raw 32-byte representation.
merkleHashToRaw :: MerkleHash -> ByteString
merkleHashToRaw h =
case Base16.decode (encodeUtf8 h) of
Left _ -> error $ "Wire.merkleHashToRaw: invalid hex: " ++ show h
Right bs
| BS.length bs == 32 -> bs
| otherwise -> error $ "Wire.merkleHashToRaw: expected 32 bytes: " ++ show h
-- | Convert raw 32 bytes back to a hex MerkleHash.
rawToMerkleHash :: ByteString -> MerkleHash
rawToMerkleHash bs = decodeUtf8 (Base16.encode bs)
sha256 :: ByteString -> ByteString
sha256 bytes = convert ((hash bytes) :: Digest SHA256)
defaultExportNames :: Int -> [Text]
defaultExportNames n =
case n of
0 -> []
1 -> ["root"]
_ -> ["root" <> T.pack (show i) | i <- [0 :: Int .. n - 1]]
deserializeForImport :: ByteString -> IO Node
deserializeForImport payload = do
result <- try (evaluate $ deserializeNode payload) :: IO (Either SomeException Node)
case result of
Left err -> error $ "Wire.importBundle: invalid merkle node payload: " ++ show err
Right node -> return node