{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE OverloadedStrings #-} module Wire ( Bundle (..) , BundleManifest (..) , TreeSpec (..) , NodeHashSpec (..) , RuntimeSpec (..) , BundleRoot (..) , BundleExport (..) , BundleMetadata , ClosureMode (..) , encodeBundle , decodeBundle , verifyBundle , collectReachableNodes , exportBundle , exportNamedBundle , importBundle , defaultExportNames ) where import ContentStore (getNodeMerkle, loadTree, putMerkleNode, storeTerm) import Research import Codec.CBOR.Decoding ( Decoder , decodeString , decodeBytes , decodeListLen , decodeMapLen ) import Control.Monad (replicateM, forM) import Codec.CBOR.Encoding ( Encoding , encodeMapLen , encodeListLen , encodeString , encodeBytes ) import Codec.CBOR.Write (toLazyByteString) import Data.Monoid (mconcat) import Codec.CBOR.Read (deserialiseFromBytes, DeserialiseFailure(..)) import Control.Exception (SomeException, evaluate, try) import Control.Monad (foldM, unless, when) import Crypto.Hash (Digest, SHA256, hash) import Data.Bits ((.&.), (.|.), shiftL, shiftR) import Data.ByteArray (convert) import Data.ByteString (ByteString) import Data.Foldable (traverse_) import Data.Map (Map) import Data.Text (Text, unpack) import Data.Text.Encoding (decodeUtf8, encodeUtf8) import Data.Word (Word16, Word32, Word64) import Database.SQLite.Simple (Connection) import GHC.Generics (Generic) import qualified Data.ByteString as BS import qualified Data.ByteString.Base16 as Base16 import qualified Data.ByteString.Lazy as BL import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.Text as T -- | Portable bundle major/minor version supported by this module. bundleMajorVersion :: Word16 bundleMajorVersion = 1 bundleMinorVersion :: Word16 bundleMinorVersion = 0 -- | Header magic for the portable executable-object container. bundleMagic :: ByteString bundleMagic = BS.pack [0x41, 0x52, 0x42, 0x4f, 0x52, 0x49, 0x58, 0x00] -- "ARBORIX\0" headerLength :: Int headerLength = 32 sectionEntryLength :: Int sectionEntryLength = 60 sectionManifest, sectionNodes :: Word32 sectionManifest = 1 sectionNodes = 2 flagCritical :: Word16 flagCritical = 0x0001 compressionNone, digestSha256 :: Word16 compressionNone = 0 digestSha256 = 1 -- --------------------------------------------------------------------------- -- CBOR encoding helpers -- --------------------------------------------------------------------------- -- | Canonical CBOR map length encoder. cmkLen :: Int -> Encoding cmkLen n = encodeMapLen (fromIntegral n) -- | Decode a CBOR array of n elements. decodeListN :: Decoder s a -> Int -> Decoder s [a] decodeListN dec n = replicateM n dec -- | Decode a CBOR map (sequence of key-value pairs). decodeMapN :: Decoder s a -> Decoder s b -> Int -> Decoder s [(a, b)] decodeMapN keyDec valDec n = forM [1..n] $ \_ -> keyDec >>= \k -> valDec >>= \v -> pure (k, v) decodeKey :: Text -> Decoder s () decodeKey expected = do actual <- decodeString unless (actual == expected) $ fail $ "expected key " ++ show expected ++ ", got " ++ show actual -- | Canonical CBOR array length encoder. cakLen :: Int -> Encoding cakLen n = encodeListLen (fromIntegral n) -- | Encode a canonical CBOR map with key-value pairs as flat sequence. cmkPairs :: [(Text, Encoding)] -> Encoding cmkPairs [] = cmkLen 0 cmkPairs kvs = cmkLen (length kvs) <> mconcat [encodeString k <> v | (k, v) <- kvs] -- | Encode a canonical CBOR array. cakSeq :: [Encoding] -> Encoding cakSeq [] = cakLen 0 cakSeq xs = cakLen (length xs) <> mconcat xs -- | Encode a canonical CBOR text string. encText :: Text -> Encoding encText = encodeString -- | Encode a canonical CBOR byte string. encBytes :: ByteString -> Encoding encBytes = encodeBytes -- --------------------------------------------------------------------------- -- Data types with CBOR instances -- --------------------------------------------------------------------------- -- | Closure declaration. data ClosureMode = ClosureComplete | ClosurePartial deriving (Show, Eq, Ord, Generic) toCBORClosure :: ClosureMode -> Encoding toCBORClosure = encText . \case ClosureComplete -> "complete" ClosurePartial -> "partial" closureFromCBOR :: Decoder s ClosureMode closureFromCBOR = decodeString >>= \case "complete" -> pure ClosureComplete "partial" -> pure ClosurePartial other -> fail $ "ClosureMode: " ++ show other -- | Hash specification (algorithm + domain strings). data NodeHashSpec = NodeHashSpec { nodeHashAlgorithm :: Text , nodeHashDomain :: Text } deriving (Show, Eq, Ord, Generic) toCBORNodeHashSpec :: NodeHashSpec -> Encoding toCBORNodeHashSpec (NodeHashSpec alg dom) = cmkPairs [ ("algorithm", encText alg) , ("domain", encText dom) ] nodeHashSpecFromCBOR :: Decoder s NodeHashSpec nodeHashSpecFromCBOR = do n <- decodeMapLen unless (n == 2) $ fail "NodeHashSpec: must have exactly 2 entries" decodeKey "algorithm" alg <- decodeString decodeKey "domain" dom <- decodeString pure (NodeHashSpec alg dom) -- | Tree specification. data TreeSpec = TreeSpec { treeCalculus :: Text , treeNodeHash :: NodeHashSpec , treeNodePayload :: Text } deriving (Show, Eq, Ord, Generic) toCBORTreeSpec :: TreeSpec -> Encoding toCBORTreeSpec (TreeSpec calc hspec payload) = cmkPairs [ ("calculus", encText calc) , ("nodeHash", toCBORNodeHashSpec hspec) , ("nodePayload", encText payload) ] treeSpecFromCBOR :: Decoder s TreeSpec treeSpecFromCBOR = do n <- decodeMapLen unless (n == 3) $ fail "TreeSpec: must have exactly 3 entries" decodeKey "calculus" calc <- decodeString decodeKey "nodeHash" hspec <- nodeHashSpecFromCBOR decodeKey "nodePayload" payload <- decodeString pure (TreeSpec calc hspec payload) -- | Runtime specification. data RuntimeSpec = RuntimeSpec { runtimeSemantics :: Text , runtimeEvaluation :: Text , runtimeAbi :: Text , runtimeCapabilities :: [Text] } deriving (Show, Eq, Ord, Generic) toCBORRuntimeSpec :: RuntimeSpec -> Encoding toCBORRuntimeSpec (RuntimeSpec sem eval abi caps) = cmkPairs [ ("semantics", encText sem) , ("evaluation", encText eval) , ("abi", encText abi) , ("capabilities", cakSeq (map encText caps)) ] runtimeSpecFromCBOR :: Decoder s RuntimeSpec runtimeSpecFromCBOR = do n <- decodeMapLen unless (n == 4) $ fail "RuntimeSpec: must have exactly 4 entries" decodeKey "semantics" sem <- decodeString decodeKey "evaluation" eval <- decodeString decodeKey "abi" abi <- decodeString decodeKey "capabilities" clen <- decodeListLen caps <- decodeListN decodeString clen pure (RuntimeSpec sem eval abi caps) -- | A root hash reference. data BundleRoot = BundleRoot { rootHash :: MerkleHash , rootRole :: Text } deriving (Show, Eq, Ord, Generic) toCBORBundleRoot :: BundleRoot -> Encoding toCBORBundleRoot (BundleRoot h role) = cmkPairs [ ("hash", encBytes (merkleHashToRaw h)) , ("role", encText role) ] bundleRootFromCBOR :: Decoder s BundleRoot bundleRootFromCBOR = do n <- decodeMapLen unless (n == 2) $ fail "BundleRoot: must have exactly 2 entries" decodeKey "hash" hRaw <- decodeBytes decodeKey "role" role <- decodeString pure (BundleRoot (rawToMerkleHash hRaw) role) -- | An export entry. data BundleExport = BundleExport { exportName :: Text , exportRoot :: MerkleHash , exportKind :: Text , exportAbi :: Text } deriving (Show, Eq, Ord, Generic) toCBORBundleExport :: BundleExport -> Encoding toCBORBundleExport (BundleExport name h kind abi) = cmkPairs [ ("name", encText name) , ("root", encBytes (merkleHashToRaw h)) , ("kind", encText kind) , ("abi", encText abi) ] bundleExportFromCBOR :: Decoder s BundleExport bundleExportFromCBOR = do n <- decodeMapLen unless (n == 4) $ fail "BundleExport: must have exactly 4 entries" decodeKey "name" name <- decodeString decodeKey "root" hRaw <- decodeBytes decodeKey "kind" kind <- decodeString decodeKey "abi" abi <- decodeString pure (BundleExport name (rawToMerkleHash hRaw) kind abi) -- | Optional package metadata. data BundleMetadata = BundleMetadata { metadataPackage :: Maybe Text , metadataVersion :: Maybe Text , metadataDescription :: Maybe Text , metadataLicense :: Maybe Text , metadataCreatedBy :: Maybe Text } deriving (Show, Eq, Ord, Generic) metadataFromCBOR :: Decoder s BundleMetadata metadataFromCBOR = do mlen <- decodeMapLen entries <- decodeMapN decodeString decodeString mlen let lookupText k = go k entries go _ [] = Nothing go k ((k', v):rest) | k == k' = Just v | otherwise = go k rest pure BundleMetadata { metadataPackage = lookupText "package" , metadataVersion = lookupText "version" , metadataDescription = lookupText "description" , metadataLicense = lookupText "license" , metadataCreatedBy = lookupText "createdBy" } metadataToCBOR :: BundleMetadata -> Encoding metadataToCBOR (BundleMetadata pkg ver desc lic by) = let pairs = maybe [] (\v -> [("package", encText v)]) pkg ++ maybe [] (\v -> [("version", encText v)]) ver ++ maybe [] (\v -> [("description", encText v)]) desc ++ maybe [] (\v -> [("license", encText v)]) lic ++ maybe [] (\v -> [("createdBy", encText v)]) by in cmkPairs pairs -- | The manifest: top-level bundle metadata. data BundleManifest = BundleManifest { manifestSchema :: Text , manifestBundleType :: Text , manifestTree :: TreeSpec , manifestRuntime :: RuntimeSpec , manifestClosure :: ClosureMode , manifestRoots :: [BundleRoot] , manifestExports :: [BundleExport] , manifestMetadata :: BundleMetadata } deriving (Show, Eq, Generic) manifestToCBOR :: BundleManifest -> Encoding manifestToCBOR m = cmkPairs [ ("schema", encText (manifestSchema m)) , ("bundleType", encText (manifestBundleType m)) , ("tree", toCBORTreeSpec (manifestTree m)) , ("runtime", toCBORRuntimeSpec (manifestRuntime m)) , ("closure", toCBORClosure (manifestClosure m)) , ("roots", cakSeq (map toCBORBundleRoot (manifestRoots m))) , ("exports", cakSeq (map toCBORBundleExport (manifestExports m))) , ("metadata", metadataToCBOR (manifestMetadata m)) ] manifestFromCBOR :: Decoder s BundleManifest manifestFromCBOR = do n <- decodeMapLen unless (n == 8) $ fail "BundleManifest: must have exactly 8 entries" decodeKey "schema" schema <- decodeString decodeKey "bundleType" bundleType <- decodeString decodeKey "tree" tree <- treeSpecFromCBOR decodeKey "runtime" runtime <- runtimeSpecFromCBOR decodeKey "closure" closure <- closureFromCBOR decodeKey "roots" rlen <- decodeListLen roots <- decodeListN bundleRootFromCBOR rlen decodeKey "exports" elen <- decodeListLen exports <- decodeListN bundleExportFromCBOR elen decodeKey "metadata" metadata <- metadataFromCBOR pure (BundleManifest schema bundleType tree runtime closure roots exports metadata) -- | Portable executable-object bundle. -- -- Merkle node payloads remain the language-neutral executable core: -- Leaf = 0x00; Stem = 0x01 || child_hash; Fork = 0x02 || left_hash || right_hash. -- Names, exports, runtime metadata, and package metadata live in the manifest layer. data Bundle = Bundle { bundleVersion :: Word16 , bundleRoots :: [MerkleHash] , bundleNodes :: Map MerkleHash ByteString , bundleManifest :: BundleManifest , bundleManifestBytes :: ByteString } deriving (Show, Eq) -- --------------------------------------------------------------------------- -- CBOR manifest serialization -- --------------------------------------------------------------------------- -- | Encode the manifest as canonical CBOR. encodeManifest :: BundleManifest -> ByteString encodeManifest m = BL.toStrict (toLazyByteString (manifestToCBOR m)) -- | Decode a manifest from CBOR bytes. decodeManifest :: ByteString -> Either String BundleManifest decodeManifest bs = case deserialiseFromBytes manifestFromCBOR (BL.fromStrict bs) of Right (rest, m) | BS.null (BL.toStrict rest) -> Right m | otherwise -> Left "trailing bytes after manifest CBOR" Left (DeserialiseFailure _ msg) -> Left msg -- --------------------------------------------------------------------------- -- Bundle encoding -- --------------------------------------------------------------------------- -- | Encode a Bundle to portable Bundle v1 bytes. encodeBundle :: Bundle -> ByteString encodeBundle bundle = let nodeSection = encodeNodeSection (bundleNodes bundle) manifestBytes = if BS.null (bundleManifestBytes bundle) then encodeManifest (bundleManifest bundle) else bundleManifestBytes bundle sectionCount = 2 dirOffset = fromIntegral headerLength sectionDirLength = sectionCount * sectionEntryLength manifestOffset = fromIntegral (headerLength + sectionDirLength) nodesOffset = manifestOffset + fromIntegral (BS.length manifestBytes) manifestEntry = encodeSectionEntry sectionManifest 1 flagCritical compressionNone manifestOffset (fromIntegral $ BS.length manifestBytes) manifestBytes nodesEntry = encodeSectionEntry sectionNodes 1 flagCritical compressionNone nodesOffset (fromIntegral $ BS.length nodeSection) nodeSection header = encodeHeader bundleMajorVersion bundleMinorVersion (fromIntegral sectionCount) 0 dirOffset in header <> manifestEntry <> nodesEntry <> manifestBytes <> nodeSection -- | Decode portable Bundle v1 bytes. decodeBundle :: ByteString -> Either String Bundle decodeBundle bs | BS.take (BS.length bundleMagic) bs == bundleMagic = decodePortableBundle bs | otherwise = Left "invalid magic" -- --------------------------------------------------------------------------- -- Portable container encoding / decoding -- --------------------------------------------------------------------------- data SectionEntry = SectionEntry { seType :: Word32 , seVersion :: Word16 , seFlags :: Word16 , seCompression :: Word16 , seDigestAlgorithm :: Word16 , seOffset :: Word64 , seLength :: Word64 , seDigest :: ByteString } deriving (Show, Eq) encodeHeader :: Word16 -> Word16 -> Word32 -> Word64 -> Word64 -> ByteString encodeHeader major minor sectionCount flags dirOffset = bundleMagic <> encode16 major <> encode16 minor <> encode32 sectionCount <> encode64 flags <> encode64 dirOffset encodeSectionEntry :: Word32 -> Word16 -> Word16 -> Word16 -> Word64 -> Word64 -> ByteString -> ByteString encodeSectionEntry sectionType sectionVersion sectionFlags compression offset lengthBytes sectionBytes = encode32 sectionType <> encode16 sectionVersion <> encode16 sectionFlags <> encode16 compression <> encode16 digestSha256 <> encode64 offset <> encode64 lengthBytes <> sha256 sectionBytes decodePortableBundle :: ByteString -> Either String Bundle decodePortableBundle bs = do (major, minor, sectionCount, _flags, dirOffset) <- decodePortableHeader bs when (major /= bundleMajorVersion) $ Left $ "unsupported bundle major version: " ++ show major let dirStart = fromIntegral dirOffset dirBytes = fromIntegral sectionCount * sectionEntryLength when (BS.length bs < dirStart + dirBytes) $ Left "bundle truncated in section directory" let dirRaw = BS.take dirBytes $ BS.drop dirStart bs entries <- decodeSectionEntries sectionCount dirRaw traverse_ rejectUnknownCritical entries manifestEntry <- requireSection sectionManifest entries nodesEntry <- requireSection sectionNodes entries manifestBytes <- readAndVerifySection bs manifestEntry nodesBytes <- readAndVerifySection bs nodesEntry manifest <- decodeManifest manifestBytes nodes <- decodeNodeSection nodesBytes let roots = map rootHash (manifestRoots manifest) return Bundle { bundleVersion = major * 1000 + minor , bundleRoots = roots , bundleNodes = nodes , bundleManifest = manifest , bundleManifestBytes = manifestBytes } rejectUnknownCritical :: SectionEntry -> Either String () rejectUnknownCritical entry = let known = seType entry `elem` [sectionManifest, sectionNodes] critical = seFlags entry .&. flagCritical /= 0 in when (critical && not known) $ Left $ "unknown critical section type: " ++ show (seType entry) requireSection :: Word32 -> [SectionEntry] -> Either String SectionEntry requireSection sectionType entries = case filter ((== sectionType) . seType) entries of [entry] -> Right entry [] -> Left $ "missing required section type: " ++ show sectionType _ -> Left $ "duplicate section type: " ++ show sectionType readAndVerifySection :: ByteString -> SectionEntry -> Either String ByteString readAndVerifySection bs entry = do when (seCompression entry /= compressionNone) $ Left $ "unsupported compression codec in section " ++ show (seType entry) when (seDigestAlgorithm entry /= digestSha256) $ Left $ "unsupported digest algorithm in section " ++ show (seType entry) let offset = fromIntegral (seOffset entry) len = fromIntegral (seLength entry) when (offset < 0 || len < 0 || BS.length bs < offset + len) $ Left $ "section extends beyond bundle end: " ++ show (seType entry) let sectionBytes = BS.take len $ BS.drop offset bs when (sha256 sectionBytes /= seDigest entry) $ Left $ "section digest mismatch: " ++ show (seType entry) Right sectionBytes decodePortableHeader :: ByteString -> Either String (Word16, Word16, Word32, Word64, Word64) decodePortableHeader bs | BS.length bs < headerLength = Left "bundle too short for header" | BS.take 8 bs /= bundleMagic = Left "invalid portable bundle magic" | otherwise = do (major, r1) <- decode16be "major_version" (BS.drop 8 bs) (minor, r2) <- decode16be "minor_version" r1 (sectionCount, r3) <- decode32be "section_count" r2 (flags, r4) <- decode64be "flags" r3 (dirOffset, _) <- decode64be "directory_offset" r4 Right (major, minor, sectionCount, flags, dirOffset) decodeSectionEntries :: Word32 -> ByteString -> Either String [SectionEntry] decodeSectionEntries count bytes = reverse <$> go count bytes [] where go 0 _ acc = Right acc go n bs acc = do when (BS.length bs < sectionEntryLength) $ Left "section directory truncated" (sectionType, r1) <- decode32be "section_type" bs (sectionVersion, r2) <- decode16be "section_version" r1 (sectionFlags, r3) <- decode16be "section_flags" r2 (compression, r4) <- decode16be "compression_codec" r3 (digAlg, r5) <- decode16be "digest_algorithm" r4 (offset, r6) <- decode64be "section_offset" r5 (len, r7) <- decode64be "section_length" r6 let (dig, rest) = BS.splitAt 32 r7 when (BS.length dig /= 32) $ Left "section digest truncated" let entry = SectionEntry sectionType sectionVersion sectionFlags compression digAlg offset len dig go (n - 1) rest (entry : acc) -- --------------------------------------------------------------------------- -- Manifest construction -- --------------------------------------------------------------------------- defaultManifest :: [(Text, MerkleHash)] -> BundleManifest defaultManifest namedRoots = BundleManifest { manifestSchema = "arborix.bundle.manifest.v1" , manifestBundleType = "tree-calculus-executable-object" , manifestTree = TreeSpec { treeCalculus = "tree-calculus.v1" , treeNodeHash = NodeHashSpec { nodeHashAlgorithm = "sha256" , nodeHashDomain = "arborix.merkle.node.v1" } , treeNodePayload = "arborix.merkle.payload.v1" } , manifestRuntime = RuntimeSpec { runtimeSemantics = "tree-calculus.v1" , runtimeEvaluation = "normal-order" , runtimeAbi = "arborix.abi.tree.v1" , runtimeCapabilities = [] } , manifestClosure = ClosureComplete , manifestRoots = zipWith mkRoot [0 :: Int ..] (map snd namedRoots) , manifestExports = map mkExport namedRoots , manifestMetadata = BundleMetadata { metadataPackage = Nothing , metadataVersion = Nothing , metadataDescription = Nothing , metadataLicense = Nothing , metadataCreatedBy = Just "arborix" } } where mkRoot 0 h = BundleRoot h "default" mkRoot _ h = BundleRoot h "root" mkExport (name, h) = BundleExport { exportName = name , exportRoot = h , exportKind = "term" , exportAbi = "arborix.abi.tree.v1" } -- --------------------------------------------------------------------------- -- Node section encoding / decoding -- --------------------------------------------------------------------------- encodeNodeSection :: Map MerkleHash ByteString -> ByteString encodeNodeSection nodes = encode64 (fromIntegral $ Map.size nodes) <> mconcat (map nodeEntryToBinary $ Map.toAscList nodes) -- | Encode a single (hash, canonical-payload) node entry. nodeEntryToBinary :: (MerkleHash, ByteString) -> ByteString nodeEntryToBinary (h, payload) = merkleHashToRaw h <> encode32 (fromIntegral $ BS.length payload) <> payload decodeNodeSection :: ByteString -> Either String (Map MerkleHash ByteString) decodeNodeSection bs = do (nodeCount, rest) <- decode64be "node_count" bs decodeNodeEntries nodeCount rest -- | Decode a sequence of node entries. decodeNodeEntries :: Word64 -> ByteString -> Either String (Map MerkleHash ByteString) decodeNodeEntries count bs = go count bs Map.empty where go 0 rest acc | BS.null rest = Right acc | otherwise = Left "trailing bytes after node section" go n bytes acc | BS.length bytes < 36 = Left "not enough bytes for node entry header (hash + length)" | otherwise = do let (hashBytes, rest) = BS.splitAt 32 bytes (plen, rest') <- decode32be "payload_len" rest let payloadLen = fromIntegral plen if BS.length rest' < payloadLen then Left "payload extends beyond node section end" else do let (payload, after) = BS.splitAt payloadLen rest' h = rawToMerkleHash hashBytes when (Map.member h acc) $ Left $ "duplicate node entry: " ++ unpack h go (n - 1) after (Map.insert h payload acc) -- --------------------------------------------------------------------------- -- Bundle verification -- --------------------------------------------------------------------------- verifyBundle :: Bundle -> Either String () verifyBundle bundle | bundleVersion bundle < 1 = Left $ "unsupported bundle version: " ++ show (bundleVersion bundle) | Map.null (bundleNodes bundle) = Left "bundle has no nodes" verifyBundle bundle = do verifyManifest (bundleManifest bundle) let nodeMap = bundleNodes bundle rootSet = Set.fromList (bundleRoots bundle) manifestRootSet = Set.fromList (map rootHash $ manifestRoots $ bundleManifest bundle) exportRoots = map exportRoot $ manifestExports $ bundleManifest bundle unless (rootSet == manifestRootSet) $ Left "bundle root list does not match manifest roots" traverse_ (requirePresent "root hash missing from bundle") (bundleRoots bundle) traverse_ (requirePresent "export root hash missing from bundle") exportRoots decoded <- traverse verifyNodePayload (Map.toList nodeMap) traverse_ (verifyChildrenPresent nodeMap) decoded verifyCompleteClosure nodeMap (bundleRoots bundle) where requirePresent label h = unless (Map.member h (bundleNodes bundle)) $ Left $ label ++ ": " ++ unpack h verifyManifest :: BundleManifest -> Either String () verifyManifest manifest = do when (manifestSchema manifest /= "arborix.bundle.manifest.v1") $ Left $ "unsupported manifest schema: " ++ unpack (manifestSchema manifest) when (manifestBundleType manifest /= "tree-calculus-executable-object") $ Left $ "unsupported bundle type: " ++ unpack (manifestBundleType manifest) let treeSpec = manifestTree manifest hashSpec = treeNodeHash treeSpec runtimeSpec = manifestRuntime manifest when (treeCalculus treeSpec /= "tree-calculus.v1") $ Left $ "unsupported calculus: " ++ unpack (treeCalculus treeSpec) when (nodeHashAlgorithm hashSpec /= "sha256") $ Left $ "unsupported node hash algorithm: " ++ unpack (nodeHashAlgorithm hashSpec) when (nodeHashDomain hashSpec /= "arborix.merkle.node.v1") $ Left $ "unsupported node hash domain: " ++ unpack (nodeHashDomain hashSpec) when (treeNodePayload treeSpec /= "arborix.merkle.payload.v1") $ Left $ "unsupported node payload: " ++ unpack (treeNodePayload treeSpec) when (runtimeSemantics runtimeSpec /= "tree-calculus.v1") $ Left $ "unsupported runtime semantics: " ++ unpack (runtimeSemantics runtimeSpec) when (runtimeAbi runtimeSpec /= "arborix.abi.tree.v1") $ Left $ "unsupported runtime ABI: " ++ unpack (runtimeAbi runtimeSpec) when (not (null (runtimeCapabilities runtimeSpec))) $ Left "unsupported runtime capabilities" when (manifestClosure manifest /= ClosureComplete) $ Left "bundle v1 requires closure = complete" when (null $ manifestRoots manifest) $ Left "manifest has no roots" when (null $ manifestExports manifest) $ Left "manifest has no exports" traverse_ verifyExport (manifestExports manifest) where verifyExport exported = do when (T.null $ exportName exported) $ Left "manifest export has empty name" when (T.null $ exportRoot exported) $ Left "manifest export has empty root" verifyNodePayload :: (MerkleHash, ByteString) -> Either String (MerkleHash, Node) verifyNodePayload (h, payload) = do node <- safeDeserializeNode payload let actual = nodeHash node unless (actual == h) $ Left $ "node hash mismatch for " ++ unpack h ++ "; payload hashes to " ++ unpack actual Right (h, node) verifyChildrenPresent :: Map MerkleHash ByteString -> (MerkleHash, Node) -> Either String () verifyChildrenPresent nodeMap (h, node) = case node of NLeaf -> Right () NStem child -> requireChild h child NFork left right -> requireChild h left >> requireChild h right where requireChild parent child = unless (Map.member child nodeMap) $ Left $ "missing child node referenced by " ++ unpack parent ++ ": " ++ unpack child verifyCompleteClosure :: Map MerkleHash ByteString -> [MerkleHash] -> Either String () verifyCompleteClosure nodeMap roots = do _ <- foldM visit Set.empty roots Right () where visit seen h | Set.member h seen = Right seen | otherwise = do payload <- case Map.lookup h nodeMap of Nothing -> Left $ "closure missing node: " ++ unpack h Just p -> Right p node <- safeDeserializeNode payload let seen' = Set.insert h seen case node of NLeaf -> Right seen' NStem child -> visit seen' child NFork left right -> visit seen' left >>= \seenL -> visit seenL right safeDeserializeNode :: ByteString -> Either String Node safeDeserializeNode payload = case BS.uncons payload of Just (0x00, rest) | BS.null rest -> Right NLeaf | otherwise -> Left "invalid leaf payload length" Just (0x01, rest) | BS.length rest == 32 -> Right $ NStem (rawToMerkleHash rest) | otherwise -> Left "invalid stem payload length" Just (0x02, rest) | BS.length rest == 64 -> let (left, right) = BS.splitAt 32 rest in Right $ NFork (rawToMerkleHash left) (rawToMerkleHash right) | otherwise -> Left "invalid fork payload length" _ -> Left "invalid merkle node payload" -- --------------------------------------------------------------------------- -- Reachability traversal -- --------------------------------------------------------------------------- collectReachableNodes :: Connection -> MerkleHash -> IO [(MerkleHash, ByteString)] collectReachableNodes conn root = do let go seen current = do case Map.lookup current seen of Just _ -> return seen Nothing -> do maybeNode <- getNodeMerkle conn current case maybeNode of Nothing -> error $ "exportBundle: missing Merkle node: " ++ unpack current Just node -> do let payload = serializeNode node seen' = Map.insert current payload seen case node of NLeaf -> return seen' NStem childHash -> go seen' childHash NFork lHash rHash -> go seen' lHash >>= \seenL -> go seenL rHash seen <- go Map.empty root return $ Map.toAscList seen -- --------------------------------------------------------------------------- -- High-level export / import -- --------------------------------------------------------------------------- exportBundle :: Connection -> [MerkleHash] -> IO ByteString exportBundle conn hashes = exportNamedBundle conn (zip (defaultExportNames $ length hashes) hashes) exportNamedBundle :: Connection -> [(Text, MerkleHash)] -> IO ByteString exportNamedBundle conn namedHashes = do let hashes = map snd namedHashes entries <- concat <$> mapM (collectReachableNodes conn) hashes let nodeMap = Map.fromList entries manifest = defaultManifest namedHashes manifestBytes = encodeManifest manifest bundle = Bundle { bundleVersion = bundleMajorVersion * 1000 + bundleMinorVersion , bundleRoots = hashes , bundleNodes = nodeMap , bundleManifest = manifest , bundleManifestBytes = manifestBytes } return $ encodeBundle bundle importBundle :: Connection -> ByteString -> IO [MerkleHash] importBundle conn bs = case decodeBundle bs of Left err -> error $ "Wire.importBundle: " ++ err Right bundle -> case verifyBundle bundle of Left err -> error $ "Wire.importBundle verify: " ++ err Right () -> do traverse_ (\payload -> do node <- deserializeForImport payload putMerkleNode conn node ) (Map.elems $ bundleNodes bundle) registerBundleExports conn bundle return $ bundleRoots bundle registerBundleExports :: Connection -> Bundle -> IO () registerBundleExports conn bundle = traverse_ registerExport (manifestExports $ bundleManifest bundle) where registerExport exported = do maybeTree <- loadTree conn (exportRoot exported) case maybeTree of Nothing -> error $ "Wire.importBundle: export root missing after node import: " ++ unpack (exportRoot exported) Just tree -> do _ <- storeTerm conn [unpack $ exportName exported] tree return () -- --------------------------------------------------------------------------- -- Primitive binary helpers -- --------------------------------------------------------------------------- encode16 :: Word16 -> ByteString encode16 w = BS.pack [ fromIntegral (shiftR w 8) , fromIntegral w ] encode32 :: Word32 -> ByteString encode32 w = BS.pack [ fromIntegral (shiftR w 24) , fromIntegral (shiftR w 16) , fromIntegral (shiftR w 8) , fromIntegral w ] encode64 :: Word64 -> ByteString encode64 w = BS.pack [ fromIntegral (shiftR w 56) , fromIntegral (shiftR w 48) , fromIntegral (shiftR w 40) , fromIntegral (shiftR w 32) , fromIntegral (shiftR w 24) , fromIntegral (shiftR w 16) , fromIntegral (shiftR w 8) , fromIntegral w ] decode16be :: String -> ByteString -> Either String (Word16, ByteString) decode16be label bs | BS.length bs < 2 = Left (label ++ ": not enough bytes for u16") | otherwise = let b0 = fromIntegral (BS.index bs 0) :: Word16 b1 = fromIntegral (BS.index bs 1) :: Word16 in Right ((b0 `shiftL` 8) .|. b1, BS.drop 2 bs) -- | Decode a big-endian u32 from the head of a ByteString. decode32be :: String -> ByteString -> Either String (Word32, ByteString) decode32be label bs | BS.length bs < 4 = Left (label ++ ": not enough bytes for u32") | otherwise = let b0 = fromIntegral (BS.index bs 0) :: Word32 b1 = fromIntegral (BS.index bs 1) :: Word32 b2 = fromIntegral (BS.index bs 2) :: Word32 b3 = fromIntegral (BS.index bs 3) :: Word32 val = (b0 `shiftL` 24) .|. (b1 `shiftL` 16) .|. (b2 `shiftL` 8) .|. b3 in Right (val, BS.drop 4 bs) decode64be :: String -> ByteString -> Either String (Word64, ByteString) decode64be label bs | BS.length bs < 8 = Left (label ++ ": not enough bytes for u64") | otherwise = let byte i = fromIntegral (BS.index bs i) :: Word64 val = (byte 0 `shiftL` 56) .|. (byte 1 `shiftL` 48) .|. (byte 2 `shiftL` 40) .|. (byte 3 `shiftL` 32) .|. (byte 4 `shiftL` 24) .|. (byte 5 `shiftL` 16) .|. (byte 6 `shiftL` 8) .|. byte 7 in Right (val, BS.drop 8 bs) -- --------------------------------------------------------------------------- -- Hash conversion -- --------------------------------------------------------------------------- -- | Convert a hex MerkleHash to its raw 32-byte representation. merkleHashToRaw :: MerkleHash -> ByteString merkleHashToRaw h = case Base16.decode (encodeUtf8 h) of Left _ -> error $ "Wire.merkleHashToRaw: invalid hex: " ++ show h Right bs | BS.length bs == 32 -> bs | otherwise -> error $ "Wire.merkleHashToRaw: expected 32 bytes: " ++ show h -- | Convert raw 32 bytes back to a hex MerkleHash. rawToMerkleHash :: ByteString -> MerkleHash rawToMerkleHash bs = decodeUtf8 (Base16.encode bs) sha256 :: ByteString -> ByteString sha256 bytes = convert ((hash bytes) :: Digest SHA256) defaultExportNames :: Int -> [Text] defaultExportNames n = case n of 0 -> [] 1 -> ["root"] _ -> ["root" <> T.pack (show i) | i <- [0 :: Int .. n - 1]] deserializeForImport :: ByteString -> IO Node deserializeForImport payload = do result <- try (evaluate $ deserializeNode payload) :: IO (Either SomeException Node) case result of Left err -> error $ "Wire.importBundle: invalid merkle node payload: " ++ show err Right node -> return node