Switch manifest serialization to CBOR

Replace JSON-based bundle manifest with a CBOR-encoded format. The manifest
is now a canonical CBOR map with order-strict key decoding, raw 32-byte hash
payloads (instead of hex-encoded JSON), and compact binary representation.
This commit is contained in:
2026-05-07 21:41:50 -05:00
parent d9f25a2b5a
commit e3117e3ac8
23 changed files with 988 additions and 275 deletions

View File

@@ -85,12 +85,12 @@ serializeNode (NFork l r) = BS.pack [0x02] <> go (decode (encodeUtf8 l)) <> go (
go (Right bs) = bs
-- | Hash a node per the Merkle content-addressing spec.
-- hash = SHA256( "tricu.merkle.node.v1" <> 0x00 <> node_payload )
-- hash = SHA256( "arborix.merkle.node.v1" <> 0x00 <> node_payload )
nodeHash :: Node -> MerkleHash
nodeHash node = decodeUtf8 (encode (sha256WithPrefix (serializeNode node)))
where sha256WithPrefix payload =
convert . (hash :: BS.ByteString -> Digest SHA256) $ utf8Tag <> BS.pack [0x00] <> payload
utf8Tag = BS.pack $ map fromIntegral $ BS.unpack "tricu.merkle.node.v1"
utf8Tag = BS.pack $ map fromIntegral $ BS.unpack "arborix.merkle.node.v1"
-- | Deserialize a Node from canonical bytes.
deserializeNode :: BS.ByteString -> Node

View File

@@ -1,4 +1,5 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
module Wire
( Bundle (..)
@@ -8,7 +9,7 @@ module Wire
, RuntimeSpec (..)
, BundleRoot (..)
, BundleExport (..)
, BundleMetadata (..)
, BundleMetadata
, ClosureMode (..)
, encodeBundle
, decodeBundle
@@ -23,21 +24,26 @@ module Wire
import ContentStore (getNodeMerkle, loadTree, putMerkleNode, storeTerm)
import Research
import Codec.CBOR.Decoding ( Decoder
, decodeString
, decodeBytes
, decodeListLen
, decodeMapLen
)
import Control.Monad (replicateM, forM)
import Codec.CBOR.Encoding ( Encoding
, encodeMapLen
, encodeListLen
, encodeString
, encodeBytes
)
import Codec.CBOR.Write (toLazyByteString)
import Data.Monoid (mconcat)
import Codec.CBOR.Read (deserialiseFromBytes, DeserialiseFailure(..))
import Control.Exception (SomeException, evaluate, try)
import Control.Monad (foldM, unless, when)
import Crypto.Hash (Digest, SHA256, hash)
import Data.Aeson ( FromJSON (..)
, ToJSON (..)
, Value (String)
, eitherDecodeStrict'
, encode
, object
, withObject
, (.:)
, (.:?)
, (.!=)
, (.=)
)
import Data.Bits ((.&.), (.|.), shiftL, shiftR)
import Data.ByteArray (convert)
import Data.ByteString (ByteString)
@@ -84,54 +90,121 @@ compressionNone, digestSha256 :: Word16
compressionNone = 0
digestSha256 = 1
-- | Closure declaration. V1 only accepts complete bundles for import.
-- ---------------------------------------------------------------------------
-- CBOR encoding helpers
-- ---------------------------------------------------------------------------
-- | Canonical CBOR map length encoder.
cmkLen :: Int -> Encoding
cmkLen n = encodeMapLen (fromIntegral n)
-- | Decode a CBOR array of n elements.
decodeListN :: Decoder s a -> Int -> Decoder s [a]
decodeListN dec n = replicateM n dec
-- | Decode a CBOR map (sequence of key-value pairs).
decodeMapN :: Decoder s a -> Decoder s b -> Int -> Decoder s [(a, b)]
decodeMapN keyDec valDec n = forM [1..n] $ \_ ->
keyDec >>= \k -> valDec >>= \v -> pure (k, v)
decodeKey :: Text -> Decoder s ()
decodeKey expected = do
actual <- decodeString
unless (actual == expected) $
fail $ "expected key " ++ show expected ++ ", got " ++ show actual
-- | Canonical CBOR array length encoder.
cakLen :: Int -> Encoding
cakLen n = encodeListLen (fromIntegral n)
-- | Encode a canonical CBOR map with key-value pairs as flat sequence.
cmkPairs :: [(Text, Encoding)] -> Encoding
cmkPairs [] = cmkLen 0
cmkPairs kvs = cmkLen (length kvs) <> mconcat [encodeString k <> v | (k, v) <- kvs]
-- | Encode a canonical CBOR array.
cakSeq :: [Encoding] -> Encoding
cakSeq [] = cakLen 0
cakSeq xs = cakLen (length xs) <> mconcat xs
-- | Encode a canonical CBOR text string.
encText :: Text -> Encoding
encText = encodeString
-- | Encode a canonical CBOR byte string.
encBytes :: ByteString -> Encoding
encBytes = encodeBytes
-- ---------------------------------------------------------------------------
-- Data types with CBOR instances
-- ---------------------------------------------------------------------------
-- | Closure declaration.
data ClosureMode = ClosureComplete | ClosurePartial
deriving (Show, Eq, Ord, Generic)
instance ToJSON ClosureMode where
toJSON ClosureComplete = String "complete"
toJSON ClosurePartial = String "partial"
toCBORClosure :: ClosureMode -> Encoding
toCBORClosure = encText . \case
ClosureComplete -> "complete"
ClosurePartial -> "partial"
instance FromJSON ClosureMode where
parseJSON (String "complete") = pure ClosureComplete
parseJSON (String "partial") = pure ClosurePartial
parseJSON _ = fail "closure must be \"complete\" or \"partial\""
closureFromCBOR :: Decoder s ClosureMode
closureFromCBOR = decodeString >>= \case
"complete" -> pure ClosureComplete
"partial" -> pure ClosurePartial
other -> fail $ "ClosureMode: " ++ show other
-- | Hash specification (algorithm + domain strings).
data NodeHashSpec = NodeHashSpec
{ nodeHashAlgorithm :: Text
, nodeHashDomain :: Text
} deriving (Show, Eq, Ord, Generic)
instance ToJSON NodeHashSpec where
toJSON s = object
[ "algorithm" .= nodeHashAlgorithm s
, "domain" .= nodeHashDomain s
toCBORNodeHashSpec :: NodeHashSpec -> Encoding
toCBORNodeHashSpec (NodeHashSpec alg dom) =
cmkPairs
[ ("algorithm", encText alg)
, ("domain", encText dom)
]
instance FromJSON NodeHashSpec where
parseJSON = withObject "NodeHashSpec" $ \o -> NodeHashSpec
<$> o .: "algorithm"
<*> o .: "domain"
nodeHashSpecFromCBOR :: Decoder s NodeHashSpec
nodeHashSpecFromCBOR = do
n <- decodeMapLen
unless (n == 2) $ fail "NodeHashSpec: must have exactly 2 entries"
decodeKey "algorithm"
alg <- decodeString
decodeKey "domain"
dom <- decodeString
pure (NodeHashSpec alg dom)
-- | Tree specification.
data TreeSpec = TreeSpec
{ treeCalculus :: Text
, treeNodeHash :: NodeHashSpec
, treeNodePayload :: Text
} deriving (Show, Eq, Ord, Generic)
instance ToJSON TreeSpec where
toJSON s = object
[ "calculus" .= treeCalculus s
, "nodeHash" .= treeNodeHash s
, "nodePayload" .= treeNodePayload s
toCBORTreeSpec :: TreeSpec -> Encoding
toCBORTreeSpec (TreeSpec calc hspec payload) =
cmkPairs
[ ("calculus", encText calc)
, ("nodeHash", toCBORNodeHashSpec hspec)
, ("nodePayload", encText payload)
]
instance FromJSON TreeSpec where
parseJSON = withObject "TreeSpec" $ \o -> TreeSpec
<$> o .: "calculus"
<*> o .: "nodeHash"
<*> o .: "nodePayload"
treeSpecFromCBOR :: Decoder s TreeSpec
treeSpecFromCBOR = do
n <- decodeMapLen
unless (n == 3) $ fail "TreeSpec: must have exactly 3 entries"
decodeKey "calculus"
calc <- decodeString
decodeKey "nodeHash"
hspec <- nodeHashSpecFromCBOR
decodeKey "nodePayload"
payload <- decodeString
pure (TreeSpec calc hspec payload)
-- | Runtime specification.
data RuntimeSpec = RuntimeSpec
{ runtimeSemantics :: Text
, runtimeEvaluation :: Text
@@ -139,65 +212,85 @@ data RuntimeSpec = RuntimeSpec
, runtimeCapabilities :: [Text]
} deriving (Show, Eq, Ord, Generic)
instance ToJSON RuntimeSpec where
toJSON s = object
[ "semantics" .= runtimeSemantics s
, "evaluation" .= runtimeEvaluation s
, "abi" .= runtimeAbi s
, "capabilities" .= runtimeCapabilities s
toCBORRuntimeSpec :: RuntimeSpec -> Encoding
toCBORRuntimeSpec (RuntimeSpec sem eval abi caps) =
cmkPairs
[ ("semantics", encText sem)
, ("evaluation", encText eval)
, ("abi", encText abi)
, ("capabilities", cakSeq (map encText caps))
]
instance FromJSON RuntimeSpec where
parseJSON = withObject "RuntimeSpec" $ \o -> RuntimeSpec
<$> o .: "semantics"
<*> o .: "evaluation"
<*> o .: "abi"
<*> o .:? "capabilities" .!= []
runtimeSpecFromCBOR :: Decoder s RuntimeSpec
runtimeSpecFromCBOR = do
n <- decodeMapLen
unless (n == 4) $ fail "RuntimeSpec: must have exactly 4 entries"
decodeKey "semantics"
sem <- decodeString
decodeKey "evaluation"
eval <- decodeString
decodeKey "abi"
abi <- decodeString
decodeKey "capabilities"
clen <- decodeListLen
caps <- decodeListN decodeString clen
pure (RuntimeSpec sem eval abi caps)
-- | A root hash reference.
data BundleRoot = BundleRoot
{ rootHash :: MerkleHash
, rootRole :: Text
} deriving (Show, Eq, Ord, Generic)
instance ToJSON BundleRoot where
toJSON r = object
[ "hash" .= rootHash r
, "role" .= rootRole r
toCBORBundleRoot :: BundleRoot -> Encoding
toCBORBundleRoot (BundleRoot h role) =
cmkPairs
[ ("hash", encBytes (merkleHashToRaw h))
, ("role", encText role)
]
instance FromJSON BundleRoot where
parseJSON = withObject "BundleRoot" $ \o -> BundleRoot
<$> o .: "hash"
<*> o .:? "role" .!= "root"
bundleRootFromCBOR :: Decoder s BundleRoot
bundleRootFromCBOR = do
n <- decodeMapLen
unless (n == 2) $ fail "BundleRoot: must have exactly 2 entries"
decodeKey "hash"
hRaw <- decodeBytes
decodeKey "role"
role <- decodeString
pure (BundleRoot (rawToMerkleHash hRaw) role)
-- | An export entry.
data BundleExport = BundleExport
{ exportName :: Text
, exportRoot :: MerkleHash
, exportKind :: Text
, exportAbi :: Text
, exportInput :: Maybe Text
, exportOutput :: Maybe Text
} deriving (Show, Eq, Ord, Generic)
instance ToJSON BundleExport where
toJSON e = object
[ "name" .= exportName e
, "root" .= exportRoot e
, "kind" .= exportKind e
, "abi" .= exportAbi e
, "input" .= exportInput e
, "output" .= exportOutput e
toCBORBundleExport :: BundleExport -> Encoding
toCBORBundleExport (BundleExport name h kind abi) =
cmkPairs
[ ("name", encText name)
, ("root", encBytes (merkleHashToRaw h))
, ("kind", encText kind)
, ("abi", encText abi)
]
instance FromJSON BundleExport where
parseJSON = withObject "BundleExport" $ \o -> BundleExport
<$> o .: "name"
<*> o .: "root"
<*> o .:? "kind" .!= "term"
<*> o .:? "abi" .!= "arborix.abi.tree.v1"
<*> o .:? "input"
<*> o .:? "output"
bundleExportFromCBOR :: Decoder s BundleExport
bundleExportFromCBOR = do
n <- decodeMapLen
unless (n == 4) $ fail "BundleExport: must have exactly 4 entries"
decodeKey "name"
name <- decodeString
decodeKey "root"
hRaw <- decodeBytes
decodeKey "kind"
kind <- decodeString
decodeKey "abi"
abi <- decodeString
pure (BundleExport name (rawToMerkleHash hRaw) kind abi)
-- | Optional package metadata.
data BundleMetadata = BundleMetadata
{ metadataPackage :: Maybe Text
, metadataVersion :: Maybe Text
@@ -206,23 +299,34 @@ data BundleMetadata = BundleMetadata
, metadataCreatedBy :: Maybe Text
} deriving (Show, Eq, Ord, Generic)
instance ToJSON BundleMetadata where
toJSON m = object
[ "package" .= metadataPackage m
, "version" .= metadataVersion m
, "description" .= metadataDescription m
, "license" .= metadataLicense m
, "createdBy" .= metadataCreatedBy m
]
metadataFromCBOR :: Decoder s BundleMetadata
metadataFromCBOR = do
mlen <- decodeMapLen
entries <- decodeMapN decodeString decodeString mlen
let lookupText k = go k entries
go _ [] = Nothing
go k ((k', v):rest)
| k == k' = Just v
| otherwise = go k rest
pure BundleMetadata
{ metadataPackage = lookupText "package"
, metadataVersion = lookupText "version"
, metadataDescription = lookupText "description"
, metadataLicense = lookupText "license"
, metadataCreatedBy = lookupText "createdBy"
}
instance FromJSON BundleMetadata where
parseJSON = withObject "BundleMetadata" $ \o -> BundleMetadata
<$> o .:? "package"
<*> o .:? "version"
<*> o .:? "description"
<*> o .:? "license"
<*> o .:? "createdBy"
metadataToCBOR :: BundleMetadata -> Encoding
metadataToCBOR (BundleMetadata pkg ver desc lic by) =
let pairs =
maybe [] (\v -> [("package", encText v)]) pkg
++ maybe [] (\v -> [("version", encText v)]) ver
++ maybe [] (\v -> [("description", encText v)]) desc
++ maybe [] (\v -> [("license", encText v)]) lic
++ maybe [] (\v -> [("createdBy", encText v)]) by
in cmkPairs pairs
-- | The manifest: top-level bundle metadata.
data BundleManifest = BundleManifest
{ manifestSchema :: Text
, manifestBundleType :: Text
@@ -231,37 +335,45 @@ data BundleManifest = BundleManifest
, manifestClosure :: ClosureMode
, manifestRoots :: [BundleRoot]
, manifestExports :: [BundleExport]
, manifestImports :: [Value]
, manifestSections :: Value
, manifestMetadata :: BundleMetadata
} deriving (Show, Eq, Generic)
instance ToJSON BundleManifest where
toJSON m = object
[ "schema" .= manifestSchema m
, "bundleType" .= manifestBundleType m
, "tree" .= manifestTree m
, "runtime" .= manifestRuntime m
, "closure" .= manifestClosure m
, "roots" .= manifestRoots m
, "exports" .= manifestExports m
, "imports" .= manifestImports m
, "sections" .= manifestSections m
, "metadata" .= manifestMetadata m
manifestToCBOR :: BundleManifest -> Encoding
manifestToCBOR m =
cmkPairs
[ ("schema", encText (manifestSchema m))
, ("bundleType", encText (manifestBundleType m))
, ("tree", toCBORTreeSpec (manifestTree m))
, ("runtime", toCBORRuntimeSpec (manifestRuntime m))
, ("closure", toCBORClosure (manifestClosure m))
, ("roots", cakSeq (map toCBORBundleRoot (manifestRoots m)))
, ("exports", cakSeq (map toCBORBundleExport (manifestExports m)))
, ("metadata", metadataToCBOR (manifestMetadata m))
]
instance FromJSON BundleManifest where
parseJSON = withObject "BundleManifest" $ \o -> BundleManifest
<$> o .: "schema"
<*> o .: "bundleType"
<*> o .: "tree"
<*> o .: "runtime"
<*> o .: "closure"
<*> o .: "roots"
<*> o .: "exports"
<*> o .:? "imports" .!= []
<*> o .:? "sections" .!= object []
<*> o .:? "metadata" .!= BundleMetadata Nothing Nothing Nothing Nothing Nothing
manifestFromCBOR :: Decoder s BundleManifest
manifestFromCBOR = do
n <- decodeMapLen
unless (n == 8) $ fail "BundleManifest: must have exactly 8 entries"
decodeKey "schema"
schema <- decodeString
decodeKey "bundleType"
bundleType <- decodeString
decodeKey "tree"
tree <- treeSpecFromCBOR
decodeKey "runtime"
runtime <- runtimeSpecFromCBOR
decodeKey "closure"
closure <- closureFromCBOR
decodeKey "roots"
rlen <- decodeListLen
roots <- decodeListN bundleRootFromCBOR rlen
decodeKey "exports"
elen <- decodeListLen
exports <- decodeListN bundleExportFromCBOR elen
decodeKey "metadata"
metadata <- metadataFromCBOR
pure (BundleManifest schema bundleType tree runtime closure roots exports metadata)
-- | Portable executable-object bundle.
--
@@ -276,12 +388,33 @@ data Bundle = Bundle
, bundleManifestBytes :: ByteString
} deriving (Show, Eq)
-- ---------------------------------------------------------------------------
-- CBOR manifest serialization
-- ---------------------------------------------------------------------------
-- | Encode the manifest as canonical CBOR.
encodeManifest :: BundleManifest -> ByteString
encodeManifest m = BL.toStrict (toLazyByteString (manifestToCBOR m))
-- | Decode a manifest from CBOR bytes.
decodeManifest :: ByteString -> Either String BundleManifest
decodeManifest bs =
case deserialiseFromBytes manifestFromCBOR (BL.fromStrict bs) of
Right (rest, m)
| BS.null (BL.toStrict rest) -> Right m
| otherwise -> Left "trailing bytes after manifest CBOR"
Left (DeserialiseFailure _ msg) -> Left msg
-- ---------------------------------------------------------------------------
-- Bundle encoding
-- ---------------------------------------------------------------------------
-- | Encode a Bundle to portable Bundle v1 bytes.
encodeBundle :: Bundle -> ByteString
encodeBundle bundle =
let nodeSection = encodeNodeSection (bundleNodes bundle)
manifestBytes = if BS.null (bundleManifestBytes bundle)
then BL.toStrict (encode (bundleManifest bundle))
then encodeManifest (bundleManifest bundle)
else bundleManifestBytes bundle
sectionCount = 2
dirOffset = fromIntegral headerLength
@@ -346,15 +479,14 @@ decodePortableBundle bs = do
dirBytes = fromIntegral sectionCount * sectionEntryLength
when (BS.length bs < dirStart + dirBytes) $
Left "bundle truncated in section directory"
entries <- decodeSectionEntries sectionCount (BS.take dirBytes $ BS.drop dirStart bs)
let dirRaw = BS.take dirBytes $ BS.drop dirStart bs
entries <- decodeSectionEntries sectionCount dirRaw
traverse_ rejectUnknownCritical entries
manifestEntry <- requireSection sectionManifest entries
nodesEntry <- requireSection sectionNodes entries
manifestBytes <- readAndVerifySection bs manifestEntry
nodesBytes <- readAndVerifySection bs nodesEntry
manifest <- case eitherDecodeStrict' manifestBytes of
Left err -> Left $ "invalid manifest JSON: " ++ err
Right m -> Right m
manifest <- decodeManifest manifestBytes
nodes <- decodeNodeSection nodesBytes
let roots = map rootHash (manifestRoots manifest)
return Bundle
@@ -429,8 +561,8 @@ decodeSectionEntries count bytes = reverse <$> go count bytes []
-- Manifest construction
-- ---------------------------------------------------------------------------
defaultManifest :: [(Text, MerkleHash)] -> Int -> BundleManifest
defaultManifest namedRoots nodeCount = BundleManifest
defaultManifest :: [(Text, MerkleHash)] -> BundleManifest
defaultManifest namedRoots = BundleManifest
{ manifestSchema = "arborix.bundle.manifest.v1"
, manifestBundleType = "tree-calculus-executable-object"
, manifestTree = TreeSpec
@@ -450,18 +582,11 @@ defaultManifest namedRoots nodeCount = BundleManifest
, manifestClosure = ClosureComplete
, manifestRoots = zipWith mkRoot [0 :: Int ..] (map snd namedRoots)
, manifestExports = map mkExport namedRoots
, manifestImports = []
, manifestSections = object
[ "nodes" .= object
[ "count" .= nodeCount
, "payload" .= ("arborix.merkle.payload.v1" :: Text)
]
]
, manifestMetadata = BundleMetadata
{ metadataPackage = Nothing
, metadataVersion = Nothing
{ metadataPackage = Nothing
, metadataVersion = Nothing
, metadataDescription = Nothing
, metadataLicense = Nothing
, metadataLicense = Nothing
, metadataCreatedBy = Just "arborix"
}
}
@@ -473,8 +598,6 @@ defaultManifest namedRoots nodeCount = BundleManifest
, exportRoot = h
, exportKind = "term"
, exportAbi = "arborix.abi.tree.v1"
, exportInput = Nothing
, exportOutput = Nothing
}
-- ---------------------------------------------------------------------------
@@ -568,12 +691,10 @@ verifyManifest manifest = do
Left $ "unsupported runtime semantics: " ++ unpack (runtimeSemantics runtimeSpec)
when (runtimeAbi runtimeSpec /= "arborix.abi.tree.v1") $
Left $ "unsupported runtime ABI: " ++ unpack (runtimeAbi runtimeSpec)
unless (null $ runtimeCapabilities runtimeSpec) $
Left "host/runtime capabilities are not supported by bundle v1"
when (not (null (runtimeCapabilities runtimeSpec))) $
Left "unsupported runtime capabilities"
when (manifestClosure manifest /= ClosureComplete) $
Left "bundle v1 imports require closure = complete"
unless (null $ manifestImports manifest) $
Left "bundle v1 imports require an empty imports list"
Left "bundle v1 requires closure = complete"
when (null $ manifestRoots manifest) $
Left "manifest has no roots"
when (null $ manifestExports manifest) $
@@ -674,8 +795,8 @@ exportNamedBundle conn namedHashes = do
let hashes = map snd namedHashes
entries <- concat <$> mapM (collectReachableNodes conn) hashes
let nodeMap = Map.fromList entries
manifest = defaultManifest namedHashes (Map.size nodeMap)
manifestBytes = BL.toStrict (encode manifest)
manifest = defaultManifest namedHashes
manifestBytes = encodeManifest manifest
bundle = Bundle
{ bundleVersion = bundleMajorVersion * 1000 + bundleMinorVersion
, bundleRoots = hashes
@@ -793,6 +914,8 @@ rawToMerkleHash bs = decodeUtf8 (Base16.encode bs)
sha256 :: ByteString -> ByteString
sha256 bytes = convert ((hash bytes) :: Digest SHA256)
defaultExportNames :: Int -> [Text]
defaultExportNames n =
case n of