Files
tricu/src/Wire.hs
James Eversole fdebb6c13d Tricu 2.0.0
Sorry for squashing all of this but 🤷
2026-05-25 12:44:24 -05:00

866 lines
33 KiB
Haskell

{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
module Wire
( Bundle (..)
, BundleManifest (..)
, TreeSpec (..)
, NodeHashSpec (..)
, RuntimeSpec (..)
, BundleRoot (..)
, BundleExport (..)
, BundleMetadata
, ClosureMode (..)
, BundleNode (..)
, encodeBundle
, decodeBundle
, verifyBundle
, buildBundle
, reconstructBundleTerms
, defaultExportNames
) where
import Research hiding (Node)
import Control.Monad (foldM, forM_, unless, when)
import Data.Bits (shiftL, shiftR, (.|.), (.&.))
import Data.ByteString (ByteString)
import Data.Foldable (traverse_)
import qualified Data.Foldable as Foldable
import Data.List (mapAccumL)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Sequence (Seq, (|>))
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text, unpack)
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
import Data.Vector (Vector)
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as MV
import Data.Word (Word16, Word32, Word64, Word8)
import GHC.Generics (Generic)
import qualified Data.ByteString as BS
import qualified Data.Text as T
-- ---------------------------------------------------------------------------
-- Container constants
-- ---------------------------------------------------------------------------
bundleMajorVersion :: Word16
bundleMajorVersion = 1
bundleMinorVersion :: Word16
bundleMinorVersion = 0
bundleMagic :: ByteString
bundleMagic = BS.pack [0x41, 0x52, 0x42, 0x4f, 0x52, 0x49, 0x43, 0x58]
headerLength :: Int
headerLength = 32
sectionEntryLength :: Int
sectionEntryLength = 32
sectionManifest, sectionNodes :: Word32
sectionManifest = 1
sectionNodes = 2
flagCritical :: Word16
flagCritical = 0x0001
compressionNone :: Word16
compressionNone = 0
-- ---------------------------------------------------------------------------
-- Manifest constants
-- ---------------------------------------------------------------------------
manifestMagic :: ByteString
manifestMagic = "ARBMNFST"
manifestMajorVersion :: Word16
manifestMajorVersion = 1
manifestMinorVersion :: Word16
manifestMinorVersion = 1
closureToByte :: ClosureMode -> Word8
closureToByte = \case
ClosureComplete -> 0
ClosurePartial -> 1
closureFromByte :: Word8 -> Either String ClosureMode
closureFromByte = \case
0 -> Right ClosureComplete
1 -> Right ClosurePartial
n -> Left $ "unsupported closure byte: " ++ show n
tagPackage, tagVersion, tagDescription, tagLicense, tagCreatedBy :: Word16
tagPackage = 1
tagVersion = 2
tagDescription = 3
tagLicense = 4
tagCreatedBy = 5
-- ---------------------------------------------------------------------------
-- Text encoding helpers
-- ---------------------------------------------------------------------------
encodeLengthPrefixedText :: Text -> ByteString
encodeLengthPrefixedText t = encode32 (fromIntegral $ BS.length bs) <> bs
where bs = encodeUtf8 t
decodeLengthPrefixedText :: ByteString -> Either String (Text, ByteString)
decodeLengthPrefixedText bs = do
(len, rest) <- decode32be "text_length" bs
let payloadLen = fromIntegral len
when (BS.length rest < payloadLen) $
Left "decodeLengthPrefixedText: string extends beyond input"
let (textBytes, after) = BS.splitAt payloadLen rest
case decodeUtf8' textBytes of
Right txt -> Right (txt, after)
Left _ -> Left "decodeLengthPrefixedText: invalid UTF-8"
encodeMetadataTLV :: Word16 -> ByteString -> ByteString
encodeMetadataTLV tag val = encode16 tag <> encode32 (fromIntegral $ BS.length val) <> val
-- ---------------------------------------------------------------------------
-- Manifest encoders
-- ---------------------------------------------------------------------------
encodeManifest :: BundleManifest -> ByteString
encodeManifest m =
manifestMagic
<> encode16 manifestMajorVersion
<> encode16 manifestMinorVersion
<> encodeLengthPrefixedText (manifestSchema m)
<> encodeLengthPrefixedText (manifestBundleType m)
<> encodeLengthPrefixedText (treeCalculus (manifestTree m))
<> encodeLengthPrefixedText (nodeHashAlgorithm (treeNodeHash (manifestTree m)))
<> encodeLengthPrefixedText (nodeHashDomain (treeNodeHash (manifestTree m)))
<> encodeLengthPrefixedText (treeNodePayload (manifestTree m))
<> encodeLengthPrefixedText (runtimeSemantics (manifestRuntime m))
<> encodeLengthPrefixedText (runtimeEvaluation (manifestRuntime m))
<> encodeLengthPrefixedText (runtimeAbi (manifestRuntime m))
<> encode32 (fromIntegral $ length (runtimeCapabilities (manifestRuntime m)))
<> encodeCapabilities (runtimeCapabilities (manifestRuntime m))
<> BS.pack [closureToByte (manifestClosure m)]
<> encode32 (fromIntegral $ length (manifestRoots m))
<> encodeRoots (manifestRoots m)
<> encode32 (fromIntegral $ length (manifestExports m))
<> encodeExports (manifestExports m)
<> encodeMetadataTLVs (manifestMetadata m)
<> encode32 0
encodeCapabilities :: [Text] -> ByteString
encodeCapabilities = mconcat . map encodeLengthPrefixedText
encodeRoots :: [BundleRoot] -> ByteString
encodeRoots = mconcat . map encodeRoot
encodeRoot :: BundleRoot -> ByteString
encodeRoot root = encode32 (rootIndex root) <> encodeLengthPrefixedText (rootRole root)
encodeExports :: [BundleExport] -> ByteString
encodeExports = mconcat . map encodeExport
encodeExport :: BundleExport -> ByteString
encodeExport exp =
encodeLengthPrefixedText (exportName exp)
<> encode32 (exportRoot exp)
<> encodeLengthPrefixedText (exportKind exp)
<> encodeLengthPrefixedText (exportAbi exp)
encodeMetadataTLVs :: BundleMetadata -> ByteString
encodeMetadataTLVs m =
let entries = metadataTLVEntries m
in encode32 (fromIntegral $ length entries) <> encodeTLVs entries
metadataTLVEntries :: BundleMetadata -> [(Word16, ByteString)]
metadataTLVEntries m =
maybeEntry tagPackage (metadataPackage m)
++ maybeEntry tagVersion (metadataVersion m)
++ maybeEntry tagDescription (metadataDescription m)
++ maybeEntry tagLicense (metadataLicense m)
++ maybeEntry tagCreatedBy (metadataCreatedBy m)
where
maybeEntry _ Nothing = []
maybeEntry tag (Just value) = [(tag, encodeUtf8 value)]
encodeTLVs :: [(Word16, ByteString)] -> ByteString
encodeTLVs = mconcat . map (uncurry encodeMetadataTLV)
-- ---------------------------------------------------------------------------
-- Manifest decoders
-- ---------------------------------------------------------------------------
decodeManifest :: ByteString -> Either String BundleManifest
decodeManifest bs = do
when (BS.length bs < 8) $ Left "manifest too short for magic"
when (BS.take 8 bs /= manifestMagic) $ Left "invalid manifest magic"
let rest = BS.drop 8 bs
(major, rest') <- decode16be "major" rest
(minor, rest'') <- decode16be "minor" rest'
when (major /= manifestMajorVersion) $
Left $ "unsupported manifest major version: " ++ show major
when (minor /= manifestMinorVersion) $
Left $ "unsupported manifest minor version: " ++ show minor
(schema, r1) <- decodeLengthPrefixedText rest''
(bundleType, r2) <- decodeLengthPrefixedText r1
(calc, r3) <- decodeLengthPrefixedText r2
(alg, r4) <- decodeLengthPrefixedText r3
(domain, r5) <- decodeLengthPrefixedText r4
(payload, r6) <- decodeLengthPrefixedText r5
(sem, r7) <- decodeLengthPrefixedText r6
(eval, r8) <- decodeLengthPrefixedText r7
(abi, r9) <- decodeLengthPrefixedText r8
(capCount, r10) <- decode32be "capability_count" r9
(caps, r11) <- decodeCapabilities (fromIntegral capCount) r10
when (BS.length r11 < 1) $ Left "manifest truncated: missing closure byte"
let (closureByte, r12) = BS.splitAt 1 r11
closure <- closureFromByte (head $ BS.unpack closureByte)
(rootCount, r13) <- decode32be "root_count" r12
(roots, r14) <- decodeRoots (fromIntegral rootCount) r13
(exportCount, r15) <- decode32be "export_count" r14
(exports, r16) <- decodeExports (fromIntegral exportCount) r15
(metadata, _ext) <- decodeMetadataAndExtensions r16
pure BundleManifest
{ manifestSchema = schema
, manifestBundleType = bundleType
, manifestTree = TreeSpec
{ treeCalculus = calc
, treeNodeHash = NodeHashSpec
{ nodeHashAlgorithm = alg
, nodeHashDomain = domain
}
, treeNodePayload = payload
}
, manifestRuntime = RuntimeSpec
{ runtimeSemantics = sem
, runtimeEvaluation = eval
, runtimeAbi = abi
, runtimeCapabilities = caps
}
, manifestClosure = closure
, manifestRoots = roots
, manifestExports = exports
, manifestMetadata = metadata
}
decodeCapabilities :: Int -> ByteString -> Either String ([Text], ByteString)
decodeCapabilities 0 bs = Right ([], bs)
decodeCapabilities n bs = do
(txt, rest) <- decodeLengthPrefixedText bs
(restTxts, restFinal) <- decodeCapabilities (n - 1) rest
Right (txt : restTxts, restFinal)
decodeRoots :: Int -> ByteString -> Either String ([BundleRoot], ByteString)
decodeRoots 0 bs = Right ([], bs)
decodeRoots n bs = do
(idx, rest1) <- decode32be "root_index" bs
(role, rest2) <- decodeLengthPrefixedText rest1
(restRoots, restFinal) <- decodeRoots (n - 1) rest2
Right (BundleRoot idx role : restRoots, restFinal)
decodeExports :: Int -> ByteString -> Either String ([BundleExport], ByteString)
decodeExports 0 bs = Right ([], bs)
decodeExports n bs = do
(name, r1) <- decodeLengthPrefixedText bs
(idx, r2) <- decode32be "export_root" r1
(kind, r3) <- decodeLengthPrefixedText r2
(abi, r4) <- decodeLengthPrefixedText r3
(restExports, restFinal) <- decodeExports (n - 1) r4
Right (BundleExport name idx kind abi : restExports, restFinal)
decodeMetadataAndExtensions :: ByteString -> Either String (BundleMetadata, ByteString)
decodeMetadataAndExtensions bs = do
(metadataCount, rest1) <- decode32be "metadata_field_count" bs
(metadataTlvs, rest2) <- decodeTLVs (fromIntegral metadataCount) rest1
metadata <- decodeMetadataTLVs metadataTlvs
(extensionCount, rest3) <- decode32be "extension_field_count" rest2
(_extensionTlvs, rest4) <- decodeTLVs (fromIntegral extensionCount) rest3
unless (BS.null rest4) $ Left "trailing bytes after manifest TLV tail"
Right (metadata, rest4)
decodeTLVs :: Int -> ByteString -> Either String ([TLVEntry], ByteString)
decodeTLVs 0 bs = Right ([], bs)
decodeTLVs n bs = do
(tag, r1) <- decode16be "tlv_tag" bs
(len, r2) <- decode32be "tlv_length" r1
let payloadLen = fromIntegral len
when (BS.length r2 < payloadLen) $ Left "TLV value extends beyond input"
let (value, after) = BS.splitAt payloadLen r2
(restTlvs, restFinal) <- decodeTLVs (n - 1) after
Right ((tag, value) : restTlvs, restFinal)
decodeMetadataTLVs :: [(Word16, ByteString)] -> Either String BundleMetadata
decodeMetadataTLVs tlvs = do
pkg <- lookupText tagPackage
ver <- lookupText tagVersion
desc <- lookupText tagDescription
lic <- lookupText tagLicense
by <- lookupText tagCreatedBy
pure BundleMetadata
{ metadataPackage = pkg
, metadataVersion = ver
, metadataDescription = desc
, metadataLicense = lic
, metadataCreatedBy = by
}
where
lookupTag t = go t tlvs
go _ [] = Nothing
go t ((tag, val):rest)
| tag == t = Just val
| otherwise = go t rest
lookupText tag =
case lookupTag tag of
Nothing -> Right Nothing
Just raw -> case decodeUtf8' raw of
Right txt -> Right (Just txt)
Left _ -> Left $ "metadata TLV has invalid UTF-8 for tag " ++ show tag
type TLVEntry = (Word16, ByteString)
-- ---------------------------------------------------------------------------
-- Data types
-- ---------------------------------------------------------------------------
data ClosureMode = ClosureComplete | ClosurePartial
deriving (Show, Eq, Ord, Generic)
data NodeHashSpec = NodeHashSpec
{ nodeHashAlgorithm :: Text
, nodeHashDomain :: Text
} deriving (Show, Eq, Ord, Generic)
data TreeSpec = TreeSpec
{ treeCalculus :: Text
, treeNodeHash :: NodeHashSpec
, treeNodePayload :: Text
} deriving (Show, Eq, Ord, Generic)
data RuntimeSpec = RuntimeSpec
{ runtimeSemantics :: Text
, runtimeEvaluation :: Text
, runtimeAbi :: Text
, runtimeCapabilities :: [Text]
} deriving (Show, Eq, Ord, Generic)
data BundleRoot = BundleRoot
{ rootIndex :: Word32
, rootRole :: Text
} deriving (Show, Eq, Ord, Generic)
data BundleExport = BundleExport
{ exportName :: Text
, exportRoot :: Word32
, exportKind :: Text
, exportAbi :: Text
} deriving (Show, Eq, Ord, Generic)
data BundleMetadata = BundleMetadata
{ metadataPackage :: Maybe Text
, metadataVersion :: Maybe Text
, metadataDescription :: Maybe Text
, metadataLicense :: Maybe Text
, metadataCreatedBy :: Maybe Text
} deriving (Show, Eq, Ord, Generic)
data BundleManifest = BundleManifest
{ manifestSchema :: Text
, manifestBundleType :: Text
, manifestTree :: TreeSpec
, manifestRuntime :: RuntimeSpec
, manifestClosure :: ClosureMode
, manifestRoots :: [BundleRoot]
, manifestExports :: [BundleExport]
, manifestMetadata :: BundleMetadata
} deriving (Show, Eq, Generic)
data BundleNode
= BNLeaf
| BNStem !Word32
| BNFork !Word32 !Word32
deriving (Show, Eq)
data Bundle = Bundle
{ bundleVersion :: Word16
, bundleRoots :: [Word32]
, bundleNodes :: Seq BundleNode
, bundleManifest :: BundleManifest
, bundleManifestBytes :: ByteString
} deriving (Show, Eq)
-- ---------------------------------------------------------------------------
-- Bundle construction
-- ---------------------------------------------------------------------------
data NodeKey = KeyLeaf | KeyStem !Word32 | KeyFork !Word32 !Word32
deriving (Eq, Ord, Show)
buildBundle :: [(Text, T)] -> Bundle
buildBundle namedTerms =
let go :: T -> (Seq BundleNode, Map NodeKey Word32) -> (Word32, (Seq BundleNode, Map NodeKey Word32))
go Leaf (nodes, seen) =
case Map.lookup KeyLeaf seen of
Just idx -> (idx, (nodes, seen))
Nothing ->
let idx = fromIntegral (Seq.length nodes)
in (idx, (nodes |> BNLeaf, Map.insert KeyLeaf idx seen))
go (Stem child) (nodes, seen) =
let (childIdx, state1) = go child (nodes, seen)
(nodes1, seen1) = state1
in case Map.lookup (KeyStem childIdx) seen1 of
Just idx -> (idx, state1)
Nothing ->
let idx = fromIntegral (Seq.length nodes1)
in (idx, (nodes1 |> BNStem childIdx, Map.insert (KeyStem childIdx) idx seen1))
go (Fork left right) (nodes, seen) =
let (leftIdx, state1) = go left (nodes, seen)
(rightIdx, state2) = go right state1
(nodes2, seen2) = state2
in case Map.lookup (KeyFork leftIdx rightIdx) seen2 of
Just idx -> (idx, state2)
Nothing ->
let idx = fromIntegral (Seq.length nodes2)
in (idx, (nodes2 |> BNFork leftIdx rightIdx, Map.insert (KeyFork leftIdx rightIdx) idx seen2))
processExport state (_, t) = let (idx, newState) = go t state in (newState, idx)
((finalNodes, _), rootIndices) = mapAccumL processExport (Seq.empty, Map.empty) namedTerms
roots = zipWith mkRoot [0 :: Int ..] rootIndices
exports = zipWith mkExport namedTerms rootIndices
manifest = makeManifest roots exports
manifestBytes = encodeManifest manifest
in Bundle
{ bundleVersion = bundleMajorVersion * 1000 + bundleMinorVersion
, bundleRoots = rootIndices
, bundleNodes = finalNodes
, bundleManifest = manifest
, bundleManifestBytes = manifestBytes
}
where
mkRoot 0 idx = BundleRoot idx "default"
mkRoot _ idx = BundleRoot idx "root"
mkExport (name, _) idx = BundleExport name idx "term" "arboricx.abi.tree.v1"
makeManifest :: [BundleRoot] -> [BundleExport] -> BundleManifest
makeManifest roots exports = BundleManifest
{ manifestSchema = "arboricx.bundle.manifest.v1"
, manifestBundleType = "tree-calculus-executable-object"
, manifestTree = TreeSpec
{ treeCalculus = "tree-calculus.v1"
, treeNodeHash = NodeHashSpec
{ nodeHashAlgorithm = "indexed"
, nodeHashDomain = "arboricx.indexed.node.v1"
}
, treeNodePayload = "arboricx.indexed.payload.v1"
}
, manifestRuntime = RuntimeSpec
{ runtimeSemantics = "tree-calculus.v1"
, runtimeEvaluation = "normal-order"
, runtimeAbi = "arboricx.abi.tree.v1"
, runtimeCapabilities = []
}
, manifestClosure = ClosureComplete
, manifestRoots = roots
, manifestExports = exports
, manifestMetadata = BundleMetadata
{ metadataPackage = Nothing
, metadataVersion = Nothing
, metadataDescription = Nothing
, metadataLicense = Nothing
, metadataCreatedBy = Just "arboricx"
}
}
-- ---------------------------------------------------------------------------
-- Bundle encoding / decoding
-- ---------------------------------------------------------------------------
encodeBundle :: Bundle -> ByteString
encodeBundle bundle =
let nodeSection = encodeNodeSection (bundleNodes bundle)
manifestBytes = bundleManifestBytes bundle
sectionCount = 2
dirOffset = fromIntegral headerLength
sectionDirLength = sectionCount * sectionEntryLength
manifestOffset = fromIntegral (headerLength + sectionDirLength)
nodesOffset = manifestOffset + fromIntegral (BS.length manifestBytes)
manifestEntry = encodeSectionEntry sectionManifest 1 flagCritical compressionNone
manifestOffset (fromIntegral $ BS.length manifestBytes)
nodesEntry = encodeSectionEntry sectionNodes 1 flagCritical compressionNone
nodesOffset (fromIntegral $ BS.length nodeSection)
header = encodeHeader bundleMajorVersion bundleMinorVersion
(fromIntegral sectionCount) 0 dirOffset
in header <> manifestEntry <> nodesEntry <> manifestBytes <> nodeSection
decodeBundle :: ByteString -> Either String Bundle
decodeBundle bs
| BS.take (BS.length bundleMagic) bs /= bundleMagic = Left "invalid magic"
| otherwise = do
(major, minor, sectionCount, _flags, dirOffset) <- decodePortableHeader bs
when (major /= bundleMajorVersion) $
Left $ "unsupported bundle major version: " ++ show major
let dirStart = fromIntegral dirOffset
dirBytes = fromIntegral sectionCount * sectionEntryLength
when (BS.length bs < dirStart + dirBytes) $
Left "bundle truncated in section directory"
let dirRaw = BS.take dirBytes $ BS.drop dirStart bs
entries <- decodeSectionEntries sectionCount dirRaw
traverse_ rejectUnknownCritical entries
manifestEntry <- requireSection sectionManifest entries
nodesEntry <- requireSection sectionNodes entries
manifestBytes <- readAndVerifySection bs manifestEntry
nodesBytes <- readAndVerifySection bs nodesEntry
manifest <- decodeManifest manifestBytes
when (treeNodePayload (manifestTree manifest) /= "arboricx.indexed.payload.v1") $
Left "manifest does not use indexed payload"
nodes <- decodeNodeSection nodesBytes
let rootIndices = map rootIndex (manifestRoots manifest)
return Bundle
{ bundleVersion = major * 1000 + minor
, bundleRoots = rootIndices
, bundleNodes = nodes
, bundleManifest = manifest
, bundleManifestBytes = manifestBytes
}
-- ---------------------------------------------------------------------------
-- Container encoding / decoding
-- ---------------------------------------------------------------------------
data SectionEntry = SectionEntry
{ seType :: Word32
, seVersion :: Word16
, seFlags :: Word16
, seCompression :: Word16
, seOffset :: Word64
, seLength :: Word64
} deriving (Show, Eq)
encodeHeader :: Word16 -> Word16 -> Word32 -> Word64 -> Word64 -> ByteString
encodeHeader major minor sectionCount flags dirOffset =
bundleMagic
<> encode16 major
<> encode16 minor
<> encode32 sectionCount
<> encode64 flags
<> encode64 dirOffset
encodeSectionEntry :: Word32 -> Word16 -> Word16 -> Word16 -> Word64 -> Word64 -> ByteString
encodeSectionEntry sectionType sectionVersion sectionFlags compression offset lengthBytes =
encode32 sectionType
<> encode16 sectionVersion
<> encode16 sectionFlags
<> encode16 compression
<> encode16 0 -- reserved
<> encode64 offset
<> encode64 lengthBytes
<> encode32 0 -- reserved padding
decodePortableHeader :: ByteString -> Either String (Word16, Word16, Word32, Word64, Word64)
decodePortableHeader bs
| BS.length bs < headerLength = Left "bundle too short for header"
| BS.take 8 bs /= bundleMagic = Left "invalid portable bundle magic"
| otherwise = do
(major, r1) <- decode16be "major_version" (BS.drop 8 bs)
(minor, r2) <- decode16be "minor_version" r1
(sectionCount, r3) <- decode32be "section_count" r2
(flags, r4) <- decode64be "flags" r3
(dirOffset, _) <- decode64be "directory_offset" r4
Right (major, minor, sectionCount, flags, dirOffset)
decodeSectionEntries :: Word32 -> ByteString -> Either String [SectionEntry]
decodeSectionEntries count bytes = reverse <$> go count bytes []
where
go 0 _ acc = Right acc
go n bs acc = do
when (BS.length bs < sectionEntryLength) $
Left "section directory truncated"
(sectionType, r1) <- decode32be "section_type" bs
(sectionVersion, r2) <- decode16be "section_version" r1
(sectionFlags, r3) <- decode16be "section_flags" r2
(compression, r4) <- decode16be "compression_codec" r3
(_reserved, r5) <- decode16be "reserved" r4
(offset, r6) <- decode64be "section_offset" r5
(len, r7) <- decode64be "section_length" r6
(_reserved2, rest) <- decode32be "reserved" r7
let entry = SectionEntry sectionType sectionVersion sectionFlags compression offset len
go (n - 1) rest (entry : acc)
rejectUnknownCritical :: SectionEntry -> Either String ()
rejectUnknownCritical entry =
let known = seType entry `elem` [sectionManifest, sectionNodes]
critical = seFlags entry .&. flagCritical /= 0
in when (critical && not known) $
Left $ "unknown critical section type: " ++ show (seType entry)
requireSection :: Word32 -> [SectionEntry] -> Either String SectionEntry
requireSection sectionType entries =
case filter ((== sectionType) . seType) entries of
[entry] -> Right entry
[] -> Left $ "missing required section type: " ++ show sectionType
_ -> Left $ "duplicate section type: " ++ show sectionType
readAndVerifySection :: ByteString -> SectionEntry -> Either String ByteString
readAndVerifySection bs entry = do
when (seCompression entry /= compressionNone) $
Left $ "unsupported compression codec in section " ++ show (seType entry)
let offset = fromIntegral (seOffset entry)
len = fromIntegral (seLength entry)
when (offset < 0 || len < 0 || BS.length bs < offset + len) $
Left $ "section extends beyond bundle end: " ++ show (seType entry)
Right $ BS.take len $ BS.drop offset bs
-- ---------------------------------------------------------------------------
-- Node section encoding / decoding
-- ---------------------------------------------------------------------------
serializeBundleNode :: BundleNode -> ByteString
serializeBundleNode BNLeaf = BS.pack [0x00]
serializeBundleNode (BNStem child) = BS.pack [0x01] <> encode32 child
serializeBundleNode (BNFork left right) = BS.pack [0x02] <> encode32 left <> encode32 right
encodeNodeSection :: Seq BundleNode -> ByteString
encodeNodeSection nodes =
encode64 (fromIntegral $ Seq.length nodes)
<> foldMap encodeNodeEntry nodes
where
encodeNodeEntry node =
let payload = serializeBundleNode node
in encode32 (fromIntegral $ BS.length payload) <> payload
decodeNodeSection :: ByteString -> Either String (Seq BundleNode)
decodeNodeSection bs = do
(nodeCount, rest) <- decode64be "node_count" bs
decodeNodeEntries nodeCount rest
decodeNodeEntries :: Word64 -> ByteString -> Either String (Seq BundleNode)
decodeNodeEntries count bs = go count bs Seq.empty
where
go 0 rest acc
| BS.null rest = Right acc
| otherwise = Left "trailing bytes after node section"
go n bytes acc
| BS.length bytes < 4 =
Left "not enough bytes for node entry length"
| otherwise = do
(plen, rest) <- decode32be "payload_len" bytes
let payloadLen = fromIntegral plen
if BS.length rest < payloadLen
then Left "payload extends beyond node section end"
else do
let (payload, after) = BS.splitAt payloadLen rest
node <- deserializeBundleNode payload
go (n - 1) after (acc |> node)
deserializeBundleNode :: ByteString -> Either String BundleNode
deserializeBundleNode payload =
case BS.uncons payload of
Just (0x00, rest)
| BS.null rest -> Right BNLeaf
| otherwise -> Left "invalid leaf payload length"
Just (0x01, rest)
| BS.length rest == 4 -> Right $ BNStem (decodeU32 rest)
| otherwise -> Left "invalid stem payload length"
Just (0x02, rest)
| BS.length rest == 8 ->
let (leftBytes, rightBytes) = BS.splitAt 4 rest
in Right $ BNFork (decodeU32 leftBytes) (decodeU32 rightBytes)
| otherwise -> Left "invalid fork payload length"
_ -> Left "invalid node payload"
decodeU32 :: ByteString -> Word32
decodeU32 bs =
let b0 = fromIntegral (BS.index bs 0) :: Word32
b1 = fromIntegral (BS.index bs 1) :: Word32
b2 = fromIntegral (BS.index bs 2) :: Word32
b3 = fromIntegral (BS.index bs 3) :: Word32
in (b0 `shiftL` 24) .|. (b1 `shiftL` 16) .|. (b2 `shiftL` 8) .|. b3
-- ---------------------------------------------------------------------------
-- Bundle verification
-- ---------------------------------------------------------------------------
verifyBundle :: Bundle -> Either String ()
verifyBundle bundle
| bundleVersion bundle < 1 = Left $ "unsupported bundle version: " ++ show (bundleVersion bundle)
| Seq.null (bundleNodes bundle) = Left "bundle has no nodes"
verifyBundle bundle = do
verifyManifestConstraints (bundleManifest bundle)
let nodeCount = fromIntegral $ Seq.length (bundleNodes bundle)
traverse_ (\idx -> when (idx >= nodeCount) $ Left $ "root index out of bounds: " ++ show idx)
(bundleRoots bundle)
traverse_ (\exp -> when (exportRoot exp >= nodeCount) $ Left $ "export index out of bounds: " ++ show (exportRoot exp))
(manifestExports $ bundleManifest bundle)
let verifyNode i node = case node of
BNLeaf -> Right ()
BNStem child -> do
when (child >= i) $ Left $ "stem at index " ++ show i ++ " references child " ++ show child
when (child >= nodeCount) $ Left $ "stem at index " ++ show i ++ " references child out of bounds"
Right ()
BNFork left right -> do
when (left >= i) $ Left $ "fork at index " ++ show i ++ " references left " ++ show left
when (right >= i) $ Left $ "fork at index " ++ show i ++ " references right " ++ show right
when (left >= nodeCount) $ Left $ "fork at index " ++ show i ++ " references left out of bounds"
when (right >= nodeCount) $ Left $ "fork at index " ++ show i ++ " references right out of bounds"
Right ()
mapM_ (\i -> case Seq.lookup (fromIntegral i) (bundleNodes bundle) of
Nothing -> Left $ "internal error: node " ++ show i ++ " not found"
Just node -> verifyNode i node) [0 :: Word32 .. nodeCount - 1]
let dupCheck = foldM (\seen (i, node) -> case node of
BNLeaf -> if Set.member (0 :: Word8, 0 :: Word32, 0 :: Word32) seen
then Left $ "duplicate leaf at index " ++ show i
else Right $ Set.insert (0, 0, 0) seen
BNStem child -> if Set.member (1, child, 0) seen
then Left $ "duplicate stem at index " ++ show i
else Right $ Set.insert (1, child, 0) seen
BNFork left right -> if Set.member (2, left, right) seen
then Left $ "duplicate fork at index " ++ show i
else Right $ Set.insert (2, left, right) seen) Set.empty (zip [0 :: Word32 ..] (Foldable.toList $ bundleNodes bundle))
_ <- dupCheck
Right ()
verifyManifestConstraints :: BundleManifest -> Either String ()
verifyManifestConstraints manifest = do
when (manifestSchema manifest /= "arboricx.bundle.manifest.v1") $
Left $ "unsupported manifest schema: " ++ unpack (manifestSchema manifest)
when (manifestBundleType manifest /= "tree-calculus-executable-object") $
Left $ "unsupported bundle type: " ++ unpack (manifestBundleType manifest)
let treeSpec = manifestTree manifest
hashSpec = treeNodeHash treeSpec
runtimeSpec = manifestRuntime manifest
when (treeCalculus treeSpec /= "tree-calculus.v1") $
Left $ "unsupported calculus: " ++ unpack (treeCalculus treeSpec)
when (nodeHashAlgorithm hashSpec /= "indexed") $
Left $ "unsupported node hash algorithm: " ++ unpack (nodeHashAlgorithm hashSpec)
when (nodeHashDomain hashSpec /= "arboricx.indexed.node.v1") $
Left $ "unsupported node hash domain: " ++ unpack (nodeHashDomain hashSpec)
when (treeNodePayload treeSpec /= "arboricx.indexed.payload.v1") $
Left $ "unsupported node payload: " ++ unpack (treeNodePayload treeSpec)
when (runtimeSemantics runtimeSpec /= "tree-calculus.v1") $
Left $ "unsupported runtime semantics: " ++ unpack (runtimeSemantics runtimeSpec)
when (runtimeAbi runtimeSpec /= "arboricx.abi.tree.v1") $
Left $ "unsupported runtime ABI: " ++ unpack (runtimeAbi runtimeSpec)
when (not (null (runtimeCapabilities runtimeSpec))) $
Left "unsupported runtime capabilities"
when (manifestClosure manifest /= ClosureComplete) $
Left "bundle requires closure = complete"
when (null $ manifestRoots manifest) $
Left "manifest has no roots"
when (null $ manifestExports manifest) $
Left "manifest has no exports"
traverse_ verifyExport (manifestExports manifest)
where
verifyExport exported = do
when (T.null $ exportName exported) $
Left "manifest export has empty name"
-- ---------------------------------------------------------------------------
-- Bundle reconstruction
-- ---------------------------------------------------------------------------
reconstructBundleTerms :: Seq BundleNode -> Vector T
reconstructBundleTerms nodes = V.create $ do
let n = Seq.length nodes
vec <- MV.new n
forM_ (zip [0 :: Int ..] (Foldable.toList nodes)) $ \(i, node) -> do
t <- case node of
BNLeaf -> return Leaf
BNStem child -> Stem <$> MV.read vec (fromIntegral child)
BNFork left right -> do
l <- MV.read vec (fromIntegral left)
r <- MV.read vec (fromIntegral right)
return $ Fork l r
MV.write vec i t
return vec
-- ---------------------------------------------------------------------------
-- Primitive binary helpers
-- ---------------------------------------------------------------------------
encode16 :: Word16 -> ByteString
encode16 w = BS.pack
[ fromIntegral (shiftR w 8)
, fromIntegral w
]
encode32 :: Word32 -> ByteString
encode32 w = BS.pack
[ fromIntegral (shiftR w 24)
, fromIntegral (shiftR w 16)
, fromIntegral (shiftR w 8)
, fromIntegral w
]
encode64 :: Word64 -> ByteString
encode64 w = BS.pack
[ fromIntegral (shiftR w 56)
, fromIntegral (shiftR w 48)
, fromIntegral (shiftR w 40)
, fromIntegral (shiftR w 32)
, fromIntegral (shiftR w 24)
, fromIntegral (shiftR w 16)
, fromIntegral (shiftR w 8)
, fromIntegral w
]
decode16be :: String -> ByteString -> Either String (Word16, ByteString)
decode16be label bs
| BS.length bs < 2 = Left (label ++ ": not enough bytes for u16")
| otherwise =
let b0 = fromIntegral (BS.index bs 0) :: Word16
b1 = fromIntegral (BS.index bs 1) :: Word16
in Right ((b0 `shiftL` 8) .|. b1, BS.drop 2 bs)
decode32be :: String -> ByteString -> Either String (Word32, ByteString)
decode32be label bs
| BS.length bs < 4 = Left (label ++ ": not enough bytes for u32")
| otherwise =
let b0 = fromIntegral (BS.index bs 0) :: Word32
b1 = fromIntegral (BS.index bs 1) :: Word32
b2 = fromIntegral (BS.index bs 2) :: Word32
b3 = fromIntegral (BS.index bs 3) :: Word32
in Right ((b0 `shiftL` 24) .|. (b1 `shiftL` 16) .|. (b2 `shiftL` 8) .|. b3, BS.drop 4 bs)
decode64be :: String -> ByteString -> Either String (Word64, ByteString)
decode64be label bs
| BS.length bs < 8 = Left (label ++ ": not enough bytes for u64")
| otherwise =
let b0 = fromIntegral (BS.index bs 0) :: Word64
b1 = fromIntegral (BS.index bs 1) :: Word64
b2 = fromIntegral (BS.index bs 2) :: Word64
b3 = fromIntegral (BS.index bs 3) :: Word64
b4 = fromIntegral (BS.index bs 4) :: Word64
b5 = fromIntegral (BS.index bs 5) :: Word64
b6 = fromIntegral (BS.index bs 6) :: Word64
b7 = fromIntegral (BS.index bs 7) :: Word64
in Right ((b0 `shiftL` 56) .|. (b1 `shiftL` 48) .|. (b2 `shiftL` 40) .|. (b3 `shiftL` 32)
.|. (b4 `shiftL` 24) .|. (b5 `shiftL` 16) .|. (b6 `shiftL` 8) .|. b7, BS.drop 8 bs)
-- ---------------------------------------------------------------------------
-- Helpers
-- ---------------------------------------------------------------------------
defaultExportNames :: Int -> [Text]
defaultExportNames n =
case n of
0 -> []
1 -> ["root"]
_ -> ["root" <> T.pack (show i) | i <- [0 :: Int .. n - 1]]