Switch manifest serialization to CBOR

Replace JSON-based bundle manifest with a CBOR-encoded format. The manifest
is now a canonical CBOR map with order-strict key decoding, raw 32-byte hash
payloads (instead of hex-encoded JSON), and compact binary representation.
This commit is contained in:
2026-05-07 21:41:50 -05:00
parent d9f25a2b5a
commit e3117e3ac8
23 changed files with 988 additions and 275 deletions

View File

@@ -18,9 +18,12 @@
* Offset 8B u64 BE
* Length 8B u64 BE
* SHA256Digest 32B raw
* Manifest: canonical CBOR-encoded map (cborg output from Haskell)
* Nodes: binary section
*/
import { createHash } from "node:crypto";
import { decodeCbor } from "./cbor.js";
// ── Constants ───────────────────────────────────────────────────────────────
@@ -170,12 +173,37 @@ export function parseBundle(buffer) {
}
/**
* Convenience: parse and return just the manifest JSON.
* Post-process a CBOR-decoded manifest to normalize hash fields
* from raw bytes to hex strings (matching the old JSON wire format).
*/
function normalizeManifest(raw) {
const tree = raw.tree;
if (tree && tree.nodeHash && tree.nodeHash.domain) {
tree.nodeHash.domain = tree.nodeHash.domain;
}
// Convert root hashes from raw bytes to hex
const roots = (raw.roots || []).map((r) => ({
...r,
hash: r.hash instanceof Uint8Array ? Buffer.from(r.hash).toString("hex") : r.hash,
}));
// Convert export root hashes from raw bytes to hex
const exports = (raw.exports || []).map((e) => ({
...e,
root: e.root instanceof Uint8Array ? Buffer.from(e.root).toString("hex") : e.root,
}));
return { ...raw, roots, exports };
}
/**
* Convenience: parse and return the manifest from CBOR.
*/
export function parseManifest(buffer) {
const bundle = parseBundle(buffer);
const manifestEntry = bundle.sections.get(SECTION_MANIFEST);
return JSON.parse(manifestEntry.data.toString("utf-8"));
return normalizeManifest(decodeCbor(manifestEntry.data));
}
/**

130
ext/js/src/cbor.js Normal file
View File

@@ -0,0 +1,130 @@
/**
* cbor.js — Minimal CBOR decoder for the Arborix manifest format.
*
* Decodes the canonical CBOR produced by the Haskell cborg library:
* - Maps: major type 5 (0xa0 + length)
* - Arrays: major type 4 (0x80 + length)
* - Text strings: major type 3, UTF-8 encoded
* - Byte strings: major type 2
* - Unsigned ints: major type 0
* - Simple values: 0xc2 = false, 0xc3 = true
*
* Only covers the subset needed for the manifest.
*/
// ── Decoding state ──────────────────────────────────────────────────────────
/**
* @param {Buffer} data
* @returns {number} remaining buffer
*/
function makeDecoder(data) {
let offset = 0;
return {
/** @returns {number} current offset */
getPos() { return offset; },
/** @returns {number} remaining bytes */
remaining() { return data.length - offset; },
/** @returns {number} total length */
length() { return data.length; },
/** Read N bytes and advance */
read(n) {
if (offset + n > data.length) {
throw new Error(`CBOR read: expected ${n} bytes, ${data.length - offset} remaining at offset ${offset}`);
}
const slice = data.slice(offset, offset + n);
offset += n;
return slice;
},
/** Read a single byte */
readByte() {
if (offset >= data.length) {
throw new Error(`CBOR readByte: no bytes remaining at offset ${offset}`);
}
return data[offset++];
},
};
}
// ── CBOR helpers ────────────────────────────────────────────────────────────
/**
* Read a CBOR length (major type initial byte encodes length for values < 24).
* For 24+, reads additional bytes per spec.
* @returns {number}
*/
function cborReadLength(dec, startByte) {
const additional = startByte & 0x1f;
if (additional < 24) return additional;
if (additional === 24) return dec.read(1)[0];
if (additional === 25) return dec.read(2).readUint16BE(0);
if (additional === 26) return dec.read(4).readUint32BE(0);
throw new Error(`CBOR: unsupported additional info ${additional}`);
}
// ── Top-level decode ────────────────────────────────────────────────────────
/**
* Decode a single CBOR value from buffer bytes.
* @param {Buffer} buf
* @returns {*}
*/
export function decodeCbor(buf) {
const dec = makeDecoder(buf);
const result = cborDecode(dec);
return result;
}
function cborDecode(dec) {
const first = dec.readByte();
const major = (first >> 5) & 0x07;
const info = first & 0x1f;
switch (major) {
case 0: // unsigned int
case 1: // negative int
return cborReadLength(dec, first);
case 2: // byte string
return dec.read(cborReadLength(dec, first));
case 3: // text string (UTF-8)
const len = cborReadLength(dec, first);
return dec.read(len).toString("utf-8");
case 4: // array
const arrLen = cborReadLength(dec, first);
const arr = [];
for (let i = 0; i < arrLen; i++) {
arr.push(cborDecode(dec));
}
return arr;
case 5: // map
const mapLen = cborReadLength(dec, first);
const map = {};
for (let i = 0; i < mapLen; i++) {
const key = cborDecode(dec);
const val = cborDecode(dec);
map[key] = val;
}
return map;
case 7: // simple values / floats
if (info === 20) return false;
if (info === 21) return true;
if (info === 22) return null; // undefined
if (info === 23) return null; // break (shouldn't appear in definite-length)
// 0xf9-fb are half/float/double floats — not used by our writer
throw new Error(`CBOR: unsupported simple value ${info}`);
default:
// Tags (major 6) and break (0xff) — not used in our manifest
throw new Error(`CBOR: unsupported major type ${major}, info ${info}`);
}
}

View File

@@ -33,7 +33,7 @@ export function validateManifest(manifest) {
`unsupported node hash algorithm: ${tree.nodeHash.algorithm}`
);
}
if (tree.nodeHash.domain !== "tricu.merkle.node.v1" && tree.nodeHash.domain !== "arborix.merkle.node.v1") {
if (tree.nodeHash.domain !== "arborix.merkle.node.v1") {
throw new Error(
`unsupported node hash domain: ${tree.nodeHash.domain}`
);

View File

@@ -7,14 +7,14 @@
* Fork: 0x02 || left_hash (32 bytes raw) || right_hash (32 bytes raw)
*
* Hash computation:
* hash = SHA256( "tricu.merkle.node.v1" || 0x00 || node_payload )
* hash = SHA256( "arborix.merkle.node.v1" || 0x00 || node_payload )
*/
import { createHash } from "node:crypto";
// ── Constants ───────────────────────────────────────────────────────────────
const DOMAIN_TAG = "tricu.merkle.node.v1";
const DOMAIN_TAG = "arborix.merkle.node.v1";
const HASH_LENGTH = 32; // raw hash bytes
const HEX_LENGTH = 64; // hex-encoded hash length