Switch manifest serialization to CBOR

Replace JSON-based bundle manifest with a CBOR-encoded format. The manifest
is now a canonical CBOR map with order-strict key decoding, raw 32-byte hash
payloads (instead of hex-encoded JSON), and compact binary representation.
This commit is contained in:
2026-05-07 21:41:50 -05:00
parent d9f25a2b5a
commit e3117e3ac8
23 changed files with 988 additions and 275 deletions

View File

@@ -18,9 +18,12 @@
* Offset 8B u64 BE
* Length 8B u64 BE
* SHA256Digest 32B raw
* Manifest: canonical CBOR-encoded map (cborg output from Haskell)
* Nodes: binary section
*/
import { createHash } from "node:crypto";
import { decodeCbor } from "./cbor.js";
// ── Constants ───────────────────────────────────────────────────────────────
@@ -170,12 +173,37 @@ export function parseBundle(buffer) {
}
/**
 * Post-process a CBOR-decoded manifest to normalize hash fields
 * from raw bytes to hex strings (matching the old JSON wire format).
 *
 * Does not mutate `raw`; returns a shallow copy with `roots[].hash`
 * and `exports[].root` hex-encoded when they arrive as raw bytes.
 *
 * @param {object} raw - manifest object as returned by decodeCbor
 * @returns {object} manifest with hash fields as lowercase hex strings
 */
function normalizeManifest(raw) {
  // Buffer is a Uint8Array subclass, so this covers both.
  const toHex = (v) =>
    v instanceof Uint8Array ? Buffer.from(v).toString("hex") : v;
  // Convert root hashes from raw bytes to hex
  const roots = (raw.roots || []).map((r) => ({ ...r, hash: toHex(r.hash) }));
  // Convert export root hashes from raw bytes to hex
  const exports = (raw.exports || []).map((e) => ({ ...e, root: toHex(e.root) }));
  return { ...raw, roots, exports };
}
/**
 * Convenience: parse a bundle and return its manifest, decoded from the
 * CBOR manifest section and normalized to the hex-string wire format.
 *
 * @param {Buffer} buffer - complete bundle file contents
 * @returns {object} normalized manifest object
 * @throws {Error} if the bundle has no manifest section (parseBundle
 *                 raises earlier for structural/digest failures)
 */
export function parseManifest(buffer) {
  const bundle = parseBundle(buffer);
  const manifestEntry = bundle.sections.get(SECTION_MANIFEST);
  if (!manifestEntry) {
    throw new Error("missing manifest section");
  }
  // The stale JSON.parse path from the pre-CBOR format is gone; the
  // manifest section is now canonical CBOR.
  return normalizeManifest(decodeCbor(manifestEntry.data));
}
/**

130
ext/js/src/cbor.js Normal file
View File

@@ -0,0 +1,130 @@
/**
* cbor.js — Minimal CBOR decoder for the Arborix manifest format.
*
* Decodes the canonical CBOR produced by the Haskell cborg library:
* - Maps: major type 5 (0xa0 + length)
* - Arrays: major type 4 (0x80 + length)
* - Text strings: major type 3, UTF-8 encoded
* - Byte strings: major type 2
* - Unsigned ints: major type 0
 * - Simple values: 0xf4 = false, 0xf5 = true, 0xf6 = null
*
* Only covers the subset needed for the manifest.
*/
// ── Decoding state ──────────────────────────────────────────────────────────
/**
 * Create a stateful read cursor over a buffer.
 *
 * @param {Buffer} data - bytes to decode
 * @returns {{getPos: function(): number,
 *            remaining: function(): number,
 *            length: function(): number,
 *            read: function(number): Buffer,
 *            readByte: function(): number}} decoder cursor
 */
function makeDecoder(data) {
  let offset = 0;
  return {
    /** @returns {number} current offset */
    getPos() { return offset; },
    /** @returns {number} remaining bytes */
    remaining() { return data.length - offset; },
    /** @returns {number} total length */
    length() { return data.length; },
    /** Read N bytes and advance. @throws {Error} on buffer underrun */
    read(n) {
      if (offset + n > data.length) {
        throw new Error(`CBOR read: expected ${n} bytes, ${data.length - offset} remaining at offset ${offset}`);
      }
      // subarray is a zero-copy view (Buffer#slice is a deprecated alias).
      const slice = data.subarray(offset, offset + n);
      offset += n;
      return slice;
    },
    /** Read a single byte and advance. @throws {Error} on buffer underrun */
    readByte() {
      if (offset >= data.length) {
        throw new Error(`CBOR readByte: no bytes remaining at offset ${offset}`);
      }
      return data[offset++];
    },
  };
}
// ── CBOR helpers ────────────────────────────────────────────────────────────
/**
 * Read the argument of a CBOR initial byte: the additional-info value
 * itself when < 24, otherwise 1/2/4/8 extra big-endian bytes per RFC 8949.
 *
 * @param {object} dec - decoder cursor from makeDecoder
 * @param {number} startByte - initial byte of the data item
 * @returns {number}
 * @throws {Error} for indefinite lengths (31) or 64-bit values beyond
 *                 Number.MAX_SAFE_INTEGER
 */
function cborReadLength(dec, startByte) {
  const additional = startByte & 0x1f;
  if (additional < 24) return additional;
  if (additional === 24) return dec.read(1)[0];
  if (additional === 25) return dec.read(2).readUint16BE(0);
  if (additional === 26) return dec.read(4).readUint32BE(0);
  if (additional === 27) {
    // 8-byte argument: accept as long as it fits a JS safe integer.
    const big = dec.read(8).readBigUInt64BE(0);
    if (big > BigInt(Number.MAX_SAFE_INTEGER)) {
      throw new Error(`CBOR: 64-bit value ${big} exceeds Number.MAX_SAFE_INTEGER`);
    }
    return Number(big);
  }
  throw new Error(`CBOR: unsupported additional info ${additional}`);
}
// ── Top-level decode ────────────────────────────────────────────────────────
/**
 * Decode a single CBOR value from buffer bytes.
 * @param {Buffer} buf
 * @returns {*}
 */
export function decodeCbor(buf) {
  return cborDecode(makeDecoder(buf));
}
/**
 * Recursively decode one data item at the cursor position.
 * @param {object} dec - decoder cursor from makeDecoder
 * @returns {*}
 * @throws {Error} on unsupported major types, tags, or floats
 */
function cborDecode(dec) {
  const first = dec.readByte();
  const major = (first >> 5) & 0x07;
  const info = first & 0x1f;
  switch (major) {
    case 0: // unsigned int
      return cborReadLength(dec, first);
    case 1: // negative int: argument n encodes the value -1 - n (RFC 8949 §3.1)
      return -1 - cborReadLength(dec, first);
    case 2: // byte string
      return dec.read(cborReadLength(dec, first));
    case 3: { // text string (UTF-8)
      const len = cborReadLength(dec, first);
      return dec.read(len).toString("utf-8");
    }
    case 4: { // array
      const arrLen = cborReadLength(dec, first);
      const arr = [];
      for (let i = 0; i < arrLen; i++) {
        arr.push(cborDecode(dec));
      }
      return arr;
    }
    case 5: { // map — collect entries and build via Object.fromEntries so a
      // hostile "__proto__" key becomes an own property instead of
      // polluting the result's prototype.
      const mapLen = cborReadLength(dec, first);
      const entries = [];
      for (let i = 0; i < mapLen; i++) {
        const key = cborDecode(dec);
        entries.push([key, cborDecode(dec)]);
      }
      return Object.fromEntries(entries);
    }
    case 7: // simple values / floats
      if (info === 20) return false; // 0xf4
      if (info === 21) return true;  // 0xf5
      if (info === 22) return null;  // 0xf6 null
      if (info === 23) return null;  // 0xf7 undefined — mapped to null
      // 0xf9-fb are half/float/double floats — not used by our writer
      throw new Error(`CBOR: unsupported simple value ${info}`);
    default:
      // Tags (major 6) and indefinite-length break (0xff) — not used in our manifest
      throw new Error(`CBOR: unsupported major type ${major}, info ${info}`);
  }
}

View File

@@ -33,7 +33,7 @@ export function validateManifest(manifest) {
`unsupported node hash algorithm: ${tree.nodeHash.algorithm}`
);
}
if (tree.nodeHash.domain !== "tricu.merkle.node.v1" && tree.nodeHash.domain !== "arborix.merkle.node.v1") {
if (tree.nodeHash.domain !== "arborix.merkle.node.v1") {
throw new Error(
`unsupported node hash domain: ${tree.nodeHash.domain}`
);

View File

@@ -7,14 +7,14 @@
* Fork: 0x02 || left_hash (32 bytes raw) || right_hash (32 bytes raw)
*
* Hash computation:
* hash = SHA256( "tricu.merkle.node.v1" || 0x00 || node_payload )
* hash = SHA256( "arborix.merkle.node.v1" || 0x00 || node_payload )
*/
import { createHash } from "node:crypto";
// ── Constants ───────────────────────────────────────────────────────────────
const DOMAIN_TAG = "tricu.merkle.node.v1";
const DOMAIN_TAG = "arborix.merkle.node.v1";
const HASH_LENGTH = 32; // raw hash bytes
const HEX_LENGTH = 64; // hex-encoded hash length

View File

@@ -1,5 +1,6 @@
import { readFileSync } from "node:fs";
import { strictEqual, ok, throws } from "node:assert";
import { createHash } from "node:crypto";
import { describe, it } from "node:test";
import {
parseBundle,
@@ -13,12 +14,12 @@ import {
parseNodeSection as parseNodes,
} from "../src/merkle.js";
const fixtureDir = "test/fixtures";
const fixtureDir = "../../test/fixtures";
describe("bundle parsing", () => {
it("valid bundle parses header and sections", () => {
const bundle = parseBundle(
readFileSync(`${fixtureDir}/id.tri.bundle`)
readFileSync(`${fixtureDir}/id.arborix`)
);
strictEqual(bundle.version, "1.0");
strictEqual(bundle.sectionCount, 2);
@@ -26,15 +27,16 @@ describe("bundle parsing", () => {
ok(bundle.sections.has(2)); // nodes
});
it("parseManifest returns valid JSON", () => {
it("parseManifest returns valid manifest", () => {
const manifest = parseManifest(
readFileSync(`${fixtureDir}/id.tri.bundle`)
readFileSync(`${fixtureDir}/id.arborix`)
);
strictEqual(manifest.schema, "arborix.bundle.manifest.v1");
strictEqual(manifest.bundleType, "tree-calculus-executable-object");
strictEqual(manifest.closure, "complete");
strictEqual(manifest.tree.calculus, "tree-calculus.v1");
strictEqual(manifest.tree.nodeHash.algorithm, "sha256");
strictEqual(manifest.tree.nodeHash.domain, "arborix.merkle.node.v1");
strictEqual(manifest.runtime.semantics, "tree-calculus.v1");
strictEqual(manifest.runtime.abi, "arborix.abi.tree.v1");
});
@@ -43,7 +45,7 @@ describe("bundle parsing", () => {
describe("hash verification", () => {
it("valid bundle nodes verify", () => {
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/id.tri.bundle`)
readFileSync(`${fixtureDir}/id.arborix`)
);
const { nodeMap } = parseNodes(data);
const { verified } = verifyNodeHashes(nodeMap);
@@ -64,4 +66,69 @@ describe("errors", () => {
buf.writeUInt16BE(2, 8); // major version 2
throws(() => parseBundle(buf), /unsupported bundle major version/);
});
it("bad section digest fails", () => {
const buf = readFileSync(`${fixtureDir}/id.arborix`);
// Corrupt one byte in the manifest section
buf[152] ^= 0x01;
throws(() => parseBundle(buf), /digest mismatch/);
});
it("truncated bundle fails", () => {
const buf = readFileSync(`${fixtureDir}/id.arborix`);
const truncated = buf.slice(0, 40);
throws(() => parseBundle(truncated), /truncated/);
});
it("missing nodes section fails", () => {
// Build a bundle with only manifest entry in the directory (1 section instead of 2)
const header = Buffer.alloc(32, 0);
header.write("ARBORIX\0", 0, 8);
header.writeUInt16BE(1, 8); // major version
header.writeUInt16BE(0, 10); // minor version
header.writeUInt32BE(1, 12); // 1 section
// Build a manifest JSON
const manifestObj = {
schema: "arborix.bundle.manifest.v1",
bundleType: "tree-calculus-executable-object",
tree: {
calculus: "tree-calculus.v1",
nodeHash: {
algorithm: "sha256",
domain: "arborix.merkle.node.v1"
},
nodePayload: "arborix.merkle.payload.v1"
},
runtime: {
semantics: "tree-calculus.v1",
evaluation: "normal-order",
abi: "arborix.abi.tree.v1",
capabilities: []
},
closure: "complete",
roots: [{ hash: Buffer.alloc(32).toString("hex"), role: "default" }],
exports: [{ name: "root", root: Buffer.alloc(32).toString("hex"), kind: "term", abi: "arborix.abi.tree.v1" }],
metadata: { createdBy: "arborix" }
};
const manifestJson = JSON.stringify(manifestObj);
const manifestBytes = Buffer.from(manifestJson);
// Section directory entry (60 bytes, all fields are u64 after the u16s)
const entry = Buffer.alloc(60, 0);
entry.writeUInt32BE(1, 0); // type: manifest
entry.writeUInt16BE(1, 4); // version
entry.writeUInt16BE(1, 6); // flags: critical
entry.writeUInt16BE(0, 8); // compression: none
entry.writeUInt16BE(1, 10); // digest algorithm: sha256
entry.writeBigUInt64BE(BigInt(32 + 60), 12); // offset (u64)
entry.writeBigUInt64BE(BigInt(manifestBytes.length), 20); // length (u64)
entry.set(createHash("sha256").update(manifestBytes).digest(), 28); // digest (32 bytes)
// Set dirOffset to 32 so parseBundle reads directory from after header
header.writeBigUInt64BE(BigInt(32), 24);
const bundleBuf = Buffer.concat([header, entry, manifestBytes]);
throws(() => parseBundle(bundleBuf), /missing required section/);
});
});

View File

@@ -1,13 +1,14 @@
import { readFileSync } from "node:fs";
import { strictEqual, ok } from "node:assert";
import { describe, it } from "node:test";
import { parseNodeSection } from "../src/bundle.js";
import { parseNodeSection as bundleParseNodeSection, parseBundle, parseManifest } from "../src/bundle.js";
import {
verifyNodeHashes,
verifyClosure,
verifyRootClosure,
deserializePayload,
computeNodeHash,
parseNodeSection,
} from "../src/merkle.js";
describe("merkle — deserializePayload", () => {
@@ -49,46 +50,70 @@ describe("merkle — computeNodeHash", () => {
const hash = computeNodeHash(leaf);
strictEqual(hash.length, 64);
});
it("Leaf hash matches expected Arborix domain", () => {
const leaf = { type: "leaf" };
const hash = computeNodeHash(leaf);
strictEqual(hash, "e54db458aa8e94782f7c61ad6c1f19a1c0c6fca7ffe53674f0d2bc5ff7ab02ff");
});
});
describe("merkle — node section parsing", () => {
const fixtureDir = "test/fixtures";
const fixtureDir = "../../test/fixtures";
it("parses id.tri.bundle with correct node count", () => {
const data = parseNodeSection(
readFileSync(`${fixtureDir}/id.tri.bundle`)
it("parses id.arborix with correct node count", () => {
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/id.arborix`)
);
const { nodeMap } = parseNodes(data);
const { nodeMap } = parseNodeSection(data);
strictEqual(nodeMap.size, 4);
});
it("parses true.tri.bundle with correct node count", () => {
const data = parseNodeSection(
readFileSync(`${fixtureDir}/true.tri.bundle`)
it("parses true.arborix with correct node count", () => {
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/true.arborix`)
);
const { nodeMap } = parseNodes(data);
const { nodeMap } = parseNodeSection(data);
strictEqual(nodeMap.size, 2);
});
it("parses false.arborix with correct node count", () => {
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/false.arborix`)
);
const { nodeMap } = parseNodeSection(data);
strictEqual(nodeMap.size, 1);
});
});
describe("merkle — hash verification", () => {
const fixtureDir = "test/fixtures";
const fixtureDir = "../../test/fixtures";
it("id.tri.bundle nodes all verify", () => {
const data = parseNodeSection(
readFileSync(`${fixtureDir}/id.tri.bundle`)
it("id.arborix nodes all verify", () => {
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/id.arborix`)
);
const { nodeMap } = parseNodes(data);
const { nodeMap } = parseNodeSection(data);
const { verified, mismatches } = verifyNodeHashes(nodeMap);
ok(verified, "id.tri.bundle node hashes should verify");
ok(verified, "id.arborix node hashes should verify");
strictEqual(mismatches.length, 0);
});
it("true.arborix nodes all verify", () => {
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/true.arborix`)
);
const { nodeMap } = parseNodeSection(data);
const { verified, mismatches } = verifyNodeHashes(nodeMap);
ok(verified, "true.arborix node hashes should verify");
strictEqual(mismatches.length, 0);
});
it("corrupted node payload fails hash verification", () => {
const data = parseNodeSection(
readFileSync(`${fixtureDir}/id.tri.bundle`)
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/id.arborix`)
);
const { nodeMap } = parseNodes(data);
const { nodeMap } = parseNodeSection(data);
// Find a stem node to corrupt
let stemKey = null;
for (const [key, node] of nodeMap) {
@@ -110,32 +135,39 @@ describe("merkle — hash verification", () => {
});
describe("merkle — closure verification", () => {
const fixtureDir = "test/fixtures";
const fixtureDir = "../../test/fixtures";
it("id.tri.bundle has complete closure", () => {
const data = parseNodeSection(
readFileSync(`${fixtureDir}/id.tri.bundle`)
it("id.arborix has complete closure", () => {
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/id.arborix`)
);
const { nodeMap } = parseNodes(data);
const { nodeMap } = parseNodeSection(data);
const { complete, missing } = verifyClosure(nodeMap);
ok(complete, "id.tri.bundle should have complete closure");
ok(complete, "id.arborix should have complete closure");
strictEqual(missing.length, 0);
});
it("verifyRootClosure checks transitive reachability", () => {
const data = parseNodeSection(
readFileSync(`${fixtureDir}/id.tri.bundle`)
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/id.arborix`)
);
const { nodeMap } = parseNodes(data);
const rootHash = "039cc9aacf5be78ec1975713e6ad154a36988e3f3df18589b0d0c801d0825d78";
const { nodeMap } = parseNodeSection(data);
// Use the actual root hash from the fixture's manifest
const manifest = parseManifest(readFileSync(`${fixtureDir}/id.arborix`));
const rootHash = manifest.exports[0].root;
const { complete, missingRoots } = verifyRootClosure(nodeMap, rootHash);
ok(complete, "root should be reachable");
strictEqual(missingRoots.length, 0);
});
});
// Helper import
import { parseNodeSection as parseNodes } from "../src/merkle.js";
it("parseNodeSection returns correct node count", () => {
const data = bundleParseNodeSection(
readFileSync(`${fixtureDir}/id.arborix`)
);
const result = parseNodeSection(data);
strictEqual(result.count, 4);
});
});
// Helper for throws
function throws(fn, expected) {

View File

@@ -7,10 +7,10 @@ import { validateManifest, selectExport } from "../src/manifest.js";
import { verifyNodeHashes, parseNodeSection as parseNodes } from "../src/merkle.js";
import { buildTreeFromNodeMap } from "../src/cli.js";
const fixtureDir = "test/fixtures";
const fixtureDir = "../../test/fixtures";
describe("run bundle — id.tri.bundle", () => {
const bundle = readFileSync(`${fixtureDir}/id.tri.bundle`);
describe("run bundle — id.arborix", () => {
const bundle = readFileSync(`${fixtureDir}/id.arborix`);
const manifest = parseManifest(bundle);
const nodeSectionData = bundleParseNodeSection(bundle);
const { nodeMap } = parseNodes(nodeSectionData);
@@ -24,25 +24,21 @@ describe("run bundle — id.tri.bundle", () => {
ok(verified);
});
it("export 'id' is selectable", () => {
const exp = selectExport(manifest, "id");
strictEqual(exp.name, "id");
it("export 'root' is selectable", () => {
const exp = selectExport(manifest, "root");
strictEqual(exp.name, "root");
});
it("tree reconstructs as a Fork", () => {
const exp = selectExport(manifest, "id");
const exp = selectExport(manifest, "root");
const tree = buildTreeFromNodeMap(nodeMap, exp.root);
ok(Array.isArray(tree));
// id = t (t t) = Fork (Stem Leaf) Leaf...
// In Haskell: id = S = t (t (t t)) t
// This is Fork (Fork (Fork Leaf Leaf) Leaf) Leaf
// In array form: [[[], []], [], []]
ok(tree.length >= 2, "tree should be a Fork (length >= 2)");
});
});
describe("run bundle — true.tri.bundle", () => {
const bundle = readFileSync(`${fixtureDir}/true.tri.bundle`);
describe("run bundle — true.arborix", () => {
const bundle = readFileSync(`${fixtureDir}/true.arborix`);
const manifest = parseManifest(bundle);
const nodeSectionData = bundleParseNodeSection(bundle);
const { nodeMap } = parseNodes(nodeSectionData);
@@ -51,20 +47,60 @@ describe("run bundle — true.tri.bundle", () => {
validateManifest(manifest);
});
it("export 'const' is selectable", () => {
const exp = selectExport(manifest, "const");
strictEqual(exp.name, "const");
it("export 'root' is selectable", () => {
const exp = selectExport(manifest, "root");
strictEqual(exp.name, "root");
});
it("tree reconstructs", () => {
const exp = selectExport(manifest, "const");
it("tree reconstructs as Stem Leaf", () => {
const exp = selectExport(manifest, "root");
const tree = buildTreeFromNodeMap(nodeMap, exp.root);
ok(Array.isArray(tree));
strictEqual(tree.length, 1, "true should be a Stem (single child)");
strictEqual(tree[0].length, 0, "child should be Leaf");
});
});
describe("run bundle — false.arborix", () => {
const bundle = readFileSync(`${fixtureDir}/false.arborix`);
const manifest = parseManifest(bundle);
const nodeSectionData = bundleParseNodeSection(bundle);
const { nodeMap } = parseNodes(nodeSectionData);
it("manifest validates", () => {
validateManifest(manifest);
});
it("export 'root' is selectable", () => {
const exp = selectExport(manifest, "root");
strictEqual(exp.name, "root");
});
it("tree reconstructs as Leaf", () => {
const exp = selectExport(manifest, "root");
const tree = buildTreeFromNodeMap(nodeMap, exp.root);
strictEqual(tree.length, 0, "false should be Leaf (empty array)");
});
});
describe("run bundle — notQ.arborix", () => {
const bundle = readFileSync(`${fixtureDir}/notQ.arborix`);
const manifest = parseManifest(bundle);
const nodeSectionData = bundleParseNodeSection(bundle);
const { nodeMap } = parseNodes(nodeSectionData);
it("manifest validates", () => {
validateManifest(manifest);
});
it("node hashes verify", () => {
const { verified } = verifyNodeHashes(nodeMap);
ok(verified);
});
});
describe("run bundle — missing export", () => {
const bundle = readFileSync(`${fixtureDir}/id.tri.bundle`);
const bundle = readFileSync(`${fixtureDir}/id.arborix`);
const manifest = parseManifest(bundle);
it("nonexistent export fails clearly", () => {
@@ -73,8 +109,8 @@ describe("run bundle — missing export", () => {
});
describe("run bundle — auto-select", () => {
// true.tri.bundle has only one export, should auto-select
const bundle = readFileSync(`${fixtureDir}/true.tri.bundle`);
// true.arborix has only one export, should auto-select
const bundle = readFileSync(`${fixtureDir}/true.arborix`);
const manifest = parseManifest(bundle);
it("single export auto-selects", () => {