Fixes space leak by switching to objects

The integer-arena approach (parallel global arrays) never freed nodes,
causing 6GB+ memory usage when running the self-hosted kernel. PHP
arrays don't shrink and we have no GC for them.

- Replace int-arena with plain Node objects so PHP's refcounting GC
  reclaims unreachable subtrees automatically.
- Hash-cons Stem/Fork nodes to collapse duplicate immutable subtrees.
- Cache ofNumber(0..255) to avoid re-creating byte trees.
- Eliminate indirection (tag 4) nodes entirely; projection rules now
  eagerly reduce and copy tag/a/b into the original App node.
- Remove all deref() loops — every node is exactly what it says.

Memory: 6GB+ → 24MB for `id "hello"` with runArboricxToString kernel
This commit is contained in:
2026-05-10 16:56:00 -05:00
parent 1885c9b4ba
commit 8a673e282d
5 changed files with 209 additions and 133 deletions

View File

@@ -26,6 +26,7 @@ require __DIR__ . '/src/functions.php';
require __DIR__ . '/src/codecs.php';
require __DIR__ . '/src/kernel.php';
use Arboricx\Node;
use function Arboricx\{app, reduce, ofString, ofNumber, ofBytes, ofList,
formatTree, unwrapResult, unwrapHostValue, decodeHostPayload,
getRunArboricxToString};
@@ -67,7 +68,7 @@ function cmdRun(string $bundlePath, array $args): void
// Kernel application: runArboricxToString bundle args
$expr = app(app($kernel, $bundleTree), $argsTree);
fwrite(STDERR, "Reducing kernel application...\n");
$result = reduce($expr, 1_000_000_000);
$result = reduce($expr, 1_000_000_000_000);
// The kernel returns an ok/err pair. On ok, the value is a
// Host ABI envelope: Fork(tag_number, payload_tree).
@@ -96,7 +97,7 @@ function cmdRun(string $bundlePath, array $args): void
}
}
function encodeArg(string $arg): int
function encodeArg(string $arg): Node
{
return ctype_digit($arg) ? ofNumber((int)$arg) : ofString($arg);
}

View File

@@ -6,15 +6,24 @@ namespace Arboricx;
use function Arboricx\{leaf, stem, fork, isLeaf, isStem, isFork, reduce, stemChild, forkLeft, forkRight, same_tree, formatTree};
function ofNumber(int $n): int
$GLOBALS['ARB_NUM_CACHE'] = [];
function ofNumber(int $n): Node
{
if ($n < 0) throw new \InvalidArgumentException('ofNumber: negative values not supported');
if ($n === 0) return leaf();
if ($n < 256 && isset($GLOBALS['ARB_NUM_CACHE'][$n])) {
return $GLOBALS['ARB_NUM_CACHE'][$n];
}
$bitTree = ($n % 2) === 1 ? stem(leaf()) : leaf();
return fork($bitTree, ofNumber(intdiv($n, 2)));
$result = fork($bitTree, ofNumber(intdiv($n, 2)));
if ($n < 256) {
$GLOBALS['ARB_NUM_CACHE'][$n] = $result;
}
return $result;
}
function toNumber(int $tree): array
function toNumber(Node $tree): array
{
$tree = reduce($tree);
if (isLeaf($tree)) return [true, 0];
@@ -33,7 +42,7 @@ function toNumber(int $tree): array
return $ok ? [true, $bit + 2 * $rest] : [false, $rest];
}
function ofList(array $elements): int
function ofList(array $elements): Node
{
$result = leaf();
for ($i = count($elements) - 1; $i >= 0; $i--) {
@@ -42,7 +51,7 @@ function ofList(array $elements): int
return $result;
}
function toList(int $tree): array
function toList(Node $tree): array
{
$tree = reduce($tree);
if (isLeaf($tree)) return [true, []];
@@ -57,7 +66,7 @@ function toList(int $tree): array
/**
* Strings are lists of byte values (each character encoded as a number tree).
*/
function ofString(string $s): int
function ofString(string $s): Node
{
$bytes = [];
for ($i = 0, $len = strlen($s); $i < $len; $i++) {
@@ -66,7 +75,7 @@ function ofString(string $s): int
return ofList($bytes);
}
function toString(int $tree): array
function toString(Node $tree): array
{
[$ok, $elements] = toList($tree);
if (!$ok) return [false, $elements];
@@ -82,17 +91,17 @@ function toString(int $tree): array
return [true, $result];
}
function ofBytes(string $s): int
function ofBytes(string $s): Node
{
return ofString($s);
}
function toBytes(int $tree): array
function toBytes(Node $tree): array
{
return toString($tree);
}
function unwrapResult(int $tree): array
function unwrapResult(Node $tree): array
{
$tree = reduce($tree);
if (!isFork($tree)) return ['error', null, 'Result is not a valid ok/err pair'];
@@ -120,7 +129,7 @@ const HOST_BOOL_TAG = 3;
const HOST_LIST_TAG = 4;
const HOST_BYTES_TAG = 5;
function unwrapHostValue(int $tree): array
function unwrapHostValue(Node $tree): array
{
$tree = reduce($tree);
if (!isFork($tree)) throw new \InvalidArgumentException('Host ABI value must be a pair');
@@ -132,13 +141,13 @@ function unwrapHostValue(int $tree): array
return [(int)$tagNum, $payload];
}
function isBool(int $tree): bool
function isBool(Node $tree): bool
{
$tree = reduce($tree);
return isLeaf($tree) || (isStem($tree) && isLeaf(stemChild($tree)));
}
function decodeHostPayload(int $tag, int $payload): array
function decodeHostPayload(int $tag, Node $payload): array
{
switch ($tag) {
case HOST_TREE_TAG:

View File

@@ -5,185 +5,251 @@ declare(strict_types=1);
namespace Arboricx;
/**
* Arena-backed Tree Calculus graph.
* Node-based Tree Calculus graph.
*
* Nodes are plain PHP objects so the runtime's refcounting GC
* can reclaim unreachable subtrees automatically.
*
* Nodes are integers (refs) into four parallel arrays:
* tag 0 = Leaf
* tag 1 = Stem(child)
* tag 2 = Fork(left, right)
* tag 3 = App(function, argument) -- evaluator thunk
* tag 4 = Ind(target) -- update-in-place indirection for sharing
*
* Using ints instead of objects keeps the hot path allocation-free
* and lets us do cheap pointer-equality (same ref == same subtree).
*/
$GLOBALS['ARB_TAG'] = [0 => 0];
$GLOBALS['ARB_A'] = [0 => 0];
$GLOBALS['ARB_B'] = [0 => 0];
$GLOBALS['ARB_NEXT'] = 1;
function spendFuel(int &$fuel): void
final class Node
{
if ($fuel <= 0) throw new \RuntimeException('fuel exhausted');
$fuel--;
}
public int $tag;
public ?Node $a;
public ?Node $b;
function newNode(int $tag, int $a, int $b = 0): int
{
$id = $GLOBALS['ARB_NEXT']++;
$GLOBALS['ARB_TAG'][$id] = $tag;
$GLOBALS['ARB_A'][$id] = $a;
$GLOBALS['ARB_B'][$id] = $b;
return $id;
}
/**
* Chase indirection (tag 4) chains to the real node.
* Self-loops are treated as terminal to avoid infinite loops.
*/
function deref(int $t): int
{
while ($t !== 0 && ($GLOBALS['ARB_TAG'][$t] ?? -1) === 4) {
$next = $GLOBALS['ARB_A'][$t];
if ($next === $t) break;
$t = $next;
}
return $t;
}
/**
* Overwrite a node with an indirection to its reduced form.
* This short-circuits future reads so shared sub-terms are only
* reduced once.
*/
function setInd(int $o, int $t): void
{
if ($o !== $t) {
$GLOBALS['ARB_TAG'][$o] = 4;
$GLOBALS['ARB_A'][$o] = $t;
public function __construct(int $tag = 0, ?Node $a = null, ?Node $b = null)
{
$this->tag = $tag;
$this->a = $a;
$this->b = $b;
}
}
function leaf(): int { return 0; }
function stem(int $c): int { return newNode(1, $c); }
function fork(int $l, int $r): int { return newNode(2, $l, $r); }
function app(int $f, int $x): int { return newNode(3, $f, $x); }
$GLOBALS['ARB_LEAF'] = new Node();
$GLOBALS['ARB_CONS'] = [];
function tag(int $t): int { $t = deref($t); return $GLOBALS['ARB_TAG'][$t] ?? -1; }
function isLeaf(int $t): bool { return deref($t) === 0; }
function isStem(int $t): bool { return tag($t) === 1; }
function isFork(int $t): bool { return tag($t) === 2; }
function isApp(int $t): bool { return tag($t) === 3; }
function newNode(int $tag, ?Node $a = null, ?Node $b = null): Node
{
if ($tag !== 3) {
$key = $tag === 1
? '1:' . spl_object_id($a)
: '2:' . spl_object_id($a) . ':' . spl_object_id($b);
if (isset($GLOBALS['ARB_CONS'][$key])) {
return $GLOBALS['ARB_CONS'][$key];
}
$n = new Node($tag, $a, $b);
$GLOBALS['ARB_CONS'][$key] = $n;
return $n;
}
function stemChild(int $t): int { $t = deref($t); return $GLOBALS['ARB_A'][$t]; }
function forkLeft(int $t): int { $t = deref($t); return $GLOBALS['ARB_A'][$t]; }
function forkRight(int $t): int { $t = deref($t); return $GLOBALS['ARB_B'][$t]; }
function appFunc(int $t): int { $t = deref($t); return $GLOBALS['ARB_A'][$t]; }
function appArg(int $t): int { $t = deref($t); return $GLOBALS['ARB_B'][$t]; }
return new Node($tag, $a, $b);
}
/**
* Reduce a term to weak-head normal form (WHNF).
*
* The evaluator is iterative (no PHP stack growth) and reduces
* left-to-right: function first, then argument when needed.
* Fuel prevents infinite loops on divergent terms.
*/
function reduce(int $term, int $fuel = 100_000_000_000_000_000): int
function leaf(): Node
{
return $GLOBALS['ARB_LEAF'];
}
function stem(Node $c): Node
{
return newNode(1, $c);
}
function fork(Node $l, Node $r): Node
{
return newNode(2, $l, $r);
}
function app(Node $f, Node $x): Node
{
return newNode(3, $f, $x);
}
function tag(Node $t): int
{
return $t->tag;
}
function isLeaf(Node $t): bool
{
return $t->tag === 0;
}
function isStem(Node $t): bool
{
return $t->tag === 1;
}
function isFork(Node $t): bool
{
return $t->tag === 2;
}
function isApp(Node $t): bool
{
return $t->tag === 3;
}
function stemChild(Node $t): Node
{
return $t->a;
}
function forkLeft(Node $t): Node
{
return $t->a;
}
function forkRight(Node $t): Node
{
return $t->b;
}
function appFunc(Node $t): Node
{
return $t->a;
}
function appArg(Node $t): Node
{
return $t->b;
}
function reduce(Node $term, int $fuel = 100_000_000_000_000_000): Node
{
return whnf($term, $fuel);
}
function whnf(int $term, int &$fuel): int
function whnf(Node $term, int &$fuel): Node
{
$TAG =& $GLOBALS['ARB_TAG'];
$A =& $GLOBALS['ARB_A'];
$B =& $GLOBALS['ARB_B'];
while (true) {
$term = deref($term);
if ($TAG[$term] !== 3) return $term;
if ($term->tag !== 3) {
return $term;
}
$orig = $term;
$f = whnf($A[$term], $fuel);
$x = $B[$term];
$f = deref($f);
$ftag = $TAG[$f];
$f = whnf($term->a, $fuel);
$x = $term->b;
$ftag = $f->tag;
// apply Leaf b = Stem b
if ($f === 0) {
$r = newNode(1, $x); setInd($orig, $r); return $r;
if ($ftag === 0) {
$orig->tag = 1;
$orig->a = $x;
$orig->b = null;
return $orig;
}
// apply (Stem a) b = Fork a b
if ($ftag === 1) {
$r = newNode(2, $A[$f], $x); setInd($orig, $r); return $r;
$orig->tag = 2;
$orig->a = $f->a;
$orig->b = $x;
return $orig;
}
if ($ftag !== 2) throw new \RuntimeException('apply: function did not reduce to tree');
if ($ftag !== 2) {
throw new \RuntimeException('apply: function did not reduce to tree');
}
$left = whnf($A[$f], $fuel);
$right = $B[$f];
$left = deref($left);
$ltag = $TAG[$left];
$left = whnf($f->a, $fuel);
$right = $f->b;
$ltag = $left->tag;
// apply (Fork Leaf a) _ = a
if ($left === 0) {
$term = $right; setInd($orig, $term); spendFuel($fuel); continue;
if ($ltag === 0) {
$result = whnf($right, $fuel);
if ($orig !== $result) {
$orig->tag = $result->tag;
$orig->a = $result->a;
$orig->b = $result->b;
}
if ($fuel <= 0) throw new \RuntimeException('fuel exhausted');
$fuel--;
return $orig;
}
// apply (Fork (Stem a) b) c = (a c) (b c)
if ($ltag === 1) {
$term = newNode(3, newNode(3, $A[$left], $x), newNode(3, $right, $x));
setInd($orig, $term); spendFuel($fuel); continue;
$inner1 = newNode(3, $left->a, $x);
$inner2 = newNode(3, $right, $x);
$orig->tag = 3;
$orig->a = $inner1;
$orig->b = $inner2;
$term = $orig;
if ($fuel <= 0) throw new \RuntimeException('fuel exhausted');
$fuel--;
continue;
}
if ($ltag !== 2) throw new \RuntimeException('apply: invalid Fork left child');
if ($ltag !== 2) {
throw new \RuntimeException('apply: invalid Fork left child');
}
$arg = whnf($x, $fuel);
$arg = deref($arg);
$atag = $TAG[$arg];
$atag = $arg->tag;
// apply (Fork (Fork a b) c) Leaf = a
if ($arg === 0) {
$term = $A[$left]; setInd($orig, $term); spendFuel($fuel); continue;
if ($atag === 0) {
$result = whnf($left->a, $fuel);
if ($orig !== $result) {
$orig->tag = $result->tag;
$orig->a = $result->a;
$orig->b = $result->b;
}
if ($fuel <= 0) throw new \RuntimeException('fuel exhausted');
$fuel--;
return $orig;
}
// apply (Fork (Fork a b) c) (Stem u) = b u
if ($atag === 1) {
$term = newNode(3, $B[$left], $A[$arg]);
setInd($orig, $term); spendFuel($fuel); continue;
$orig->tag = 3;
$orig->a = $left->b;
$orig->b = $arg->a;
$term = $orig;
if ($fuel <= 0) throw new \RuntimeException('fuel exhausted');
$fuel--;
continue;
}
// apply (Fork (Fork a b) c) (Fork u v) = (c u) v
if ($atag === 2) {
$term = newNode(3, newNode(3, $right, $A[$arg]), $B[$arg]);
setInd($orig, $term); spendFuel($fuel); continue;
$inner = newNode(3, $right, $arg->a);
$orig->tag = 3;
$orig->a = $inner;
$orig->b = $arg->b;
$term = $orig;
if ($fuel <= 0) throw new \RuntimeException('fuel exhausted');
$fuel--;
continue;
}
throw new \RuntimeException('apply: argument did not reduce to tree');
}
}
function same_tree(int $a, int $b): bool
function same_tree(Node $a, Node $b): bool
{
$a = deref($a); $b = deref($b);
if ($a === $b) return true;
if (tag($a) !== tag($b)) return false;
if (isLeaf($a)) return true;
if (isStem($a)) return same_tree(stemChild($a), stemChild($b));
if (isFork($a)) return same_tree(forkLeft($a), forkLeft($b)) && same_tree(forkRight($a), forkRight($b));
if (isApp($a)) return same_tree(appFunc($a), appFunc($b)) && same_tree(appArg($a), appArg($b));
if ($a->tag !== $b->tag) return false;
if ($a->tag === 0) return true;
if ($a->tag === 1) return same_tree($a->a, $b->a);
if ($a->tag === 2) return same_tree($a->a, $b->a) && same_tree($a->b, $b->b);
if ($a->tag === 3) return same_tree($a->a, $b->a) && same_tree($a->b, $b->b);
return false;
}
function formatTree(int $t, int $depth = 0): string
function formatTree(Node $t, int $depth = 0): string
{
$t = deref($t);
if ($depth > 200) return '...';
if (isLeaf($t)) return 'Leaf';
if (isStem($t)) return 'Stem(' . formatTree(stemChild($t), $depth + 1) . ')';
if (isFork($t)) return 'Fork(' . formatTree(forkLeft($t), $depth + 1) . ', ' . formatTree(forkRight($t), $depth + 1) . ')';
if (isApp($t)) return 'App(' . formatTree(appFunc($t), $depth + 1) . ', ' . formatTree(appArg($t), $depth + 1) . ')';
if ($t->tag === 0) return 'Leaf';
if ($t->tag === 1) return 'Stem(' . formatTree($t->a, $depth + 1) . ')';
if ($t->tag === 2) return 'Fork(' . formatTree($t->a, $depth + 1) . ', ' . formatTree($t->b, $depth + 1) . ')';
if ($t->tag === 3) return 'App(' . formatTree($t->a, $depth + 1) . ', ' . formatTree($t->b, $depth + 1) . ')';
return 'Unknown';
}

View File

@@ -26,7 +26,7 @@ function loadKernelTernary(string $name): string
return $content !== false ? trim($content) : '';
}
function parseTernary(string $s, int &$pos = 0): int
function parseTernary(string $s, int &$pos = 0): Node
{
if ($pos >= strlen($s)) {
throw new \RuntimeException('parseTernary: unexpected end of string');
@@ -40,7 +40,7 @@ function parseTernary(string $s, int &$pos = 0): int
};
}
function getRunArboricxToString(): ?int
function getRunArboricxToString(): ?Node
{
$term = loadKernelTernary('kernel_run_arboricx_to_string');
if ($term === '') return null;