diff --git a/ext/php/run.php b/ext/php/run.php index 82bc289..782991e 100644 --- a/ext/php/run.php +++ b/ext/php/run.php @@ -26,6 +26,7 @@ require __DIR__ . '/src/functions.php'; require __DIR__ . '/src/codecs.php'; require __DIR__ . '/src/kernel.php'; +use Arboricx\Node; use function Arboricx\{app, reduce, ofString, ofNumber, ofBytes, ofList, formatTree, unwrapResult, unwrapHostValue, decodeHostPayload, getRunArboricxToString}; @@ -67,7 +68,7 @@ function cmdRun(string $bundlePath, array $args): void // Kernel application: runArboricxToString bundle args $expr = app(app($kernel, $bundleTree), $argsTree); fwrite(STDERR, "Reducing kernel application...\n"); - $result = reduce($expr, 1_000_000_000); + $result = reduce($expr, 1_000_000_000_000); // The kernel returns an ok/err pair. On ok, the value is a // Host ABI envelope: Fork(tag_number, payload_tree). @@ -96,7 +97,7 @@ function cmdRun(string $bundlePath, array $args): void } } -function encodeArg(string $arg): int +function encodeArg(string $arg): Node { return ctype_digit($arg) ? ofNumber((int)$arg) : ofString($arg); } diff --git a/ext/php/src/codecs.php b/ext/php/src/codecs.php index 2e144c8..0a0c3d8 100644 --- a/ext/php/src/codecs.php +++ b/ext/php/src/codecs.php @@ -6,15 +6,24 @@ namespace Arboricx; use function Arboricx\{leaf, stem, fork, isLeaf, isStem, isFork, reduce, stemChild, forkLeft, forkRight, same_tree, formatTree}; -function ofNumber(int $n): int +$GLOBALS['ARB_NUM_CACHE'] = []; + +function ofNumber(int $n): Node { if ($n < 0) throw new \InvalidArgumentException('ofNumber: negative values not supported'); if ($n === 0) return leaf(); + if ($n < 256 && isset($GLOBALS['ARB_NUM_CACHE'][$n])) { + return $GLOBALS['ARB_NUM_CACHE'][$n]; + } $bitTree = ($n % 2) === 1 ? stem(leaf()) : leaf(); - return fork($bitTree, ofNumber(intdiv($n, 2))); + $result = fork($bitTree, ofNumber(intdiv($n, 2))); + if ($n < 256) { + $GLOBALS['ARB_NUM_CACHE'][$n] = $result; + } + return $result; } -function toNumber(int $tree): array +function toNumber(Node $tree): array { $tree = reduce($tree); if (isLeaf($tree)) return [true, 0]; @@ -33,7 +42,7 @@ function toNumber(int $tree): array return $ok ? [true, $bit + 2 * $rest] : [false, $rest]; } -function ofList(array $elements): int +function ofList(array $elements): Node { $result = leaf(); for ($i = count($elements) - 1; $i >= 0; $i--) { @@ -42,7 +51,7 @@ function ofList(array $elements): int return $result; } -function toList(int $tree): array +function toList(Node $tree): array { $tree = reduce($tree); if (isLeaf($tree)) return [true, []]; @@ -57,7 +66,7 @@ function toList(int $tree): array /** * Strings are lists of byte values (each character encoded as a number tree). */ -function ofString(string $s): int +function ofString(string $s): Node { $bytes = []; for ($i = 0, $len = strlen($s); $i < $len; $i++) { @@ -66,7 +75,7 @@ function ofString(string $s): int return ofList($bytes); } -function toString(int $tree): array +function toString(Node $tree): array { [$ok, $elements] = toList($tree); if (!$ok) return [false, $elements]; @@ -82,17 +91,17 @@ function toString(int $tree): array return [true, $result]; } -function ofBytes(string $s): int +function ofBytes(string $s): Node { return ofString($s); } -function toBytes(int $tree): array +function toBytes(Node $tree): array { return toString($tree); } -function unwrapResult(int $tree): array +function unwrapResult(Node $tree): array { $tree = reduce($tree); if (!isFork($tree)) return ['error', null, 'Result is not a valid ok/err pair']; @@ -120,7 +129,7 @@ const HOST_BOOL_TAG = 3; const HOST_LIST_TAG = 4; const HOST_BYTES_TAG = 5; -function unwrapHostValue(int $tree): array +function unwrapHostValue(Node $tree): array { $tree = reduce($tree); if (!isFork($tree)) throw new \InvalidArgumentException('Host ABI value must be a pair'); @@ -132,13 +141,13 @@ function unwrapHostValue(int $tree): array return [(int)$tagNum, $payload]; } -function isBool(int $tree): bool +function isBool(Node $tree): bool { $tree = reduce($tree); return isLeaf($tree) || (isStem($tree) && isLeaf(stemChild($tree))); } -function decodeHostPayload(int $tag, int $payload): array +function decodeHostPayload(int $tag, Node $payload): array { switch ($tag) { case HOST_TREE_TAG: diff --git a/ext/php/src/functions.php b/ext/php/src/functions.php index 3b92245..3d82a7a 100644 --- a/ext/php/src/functions.php +++ b/ext/php/src/functions.php @@ -5,185 +5,251 @@ declare(strict_types=1); namespace Arboricx; /** - * Arena-backed Tree Calculus graph. + * Node-based Tree Calculus graph. + * + * Nodes are plain PHP objects so the runtime's refcounting GC + * can reclaim unreachable subtrees automatically. * - * Nodes are integers (refs) into four parallel arrays: * tag 0 = Leaf * tag 1 = Stem(child) * tag 2 = Fork(left, right) * tag 3 = App(function, argument) -- evaluator thunk - * tag 4 = Ind(target) -- update-in-place indirection for sharing - * - * Using ints instead of objects keeps the hot path allocation-free - * and lets us do cheap pointer-equality (same ref == same subtree). */ -$GLOBALS['ARB_TAG'] = [0 => 0]; -$GLOBALS['ARB_A'] = [0 => 0]; -$GLOBALS['ARB_B'] = [0 => 0]; -$GLOBALS['ARB_NEXT'] = 1; - -function spendFuel(int &$fuel): void +final class Node { - if ($fuel <= 0) throw new \RuntimeException('fuel exhausted'); - $fuel--; -} + public int $tag; + public ?Node $a; + public ?Node $b; -function newNode(int $tag, int $a, int $b = 0): int -{ - $id = $GLOBALS['ARB_NEXT']++; - $GLOBALS['ARB_TAG'][$id] = $tag; - $GLOBALS['ARB_A'][$id] = $a; - $GLOBALS['ARB_B'][$id] = $b; - return $id; -} - -/** - * Chase indirection (tag 4) chains to the real node. - * Self-loops are treated as terminal to avoid infinite loops. - */ -function deref(int $t): int -{ - while ($t !== 0 && ($GLOBALS['ARB_TAG'][$t] ?? -1) === 4) { - $next = $GLOBALS['ARB_A'][$t]; - if ($next === $t) break; - $t = $next; - } - return $t; -} - -/** - * Overwrite a node with an indirection to its reduced form. - * This short-circuits future reads so shared sub-terms are only - * reduced once. - */ -function setInd(int $o, int $t): void -{ - if ($o !== $t) { - $GLOBALS['ARB_TAG'][$o] = 4; - $GLOBALS['ARB_A'][$o] = $t; + public function __construct(int $tag = 0, ?Node $a = null, ?Node $b = null) + { + $this->tag = $tag; + $this->a = $a; + $this->b = $b; } } -function leaf(): int { return 0; } -function stem(int $c): int { return newNode(1, $c); } -function fork(int $l, int $r): int { return newNode(2, $l, $r); } -function app(int $f, int $x): int { return newNode(3, $f, $x); } +$GLOBALS['ARB_LEAF'] = new Node(); +$GLOBALS['ARB_CONS'] = []; -function tag(int $t): int { $t = deref($t); return $GLOBALS['ARB_TAG'][$t] ?? -1; } -function isLeaf(int $t): bool { return deref($t) === 0; } -function isStem(int $t): bool { return tag($t) === 1; } -function isFork(int $t): bool { return tag($t) === 2; } -function isApp(int $t): bool { return tag($t) === 3; } +function newNode(int $tag, ?Node $a = null, ?Node $b = null): Node +{ + if ($tag !== 3) { + $key = $tag === 1 + ? '1:' . spl_object_id($a) + : '2:' . spl_object_id($a) . ':' . spl_object_id($b); + if (isset($GLOBALS['ARB_CONS'][$key])) { + return $GLOBALS['ARB_CONS'][$key]; + } + $n = new Node($tag, $a, $b); + $GLOBALS['ARB_CONS'][$key] = $n; + return $n; + } -function stemChild(int $t): int { $t = deref($t); return $GLOBALS['ARB_A'][$t]; } -function forkLeft(int $t): int { $t = deref($t); return $GLOBALS['ARB_A'][$t]; } -function forkRight(int $t): int { $t = deref($t); return $GLOBALS['ARB_B'][$t]; } -function appFunc(int $t): int { $t = deref($t); return $GLOBALS['ARB_A'][$t]; } -function appArg(int $t): int { $t = deref($t); return $GLOBALS['ARB_B'][$t]; } + return new Node($tag, $a, $b); +} -/** - * Reduce a term to weak-head normal form (WHNF). - * - * The evaluator is iterative (no PHP stack growth) and reduces - * left-to-right: function first, then argument when needed. - * Fuel prevents infinite loops on divergent terms. - */ -function reduce(int $term, int $fuel = 100_000_000_000_000_000): int +function leaf(): Node +{ + return $GLOBALS['ARB_LEAF']; +} + +function stem(Node $c): Node +{ + return newNode(1, $c); +} + +function fork(Node $l, Node $r): Node +{ + return newNode(2, $l, $r); +} + +function app(Node $f, Node $x): Node +{ + return newNode(3, $f, $x); +} + +function tag(Node $t): int +{ + return $t->tag; +} + +function isLeaf(Node $t): bool +{ + return $t->tag === 0; +} + +function isStem(Node $t): bool +{ + return $t->tag === 1; +} + +function isFork(Node $t): bool +{ + return $t->tag === 2; +} + +function isApp(Node $t): bool +{ + return $t->tag === 3; +} + +function stemChild(Node $t): Node +{ + return $t->a; +} + +function forkLeft(Node $t): Node +{ + return $t->a; +} + +function forkRight(Node $t): Node +{ + return $t->b; +} + +function appFunc(Node $t): Node +{ + return $t->a; +} + +function appArg(Node $t): Node +{ + return $t->b; +} + +function reduce(Node $term, int $fuel = 100_000_000_000_000_000): Node { return whnf($term, $fuel); } -function whnf(int $term, int &$fuel): int +function whnf(Node $term, int &$fuel): Node { - $TAG =& $GLOBALS['ARB_TAG']; - $A =& $GLOBALS['ARB_A']; - $B =& $GLOBALS['ARB_B']; - while (true) { - $term = deref($term); - if ($TAG[$term] !== 3) return $term; + if ($term->tag !== 3) { + return $term; + } $orig = $term; - $f = whnf($A[$term], $fuel); - $x = $B[$term]; - - $f = deref($f); - $ftag = $TAG[$f]; + $f = whnf($term->a, $fuel); + $x = $term->b; + $ftag = $f->tag; // apply Leaf b = Stem b - if ($f === 0) { - $r = newNode(1, $x); setInd($orig, $r); return $r; + if ($ftag === 0) { + $orig->tag = 1; + $orig->a = $x; + $orig->b = null; + return $orig; } // apply (Stem a) b = Fork a b if ($ftag === 1) { - $r = newNode(2, $A[$f], $x); setInd($orig, $r); return $r; + $orig->tag = 2; + $orig->a = $f->a; + $orig->b = $x; + return $orig; } - if ($ftag !== 2) throw new \RuntimeException('apply: function did not reduce to tree'); + if ($ftag !== 2) { + throw new \RuntimeException('apply: function did not reduce to tree'); + } - $left = whnf($A[$f], $fuel); - $right = $B[$f]; - $left = deref($left); - $ltag = $TAG[$left]; + $left = whnf($f->a, $fuel); + $right = $f->b; + $ltag = $left->tag; // apply (Fork Leaf a) _ = a - if ($left === 0) { - $term = $right; setInd($orig, $term); spendFuel($fuel); continue; + if ($ltag === 0) { + $result = whnf($right, $fuel); + if ($orig !== $result) { + $orig->tag = $result->tag; + $orig->a = $result->a; + $orig->b = $result->b; + } + if ($fuel <= 0) throw new \RuntimeException('fuel exhausted'); + $fuel--; + return $orig; } // apply (Fork (Stem a) b) c = (a c) (b c) if ($ltag === 1) { - $term = newNode(3, newNode(3, $A[$left], $x), newNode(3, $right, $x)); - setInd($orig, $term); spendFuel($fuel); continue; + $inner1 = newNode(3, $left->a, $x); + $inner2 = newNode(3, $right, $x); + $orig->tag = 3; + $orig->a = $inner1; + $orig->b = $inner2; + $term = $orig; + if ($fuel <= 0) throw new \RuntimeException('fuel exhausted'); + $fuel--; + continue; } - if ($ltag !== 2) throw new \RuntimeException('apply: invalid Fork left child'); + if ($ltag !== 2) { + throw new \RuntimeException('apply: invalid Fork left child'); + } $arg = whnf($x, $fuel); - $arg = deref($arg); - $atag = $TAG[$arg]; + $atag = $arg->tag; // apply (Fork (Fork a b) c) Leaf = a - if ($arg === 0) { - $term = $A[$left]; setInd($orig, $term); spendFuel($fuel); continue; + if ($atag === 0) { + $result = whnf($left->a, $fuel); + if ($orig !== $result) { + $orig->tag = $result->tag; + $orig->a = $result->a; + $orig->b = $result->b; + } + if ($fuel <= 0) throw new \RuntimeException('fuel exhausted'); + $fuel--; + return $orig; } // apply (Fork (Fork a b) c) (Stem u) = b u if ($atag === 1) { - $term = newNode(3, $B[$left], $A[$arg]); - setInd($orig, $term); spendFuel($fuel); continue; + $orig->tag = 3; + $orig->a = $left->b; + $orig->b = $arg->a; + $term = $orig; + if ($fuel <= 0) throw new \RuntimeException('fuel exhausted'); + $fuel--; + continue; } // apply (Fork (Fork a b) c) (Fork u v) = (c u) v if ($atag === 2) { - $term = newNode(3, newNode(3, $right, $A[$arg]), $B[$arg]); - setInd($orig, $term); spendFuel($fuel); continue; + $inner = newNode(3, $right, $arg->a); + $orig->tag = 3; + $orig->a = $inner; + $orig->b = $arg->b; + $term = $orig; + if ($fuel <= 0) throw new \RuntimeException('fuel exhausted'); + $fuel--; + continue; } throw new \RuntimeException('apply: argument did not reduce to tree'); } } -function same_tree(int $a, int $b): bool +function same_tree(Node $a, Node $b): bool { - $a = deref($a); $b = deref($b); if ($a === $b) return true; - if (tag($a) !== tag($b)) return false; - if (isLeaf($a)) return true; - if (isStem($a)) return same_tree(stemChild($a), stemChild($b)); - if (isFork($a)) return same_tree(forkLeft($a), forkLeft($b)) && same_tree(forkRight($a), forkRight($b)); - if (isApp($a)) return same_tree(appFunc($a), appFunc($b)) && same_tree(appArg($a), appArg($b)); + if ($a->tag !== $b->tag) return false; + if ($a->tag === 0) return true; + if ($a->tag === 1) return same_tree($a->a, $b->a); + if ($a->tag === 2) return same_tree($a->a, $b->a) && same_tree($a->b, $b->b); + if ($a->tag === 3) return same_tree($a->a, $b->a) && same_tree($a->b, $b->b); return false; } -function formatTree(int $t, int $depth = 0): string +function formatTree(Node $t, int $depth = 0): string { - $t = deref($t); if ($depth > 200) return '...'; - if (isLeaf($t)) return 'Leaf'; - if (isStem($t)) return 'Stem(' . formatTree(stemChild($t), $depth + 1) . ')'; - if (isFork($t)) return 'Fork(' . formatTree(forkLeft($t), $depth + 1) . ', ' . formatTree(forkRight($t), $depth + 1) . ')'; - if (isApp($t)) return 'App(' . formatTree(appFunc($t), $depth + 1) . ', ' . formatTree(appArg($t), $depth + 1) . ')'; + if ($t->tag === 0) return 'Leaf'; + if ($t->tag === 1) return 'Stem(' . formatTree($t->a, $depth + 1) . ')'; + if ($t->tag === 2) return 'Fork(' . formatTree($t->a, $depth + 1) . ', ' . formatTree($t->b, $depth + 1) . ')'; + if ($t->tag === 3) return 'App(' . formatTree($t->a, $depth + 1) . ', ' . formatTree($t->b, $depth + 1) . ')'; return 'Unknown'; } diff --git a/ext/php/src/kernel.php b/ext/php/src/kernel.php index c9dcfbf..089842f 100644 --- a/ext/php/src/kernel.php +++ b/ext/php/src/kernel.php @@ -26,7 +26,7 @@ function loadKernelTernary(string $name): string return $content !== false ? trim($content) : ''; } -function parseTernary(string $s, int &$pos = 0): int +function parseTernary(string $s, int &$pos = 0): Node { if ($pos >= strlen($s)) { throw new \RuntimeException('parseTernary: unexpected end of string'); @@ -40,7 +40,7 @@ function parseTernary(string $s, int &$pos = 0): int }; } -function getRunArboricxToString(): ?int +function getRunArboricxToString(): ?Node { $term = loadKernelTernary('kernel_run_arboricx_to_string'); if ($term === '') return null; diff --git a/test/fixtures/append.arboricx b/test/fixtures/append.arboricx new file mode 100644 index 0000000..8bb2fd8 Binary files /dev/null and b/test/fixtures/append.arboricx differ