diff --git a/AGENTS.md b/AGENTS.md index 8cb9779..d6292c9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -50,7 +50,7 @@ nix build .# Tests live in `test/Spec.hs` and use **Tasty** + **HUnit**. ```bash -nix flake check # or: nix build .#test +nix flake check ``` ### Test groups @@ -91,6 +91,14 @@ head (map f xs) → From lib/list.tri -- line comment ``` +CRITICAL: + +When working with recursion in `tricu` files: + +1. Put consumed data first in recursive workers. +2. Let data shape drive recursion. +3. Do not let counters unroll over abstract input. + ## 5. Output Formats The `eval` command accepts `--form` (shorthand `-t`): diff --git a/flake.nix b/flake.nix index 024ddac..5aca8da 100644 --- a/flake.nix +++ b/flake.nix @@ -21,21 +21,6 @@ tricuPackage = haskellPackages.callCabal2nix packageName self {}; - tricuTests = - hsLib.overrideCabal tricuPackage (old: { - doCheck = true; - - configureFlags = (old.configureFlags or []) ++ [ - "--enable-tests" - ]; - - checkPhase = '' - runHook preCheck - ./Setup test tricu-tests --show-details=direct - runHook postCheck - ''; - }); - customGHC = haskellPackages.ghcWithPackages (hpkgs: with hpkgs; [ megaparsec ]); @@ -43,10 +28,8 @@ packages.${packageName} = tricuPackage; packages.default = tricuPackage; - packages.test = tricuTests; - - checks.${packageName} = tricuTests; - checks.default = tricuTests; + checks.${packageName} = tricuPackage; + checks.default = tricuPackage; defaultPackage = self.packages.${system}.default; diff --git a/lib/arborix.tri b/lib/arborix.tri index cc9fbe0..df567bd 100644 --- a/lib/arborix.tri +++ b/lib/arborix.tri @@ -9,11 +9,14 @@ readArborixMagic = (bs : expectBytes arborixMagic bs) readArborixHeader = (bs : bindResult (readArborixMagic bs) - (_ r0 : - bindResult (readU16BEBytes r0) - (major r1 : - bindResult (readU16BEBytes r1) - (minor r2 : - bindResult (readU32BEBytes r2) - (sections r3 : - ok (pair major (pair minor sections)) r3))))) + (_ afterMagic : + bindResult (readBytes 2 afterMagic) + (majorVersion afterMajor : + bindResult (readBytes 2 afterMajor) + (minorVersion afterMinor : + bindResult (readBytes 4 afterMinor) + (sectionCount afterSectionCount : + ok + (pair majorVersion + (pair minorVersion sectionCount)) + afterSectionCount))))) diff --git a/lib/binary.tri b/lib/binary.tri index bca4bd8..b7f00e0 100644 --- a/lib/binary.tri +++ b/lib/binary.tri @@ -26,26 +26,38 @@ readU8 = (bytes : matchList (h r : ok h r) bytes) -readBytesTaken = n bytes : bytesTake n bytes -readBytesRest = n bytes : bytesDrop n bytes -readBytesEnough? = n bytes : equal? (bytesLength (readBytesTaken n bytes)) n +readBytes_ = y (self bs n i original acc : + matchList + (matchBool + (ok (reverse acc) bs) + (err errUnexpectedEof original) + (equal? i n)) + (h r : + matchBool + (ok (reverse acc) bs) + (self r n (succ i) original (pair h acc)) + (equal? i n)) + bs) -readBytes = (n bytes : matchBool - (ok (readBytesTaken n bytes) (readBytesRest n bytes)) - (err errUnexpectedEof bytes) - (readBytesEnough? n bytes)) +readBytes = (n bs : readBytes_ bs n 0 bs t) unit = t -expectBytes = (expected bs : - matchResult - (code rest : err code bs) - (taken rest : - matchBool - (ok unit rest) - (err errUnexpectedBytes bs) - (bytesEq? taken expected)) - (readBytes (bytesLength expected) bs)) +expectBytes_ = y (self expected bs original : + matchList + (ok unit bs) + (expectedByte expectedRest : + matchResult + (code rest : err code original) + (actual rest : + matchBool + (self expectedRest rest original) + (err errUnexpectedBytes original) + (byteEq? actual expectedByte)) + (readU8 bs)) + expected) + +expectBytes = (expected bs : expectBytes_ expected bs bs) expectU8 = (expected bs : matchResult @@ -57,9 +69,6 @@ expectU8 = (expected bs : (byteEq? actual expected)) (readU8 bs)) -read2 = (bs : readBytes 2 bs) -read4 = (bs : readBytes 4 bs) - mapResult = (f result : matchResult (code rest : err code rest) @@ -72,5 +81,7 @@ bindResult = (result f : (value rest : f value rest) result) +read2 = (bs : readBytes 2 bs) +read4 = (bs : readBytes 4 bs) readU16BEBytes = (bs : read2 bs) readU32BEBytes = (bs : read4 bs) diff --git a/notes/recursive-consumers.md b/notes/recursive-consumers.md new file mode 100644 index 0000000..a1ae6b2 --- /dev/null +++ b/notes/recursive-consumers.md @@ -0,0 +1,267 @@ +# Recursive Consumer Argument Order + +## Core issue + +Partial application is generally fine in tricu. The problem appears with recursive consumer functions when loop-control arguments are known before the consumed data is available. + +The concrete case was `readBytes`. + +This worked: + +```tricu +(readBytes 2) [(1) (2) (3)] +``` + +This used to explode in space: + +```tricu +readBytes 2 +``` + +At first this looked like a general partial-application problem, but it was not. Other partial applications, such as partially applying `map`, normalized safely. The issue was the argument order and recursive shape of `readBytes_`. + +## What went wrong + +The original worker had loop-control arguments before the byte stream: + +```tricu +readBytes_ = y (self n i bs original acc : ...) +readBytes = (n bs : readBytes_ n 0 bs bs t) +``` + +After partially applying: + +```tricu +readBytes 2 +``` + +the evaluator knew: + +```text +n = 2 +i = 0 +``` + +but did not know: + +```text +bs +original +acc +``` + +Because the counter values were known, the evaluator could reduce checks like: + +```tricu +equal? i n +``` + +and begin unrolling recursion symbolically before the byte stream existed. That produced a large residual tree describing possible stream cases, rests, and accumulated values. + +The bug was not recursion itself. The bug was allowing counters to drive recursion while the consumed structure was still abstract. + +## Why `map`-style partial application is safe + +A partially-applied list consumer such as: + +```tricu +map (i : append i " world!") +``` + +is safe because recursion is blocked on the missing list argument. The function cannot recurse until it sees whether the list is empty or a cons cell. + +Safe shape: + +```text +waiting for input +recursion blocked until input is supplied +``` + +Unsafe shape: + +```text +waiting for input +known counters still allow symbolic recursion +``` + +## Fix + +Put the consumed data first in the recursive worker and make the first major operation inspect that data. + +Corrected shape: + +```tricu +readBytes_ = y (self bs n i original acc : + matchList + (matchBool + (ok (reverse acc) bs) + (err errUnexpectedEof original) + (equal? i n)) + (h r : + matchBool + (ok (reverse acc) bs) + (self r n (succ i) original (pair h acc)) + (equal? i n)) + bs) + +readBytes = (n bs : readBytes_ bs n 0 bs t) +``` + +Now: + +```tricu +readBytes 2 +``` + +becomes: + +```tricu +bs : readBytes_ bs 2 0 bs t +``` + +Since `bs` is abstract and the worker immediately performs: + +```tricu +matchList ... bs +``` + +evaluation blocks at the data boundary instead of unrolling the counter loop. + +## General rule + +For recursive consumers, the consumed structure should drive evaluation. + +Prefer: + +```tricu +worker = y (self input control state : + matchInput + baseCase + (piece rest : ... self rest control nextState ...) + input) +``` + +Avoid: + +```tricu +worker = y (self control state input : + if controlDone + done + (... self nextControl nextState rest ...)) +``` + +In practice: + +```text +worker input control state +``` + +is safer than: + +```text +worker control state input +``` + +## Accumulators + +Be careful not to finalize or transform an abstract accumulator too early. + +For example: + +```tricu +ok (reverse acc) bs +``` + +is fine when reached after concrete input has driven the recursion, but it can become pathological if reached while `acc` is still abstract. + +Guidelines: + +- Accumulate cheaply during recursion. +- Finalize, reverse, or validate only after input has forced the function to a concrete success point. +- Do not let counters select a success branch while the accumulator is still abstract. + +## Parser guidance + +For byte or parser consumers, prefer streaming over global slicing of unknown input. + +Prefer: + +```tricu +read one byte +compare or accumulate +recurse on rest +``` + +Avoid relying on: + +```tricu +taken = bytesTake n bs +rest = bytesDrop n bs +enough = bytesLength taken == n +``` + +The slice-based version may be correct on concrete input but can behave badly when partially applied over abstract input. + +Streaming alone is not enough; the recursive worker must also be data-first. + +## Checklist + +When writing a recursive consumer, ask: + +1. What structure is consumed? +2. What argument should block recursion when unknown? +3. Are counters available before the consumed structure? +4. Could partial application specialize the loop before data arrives? +5. Does any branch process an abstract accumulator or rest value? +6. Does the worker put consumed data before counters and state? + +## Safe and unsafe examples + +Safe: + +```tricu +readU8 +bs : readU8 bs +readBytes 2 [(1) (2) (3)] +(readBytes 2) [(1) (2) (3)] +map (i : append i " world!") +``` + +Previously unsafe before the data-first rewrite: + +```tricu +readBytes 2 +readBytes_ 2 0 +``` + +## Implication for Arborix + +Arborix parsers will include many recursive consumers: + +- read N bytes +- read N section records +- scan records for an ID +- parse node records +- validate closures + +These should use data-first recursive workers. + +Avoid: + +```tricu +readSectionRecords_ count index bs acc +``` + +Prefer: + +```tricu +readSectionRecords_ bs count index acc +``` + +## Short rule + +```text +Put consumed data first in recursive workers. +Let data shape drive recursion. +Do not let counters unroll over abstract input. +```