Files
tricu/src/Research.hs
James Eversole 2e2db07bd6 Ergonomic language features and lib cleanup
+ let bindings
+ where bindings
+ do notation

I explored enough of the alternative language design space and decided
that we should commit fully to Lambda style. That means no more highly
tacit/concatenative point-free/partial programs as default. We'll keep
taking advantage of those capabilities when it makes sense, but the
library will continue to see massive overhauls.
2026-05-23 18:28:02 -05:00

350 lines
12 KiB
Haskell

module Research where
import Crypto.Hash (hash, SHA256, Digest)
import Data.ByteArray (convert)
import Data.ByteString.Base16 (decode, encode)
import Data.List (intercalate)
import Data.Map ()
import Data.Text (Text, replace)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Word (Word8)
import qualified Data.ByteString as BS
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as T
-- Tree Calculus Types
data T = Leaf | Stem T | Fork T T
deriving (Show, Eq, Ord)
-- Abstract Syntax Tree for tricu
data TricuAST
= SVar String (Maybe String)
| SInt Integer
| SStr String
| SList [TricuAST]
| SDef String [String] TricuAST
| SApp TricuAST TricuAST
| TLeaf
| TStem TricuAST
| TFork TricuAST TricuAST
| SLambda [String] TricuAST
| SEmpty
| SImport String String
deriving (Show, Eq, Ord)
-- Lexer Tokens
data LToken
= LIdentifier String
| LIdentifierWithHash String String
| LKeywordT
| LNamespace String
| LImport String String
| LAssign
| LColon
| LDot
| LOpenParen
| LCloseParen
| LOpenBracket
| LCloseBracket
| LStringLiteral String
| LIntegerLiteral Int
| LArrowLeft
| LArrowRight
| LBindArrow
| LNewline
| LIndent Int
deriving (Eq, Show, Ord)
-- Output formats
data EvaluatedForm = Tree | FSL | AST | Ternary | Ascii | Decode | Number | StringLit
deriving (Show)
-- Environment containing previously evaluated TC terms
type Env = Map.Map String T
-- Merkle DAG Node types
-- Each Tree Calculus node becomes a content-addressed object.
type MerkleHash = Text
data Node
= NLeaf
| NStem MerkleHash
| NFork MerkleHash MerkleHash
deriving (Show, Eq, Ord)
-- | Canonical serialization of a Node for hashing.
-- Leaf: 0x00
-- Stem: 0x01 || child_hash (32 bytes)
-- Fork: 0x02 || left_hash (32 bytes) || right_hash (32 bytes)
serializeNode :: Node -> BS.ByteString
serializeNode NLeaf = BS.pack [0x00]
serializeNode (NStem h) = BS.pack [0x01] <> go (decode (encodeUtf8 h))
where go (Left _) = error "Research.serializeNode: invalid hex hash"
go (Right bs) = bs
serializeNode (NFork l r) = BS.pack [0x02] <> go (decode (encodeUtf8 l)) <> go (decode (encodeUtf8 r))
where go (Left _) = error "Research.serializeNode: invalid hex hash"
go (Right bs) = bs
-- | Hash a node per the Merkle content-addressing spec.
-- hash = SHA256( "arboricx.merkle.node.v1" <> 0x00 <> node_payload )
nodeHash :: Node -> MerkleHash
nodeHash node = decodeUtf8 (encode (sha256WithPrefix (serializeNode node)))
where sha256WithPrefix payload =
convert . (hash :: BS.ByteString -> Digest SHA256) $ utf8Tag <> BS.pack [0x00] <> payload
utf8Tag = BS.pack $ map fromIntegral $ BS.unpack "arboricx.merkle.node.v1"
-- | Deserialize a Node from canonical bytes.
deserializeNode :: BS.ByteString -> Node
deserializeNode bs =
case BS.uncons bs of
Just (0x00, rest)
| BS.null rest -> NLeaf
Just (0x01, rest)
| BS.length rest == 32 ->
NStem $ decodeUtf8 (encode rest)
Just (0x02, rest)
| BS.length rest == 64 ->
let (l, r) = BS.splitAt 32 rest
in NFork (decodeUtf8 (encode l)) (decodeUtf8 (encode r))
_ -> errorWithoutStackTrace "invalid merkle node payload"
-- ---------------------------------------------------------------------------
-- ByteString / bytestream marshalling via existing Tree Calculus conventions
-- ---------------------------------------------------------------------------
-- | Encode a single byte (Word8) as a Tree Calculus number (0..255).
ofByte :: Word8 -> T
ofByte = ofNumber . fromIntegral
-- | Decode a Tree Calculus number as a single byte (Word8).
-- Rejects values outside the range 0..255.
toByte :: T -> Either String Word8
toByte t = case toNumber t of
Left err -> Left err
Right n
| n >= 0 && n <= 255 -> Right (fromIntegral n)
| otherwise -> Left ("Byte value out of range: " ++ show n)
-- | Encode a ByteString as a Tree Calculus list of Byte trees.
ofBytes :: BS.ByteString -> T
ofBytes = ofList . map ofByte . BS.unpack
-- | Decode a Tree Calculus list of Byte trees as a ByteString.
-- Rejects non-list trees and elements that are not valid byte values (0..255).
toBytes :: T -> Either String BS.ByteString
toBytes t = case toList t of
Left err -> Left err
Right bs -> BS.pack <$> mapM toByte bs
-- | Convert a canonical Arboricx node payload (ByteString) to a Tree
-- representation (a list of Byte trees).
nodePayloadToTreeBytes :: BS.ByteString -> T
nodePayloadToTreeBytes = ofBytes
-- | Convert a Tree representation of a node payload back to ByteString.
treeBytesToNodePayload :: T -> Either String BS.ByteString
treeBytesToNodePayload = toBytes
-- | Convert a MerkleHash (hex-encoded) to a Tree of its 32 raw bytes.
hashToTreeBytes :: MerkleHash -> Either String T
hashToTreeBytes h = case decode (encodeUtf8 h) of
Left _ -> Left "Invalid hex MerkleHash"
Right raw
| BS.length raw == 32 -> Right (ofBytes raw)
| otherwise -> Left "Hash raw bytes must be 32 bytes"
-- | Convert a Tree of 32 Byte trees back to a MerkleHash (hex string).
treeBytesToHash :: T -> Either String MerkleHash
treeBytesToHash t = case toList t of
Left err -> Left err
Right bytes
| length bytes == 32 -> do
raw <- BS.pack <$> mapM toByte bytes
Right $ decodeUtf8 (encode raw)
| otherwise -> Left "Expected exactly 32 byte elements for hash"
-- | Build a Merkle DAG from a Tree Calculus term.
buildMerkle :: T -> Node
buildMerkle Leaf = NLeaf
buildMerkle (Stem t) = NStem (nodeHash child)
where child = buildMerkle t
buildMerkle (Fork l r) = NFork (nodeHash left) (nodeHash right)
where
left = buildMerkle l
right = buildMerkle r
-- Tree Calculus Reduction Rules
{-
The t operator is left associative.
1. t t a b -> a
2. t (t a) b c -> a c (b c)
3a. t (t a b) c t -> a
3b. t (t a b) c (t u) -> b u
3c. t (t a b) c (t u v) -> c u v
-}
apply :: T -> T -> T
apply (Fork Leaf a) _ = a
apply (Fork (Stem a) b) c = apply (apply a c) (apply b c)
apply (Fork (Fork _a _b) _c) Leaf = _a
apply (Fork (Fork _a _b) _c) (Stem u) = apply _b u
apply (Fork (Fork _a _b) _c) (Fork u v) = apply (apply _c u) v
-- Left associative `t`
apply Leaf b = Stem b
apply (Stem a) b = Fork a b
-- Booleans
_false :: T
_false = Leaf
_true :: T
_true = Stem Leaf
_not :: T
_not = Fork (Fork _true (Fork Leaf _false)) Leaf
-- Marshalling
ofString :: String -> T
ofString str = ofList $ map (ofNumber . toInteger . fromEnum) str
ofNumber :: Integer -> T
ofNumber 0 = Leaf
ofNumber n =
Fork
(if odd n then Stem Leaf else Leaf)
(ofNumber (n `div` 2))
ofList :: [T] -> T
ofList = foldr Fork Leaf
toNumber :: T -> Either String Integer
toNumber Leaf = Right 0
toNumber (Fork Leaf rest) = case toNumber rest of
Right n -> Right (2 * n)
Left err -> Left err
toNumber (Fork (Stem Leaf) rest) = case toNumber rest of
Right n -> Right (1 + 2 * n)
Left err -> Left err
toNumber _ = Left "Invalid Tree Calculus number"
toChar :: Integer -> Either String Char
toChar n
| n < 0 = Left "Negative character code"
| n > 0x10FFFF = Left "Character code out of Unicode range"
| n >= 0xD800 && n <= 0xDFFF = Left "Surrogate character code not allowed"
| otherwise = Right (toEnum (fromInteger n))
toString :: T -> Either String String
toString tc = do
list <- toList tc
nums <- mapM toNumber list
mapM toChar nums
toList :: T -> Either String [T]
toList Leaf = Right []
toList (Fork x rest) = case toList rest of
Right xs -> Right (x : xs)
Left err -> Left err
toList _ = Left "Invalid Tree Calculus list"
-- Outputs
formatT :: EvaluatedForm -> T -> String
formatT Tree = toSimpleT . show
formatT FSL = show
formatT AST = show . toAST
formatT Ternary = toTernaryString
formatT Ascii = toAscii
formatT Decode = decodeResult
formatT Number = either (\e -> "<not-number: " ++ e ++ ">") show . toNumber
formatT StringLit = either (\e -> "<not-string: " ++ e ++ ">") show . toString
toSimpleT :: String -> String
toSimpleT s = T.unpack
$ replace "Fork" "t"
$ replace "Stem" "t"
$ replace "Leaf" "t"
$ T.pack s
toTernaryString :: T -> String
toTernaryString Leaf = "0"
toTernaryString (Stem t) = "1" ++ toTernaryString t
toTernaryString (Fork t1 t2) = "2" ++ toTernaryString t1 ++ toTernaryString t2
toAST :: T -> TricuAST
toAST Leaf = TLeaf
toAST (Stem a) = TStem (toAST a)
toAST (Fork a b) = TFork (toAST a) (toAST b)
toAscii :: T -> String
toAscii tree = go tree "" True
where
go :: T -> String -> Bool -> String
go Leaf prefix isLast =
prefix ++ (if isLast then "`-- " else "|-- ") ++ "Leaf\n"
go (Stem t) prefix isLast =
prefix ++ (if isLast then "`-- " else "|-- ") ++ "Stem\n"
++ go t (prefix ++ (if isLast then " " else "| ")) True
go (Fork left right) prefix isLast =
prefix ++ (if isLast then "`-- " else "|-- ") ++ "Fork\n"
++ go left (prefix ++ (if isLast then " " else "| ")) False
++ go right (prefix ++ (if isLast then " " else "| ")) True
decodeResult :: T -> String
decodeResult Leaf = "t"
decodeResult tc =
case (toString tc, toList tc, toNumber tc) of
(Right s, _, _) | all isCommonChar s -> "\"" ++ s ++ "\""
(_, _, Right n) -> show n
(_, Right xs@(_:_), _) -> "[" ++ intercalate ", " (map decodeResult xs) ++ "]"
(_, Right [], _) -> "[]"
_ -> formatT Tree tc
where
isCommonChar c =
let n = fromEnum c
in (n >= 32 && n <= 126)
|| n == 9
|| n == 10
|| n == 13
-- ---------------------------------------------------------------------------
-- DAG node-table export (for host-language kernel embedding)
-- ---------------------------------------------------------------------------
-- | Export a term's Merkle DAG as a topologically-sorted node table.
-- Children appear before parents so all index references are forward.
-- Returns (root index, list of (tag, [child_indices])).
exportDag :: T -> (Int, [(String, [Int])])
exportDag term =
let (root, acc, _) = collectDag term [] Set.empty
-- acc is in reverse post-order (children first, root last)
ordered = reverse acc
idxMap = Map.fromList [(h, i) | (i, (h, _)) <- zip [0..] ordered]
rootIdx = idxMap Map.! root
lines_ = map (formatNode idxMap . snd) ordered
in (rootIdx, lines_)
where
collectDag :: T -> [(MerkleHash, Node)] -> Set.Set MerkleHash -> (MerkleHash, [(MerkleHash, Node)], Set.Set MerkleHash)
collectDag Leaf acc seen =
let h = nodeHash NLeaf
in if Set.member h seen then (h, acc, seen) else (h, (h, NLeaf) : acc, Set.insert h seen)
collectDag (Stem t) acc seen =
let (ch, acc', seen') = collectDag t acc seen
node = NStem ch
h = nodeHash node
in if Set.member h seen' then (h, acc', seen') else (h, (h, node) : acc', Set.insert h seen')
collectDag (Fork l r) acc seen =
let (lh, acc', seen') = collectDag l acc seen
(rh, acc'', seen'') = collectDag r acc' seen'
node = NFork lh rh
h = nodeHash node
in if Set.member h seen'' then (h, acc'', seen'') else (h, (h, node) : acc'', Set.insert h seen'')
formatNode :: Map.Map MerkleHash Int -> Node -> (String, [Int])
formatNode _ NLeaf = ("leaf", [])
formatNode idxMap (NStem ch) = ("stem", [idxMap Map.! ch])
formatNode idxMap (NFork l r) = ("fork", [idxMap Map.! l, idxMap Map.! r])