diff --git a/README.md b/README.md index 04ac7ce..696bc79 100644 --- a/README.md +++ b/README.md @@ -2,28 +2,13 @@ ## Introduction -tricu (pronounced "tree-shoe") is a purely functional interpreted language implemented in Haskell. It is fundamentally based on the application of [Tree Calculus](https://github.com/barry-jay-personal/typed_tree_calculus/blob/main/typed_program_analysis.pdf) terms, but minimal syntax sugar is included. - -*This experiment has concluded. tricu will see no further development or bugfixes.* +tricu (pronounced "tree-shoe") is a programming language experiment in Haskell. It is fundamentally based on the application of [Triage Calculus](https://olydis.medium.com/a-visual-introduction-to-tree-calculus-2f4a34ceffc2), an extended form of [Tree Calculus](https://github.com/barry-jay-personal/typed_tree_calculus/blob/main/typed_program_analysis.pdf), terms, but minimal syntax sugar is included. tricu is the word for "tree" in Lojban: `(x1) is a tree of species/cultivar (x2)`. ## Acknowledgements -Tree Calculus was discovered by [Barry Jay](https://github.com/barry-jay-personal/blog). - -[treecalcul.us](https://treecalcul.us) is an excellent website with an intuitive Tree Calculus code playground created by [Johannes Bader](https://johannes-bader.com/) that introduced me to Tree Calculus. - -## Features - -- Tree Calculus **operator**: `t` -- **Immutable definitions**: `x = t t` -- **Lambda abstraction**: `id = (a : a)` -- **List, Number, and String** literals: `[(2) ("Hello")]` -- **Function application**: `not (not false)` -- **Higher order/first-class functions**: `map (a : append a "!") [("Hello")]` -- **Intensionality** blurs the distinction between functions and data (see REPL examples) -- **Content-addressed store**: save, version, tag, and recall your tricu terms. +Tree Calculus was discovered by [Barry Jay](https://github.com/barry-jay-personal/blog). The addition of Triage rules were suggested by [Johannes Bader](https://johannes-bader.com/). Johannes is also the creator of [treecalcul.us](https://treecalcul.us) which has a great intuitive code playground using his language LambAda. ## REPL examples @@ -47,32 +32,17 @@ tricu < -- or calculate its size (/demos/size.tri) tricu < size not? tricu > 12 -tricu < !help -tricu version 0.20.0 -Available commands: - !exit - Exit the REPL - !clear - Clear the screen - !reset - Reset preferences for selected versions - !help - Show tricu version and available commands - !output - Change output format (tree|fsl|ast|ternary|ascii|decode) - !definitions - List all defined terms in the content store - !import - Import definitions from file (definitions are stored) - !watch - Watch a file for changes (definitions are stored) - !versions - Show all versions of a term by name - !select - Select a specific version of a term for subsequent lookups - !tag - Add or update a tag for a term by hash or name +tricu < -- REPL Commands: +tricu < !definitions -- Lists all available definitions +tricu < !output -- Change output format (Tree, FSL, AST, etc.) +tricu < !import -- Import definitions from a file +tricu < !exit -- Exit the REPL +tricu < !clear -- ANSI screen clear +tricu < !save -- Save all REPL definitions to a file that you can !import +tricu < !reset -- Clear all REPL definitions +tricu < !version -- Print tricu version ``` -## Content Store - -tricu uses a "content store" SQLite database that saves and versions your definitions persistently. - -* **Persistent definitions:** Any term you define in the REPL is automatically saved. -* **Content-addressed:** Terms are stored based on a SHA256 hash of their content. This means identical terms are stored only once, even if they have different names. -* **Versioning and history:** If you redefine a name, the Content Store keeps a record of previous definitions associated with that name. You can explore the history of a term and access older versions. -* **Tagging:** You can assign tags to versions of your terms to organize and quickly switch between related function versions. -* **Querying:** The store allows you to search for terms by name, hash, or tags. - ## Installation and Use You can easily build and run this project using [Nix](https://nixos.org/download/). diff --git a/src/ContentStore.hs b/src/ContentStore.hs index 7b15626..83ba82e 100644 --- a/src/ContentStore.hs +++ b/src/ContentStore.hs @@ -4,10 +4,9 @@ import Research import Parser import Control.Monad (foldM, forM) -import Crypto.Hash (hash, SHA256, Digest) import Data.ByteString (ByteString) -import Data.List (intercalate, nub, sortBy, sort) -import Data.Maybe (catMaybes) +import Data.List (nub, sort) +import Data.Maybe (catMaybes, fromJust) import Data.Text (Text) import Database.SQLite.Simple import Database.SQLite.Simple.FromRow (FromRow(..), field) @@ -15,30 +14,24 @@ import System.Directory (createDirectoryIfMissing, getXdgDirectory import System.FilePath ((), takeDirectory) import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as LBS import qualified Data.Map as Map -import qualified Data.Serialize as Cereal import qualified Data.Text as T +data StoredNode = StoredNode ByteString deriving (Show) + +instance FromRow StoredNode where + fromRow = StoredNode <$> field + data StoredTerm = StoredTerm { termHash :: Text , termNames :: Text - , termData :: ByteString , termMetadata :: Text , termCreatedAt :: Integer , termTags :: Text } deriving (Show) instance FromRow StoredTerm where - fromRow = StoredTerm <$> field <*> field <*> field <*> field <*> field <*> field - -tryDeserializeTerm :: ByteString -> IO (Maybe T) -tryDeserializeTerm bs = - case deserializeTerm bs of - Right t -> return $ Just t - Left err -> do - putStrLn $ "Error deserializing term: " ++ err - return Nothing + fromRow = StoredTerm <$> field <*> field <*> field <*> field <*> field parseNameList :: Text -> [Text] parseNameList = filter (not . T.null) . T.splitOn "," @@ -60,6 +53,9 @@ initContentStore = do \tags TEXT DEFAULT '')" execute_ conn "CREATE INDEX IF NOT EXISTS terms_names_idx ON terms(names)" execute_ conn "CREATE INDEX IF NOT EXISTS terms_tags_idx ON terms(tags)" + execute_ conn "CREATE TABLE IF NOT EXISTS merkle_nodes (\ + \hash TEXT PRIMARY KEY, \ + \node_data BLOB NOT NULL)" return conn getContentStorePath :: IO FilePath @@ -67,40 +63,18 @@ getContentStorePath = do dataDir <- getXdgDirectory XdgData "tricu" return $ dataDir "content-store.db" -instance Cereal.Serialize T where - put Leaf = Cereal.putWord8 0 - put (Stem t) = do - Cereal.putWord8 1 - Cereal.put t - put (Fork a b) = do - Cereal.putWord8 2 - Cereal.put a - Cereal.put b - get = do - tag <- Cereal.getWord8 - case tag of - 0 -> return Leaf - 1 -> Stem <$> Cereal.get - 2 -> Fork <$> Cereal.get <*> Cereal.get - _ -> fail $ "Invalid tag for T: " ++ show tag - -serializeTerm :: T -> ByteString -serializeTerm = LBS.toStrict . Cereal.encodeLazy - -deserializeTerm :: ByteString -> Either String T -deserializeTerm = Cereal.decodeLazy . LBS.fromStrict hashTerm :: T -> Text -hashTerm = T.pack . show . (hash :: ByteString -> Digest SHA256) . serializeTerm +hashTerm = nodeHash . buildMerkle storeTerm :: Connection -> [String] -> T -> IO Text storeTerm conn newNamesStrList term = do - let termBS = serializeTerm term - termHashText = hashTerm term + let termHashText = hashTerm term newNamesTextList = map T.pack newNamesStrList metadataText = T.pack "{}" - + -- Store all Merkle nodes for this term + _ <- storeMerkleNodes conn term existingNamesQuery <- query conn "SELECT names FROM terms WHERE hash = ?" (Only termHashText) :: IO [Only Text] @@ -110,7 +84,7 @@ storeTerm conn newNamesStrList term = do let allNamesToStore = serializeNameList newNamesTextList execute conn "INSERT INTO terms (hash, names, term_data, metadata, tags) VALUES (?, ?, ?, ?, ?)" - (termHashText, allNamesToStore, termBS, metadataText, T.pack "") + (termHashText, allNamesToStore, BS.pack [], metadataText, T.pack "") [(Only currentNamesText)] -> do let currentNamesList = parseNameList currentNamesText let combinedNamesList = currentNamesList ++ newNamesTextList @@ -118,9 +92,61 @@ storeTerm conn newNamesStrList term = do execute conn "UPDATE terms SET names = ?, metadata = ? WHERE hash = ?" (allNamesToStore, metadataText, termHashText) + _ -> error $ "Multiple terms with same hash? " ++ show (length existingNamesQuery) return termHashText +-- | Reconstruct a Tree Calculus term from its Merkle root hash. +-- Recursively loads nodes and rebuilds the T structure. +loadTree conn h + | h == nodeHash NLeaf = return (Just Leaf) -- NLeaf is implicit, not stored + | otherwise = do + maybeNode <- getNodeMerkle conn h + case maybeNode of + Nothing -> return Nothing + Just node -> Just <$> buildTree node + where + buildTree :: Node -> IO T + buildTree (NStem childHash) = do + child <- fromJust <$> loadTree conn childHash + return (Stem child) + buildTree (NFork lHash rHash) = do + left <- fromJust <$> loadTree conn lHash + right <- fromJust <$> loadTree conn rHash + return (Fork left right) + +-- | Store all nodes of a Merkle DAG by traversing the Term and building/storing nodes. +-- Returns the hash of the root node. +storeMerkleNodes :: Connection -> T -> IO MerkleHash +storeMerkleNodes _ Leaf = return $ nodeHash NLeaf +storeMerkleNodes conn (Stem t) = do + childHash <- storeMerkleNodes conn t + let thisNode = NStem childHash + putMerkleNode conn thisNode + return $ nodeHash thisNode +storeMerkleNodes conn (Fork l r) = do + leftHash <- storeMerkleNodes conn l + rightHash <- storeMerkleNodes conn r + let thisNode = NFork leftHash rightHash + putMerkleNode conn thisNode + return $ nodeHash thisNode + + +-- | Insert a Merkle node into the store (idempotent). +putMerkleNode :: Connection -> Node -> IO () +putMerkleNode conn node = + execute conn "INSERT OR IGNORE INTO merkle_nodes (hash, node_data) VALUES (?, ?)" + (nodeHash node, serializeNode node) + +-- | Retrieve a Merkle node by its hash. +getNodeMerkle :: Connection -> MerkleHash -> IO (Maybe Node) +getNodeMerkle conn h = + queryMaybeOne conn "SELECT node_data FROM merkle_nodes WHERE hash = ?" (Only h) >>= \case + Just (StoredNode bs) -> return $ Just (deserializeNode bs) + Nothing -> return Nothing + + + hashToTerm :: Connection -> Text -> IO (Maybe StoredTerm) hashToTerm conn hashText = queryMaybeOne conn (selectStoredTermFields <> " WHERE hash = ?") (Only hashText) @@ -148,7 +174,7 @@ loadTerm :: Connection -> String -> IO (Maybe T) loadTerm conn identifier = do result <- getTerm conn (T.pack identifier) case result of - Just storedTerm -> tryDeserializeTerm (termData storedTerm) + Just storedTerm -> loadTree conn (termHash storedTerm) Nothing -> return Nothing getTerm :: Connection -> Text -> IO (Maybe StoredTerm) @@ -163,7 +189,7 @@ loadEnvironment conn = do foldM addTermToEnv Map.empty terms where addTermToEnv env storedTerm = do - maybeT <- tryDeserializeTerm (termData storedTerm) + maybeT <- loadTree conn (termHash storedTerm) case maybeT of Just t -> do let namesList = parseNameList (termNames storedTerm) @@ -174,11 +200,11 @@ termVersions :: Connection -> String -> IO [(Text, T, Integer)] termVersions conn name = do let nameText = T.pack name results <- query conn - ("SELECT hash, term_data, created_at FROM terms WHERE (names = ? OR names LIKE ? OR names LIKE ? OR names LIKE ?) ORDER BY created_at DESC") + ("SELECT hash, created_at FROM terms WHERE (names = ? OR names LIKE ? OR names LIKE ? OR names LIKE ?) ORDER BY created_at DESC") (nameText, nameText <> T.pack ",%", T.pack "%," <> nameText <> T.pack ",%", T.pack "%," <> nameText) - catMaybes <$> mapM (\(hashVal, termDataVal, timestamp) -> do - maybeT <- tryDeserializeTerm termDataVal + catMaybes <$> mapM (\(hashVal, timestamp) -> do + maybeT <- loadTree conn hashVal return $ fmap (\t -> (hashVal, t, timestamp)) maybeT ) results @@ -220,7 +246,7 @@ allTermTags conn = do query_ conn (selectStoredTermFields <> " WHERE tags IS NOT NULL AND tags != '' ORDER BY created_at DESC") selectStoredTermFields :: Query -selectStoredTermFields = "SELECT hash, names, term_data, metadata, created_at, tags FROM terms" +selectStoredTermFields = "SELECT hash, names, metadata, created_at, tags FROM terms" queryMaybeOne :: (FromRow r, ToRow q) => Connection -> Query -> q -> IO (Maybe r) queryMaybeOne conn qry params = do diff --git a/src/Eval.hs b/src/Eval.hs index 28ccf0f..fdfc8d7 100644 --- a/src/Eval.hs +++ b/src/Eval.hs @@ -113,13 +113,7 @@ resolveTermFromStore conn selectedVersions name mhash = case mhash of [(_, term, _)] -> return $ Just term _ -> return Nothing -- Ambiguous or too many matches Nothing -> case Map.lookup name selectedVersions of - Just hash -> do - mterm <- hashToTerm conn hash - case mterm of - Just term -> case deserializeTerm (termData term) of - Right t -> return $ Just t - Left _ -> return Nothing - Nothing -> return Nothing + Just hash -> loadTree conn hash Nothing -> do versions <- termVersions conn name case versions of diff --git a/src/Research.hs b/src/Research.hs index 781208e..1a0b5ec 100644 --- a/src/Research.hs +++ b/src/Research.hs @@ -1,12 +1,17 @@ module Research where +import Data.ByteArray (convert) +import Data.Char (chr, ord) import Data.List (intercalate) import Data.Map (Map) -import Data.Text (Text, replace) +import Data.Text (Text, replace, unpack) +import Data.Word (Word8) import System.Console.CmdArgs (Data, Typeable) +import qualified Data.ByteString as BS import qualified Data.Map as Map -import qualified Data.Text as T +import qualified Data.Text as T +import Crypto.Hash (hash, SHA256, Digest) -- Tree Calculus Types data T = Leaf | Stem T | Fork T T @@ -54,6 +59,93 @@ data EvaluatedForm = TreeCalculus | FSL | AST | Ternary | Ascii | Decode -- Environment containing previously evaluated TC terms type Env = Map.Map String T +-- Merkle DAG Node types +-- Each Tree Calculus node becomes a content-addressed object. + +type MerkleHash = Text + +data Node + = NLeaf + | NStem MerkleHash + | NFork MerkleHash MerkleHash + deriving (Show, Eq, Ord) + +-- | Canonical serialization of a Node for hashing. +-- Leaf: 0x00 +-- Stem: 0x01 || child_hash (32 bytes) +-- Fork: 0x02 || left_hash (32 bytes) || right_hash (32 bytes) +serializeNode :: Node -> BS.ByteString +serializeNode NLeaf = BS.pack [0x00] +serializeNode (NStem h) = BS.pack [0x01] <> hexToBytes h +serializeNode (NFork l r) = BS.pack [0x02] <> hexToBytes l <> hexToBytes r + +-- | Hash a node per the Merkle content-addressing spec. +-- hash = SHA256( "tricu.merkle.node.v1" <> 0x00 <> node_payload ) +nodeHash :: Node -> MerkleHash +nodeHash node = bytesToHex (sha256WithPrefix (serializeNode node)) + where sha256WithPrefix payload = + convert . (hash :: BS.ByteString -> Digest SHA256) $ utf8Tag <> BS.pack [0x00] <> payload + utf8Tag = BS.pack $ map fromIntegral $ BS.unpack "tricu.merkle.node.v1" + +-- | Convert a Hex Text hash into raw ByteString (2 hex chars per byte) +hexToBytes :: Text -> BS.ByteString +hexToBytes h = BS.pack $ map combinePair pairs + where + chars = unpack h + pairs = chunkPairs chars + chunkPairs :: String -> [(Char, Char)] + chunkPairs (c1:c2:rest) = (c1, c2) : chunkPairs rest + chunkPairs [] = [] + chunkPairs _ = error "hexToBytes: odd number of hex digits" + combinePair :: (Char, Char) -> Word8 + combinePair (c1, c2) = fromIntegral (hexDigitToInt c1 * 16 + hexDigitToInt c2) + hexDigitToInt :: Char -> Int + hexDigitToInt c + | '0' <= c && c <= '9' = ord c - ord '0' + | 'a' <= c && c <= 'f' = ord c - ord 'a' + 10 + | 'A' <= c && c <= 'F' = ord c - ord 'A' + 10 + | otherwise = error $ "Invalid hex digit: " ++ show c + +-- | Deserialize a Node from canonical bytes. +deserializeNode :: BS.ByteString -> Node +deserializeNode bs = + case BS.uncons bs of + Just (0x00, rest) + | BS.null rest -> NLeaf + + Just (0x01, rest) + | BS.length rest == 32 -> + NStem $ bytesToHex rest + + Just (0x02, rest) + | BS.length rest == 64 -> + let (l, r) = BS.splitAt 32 rest + in NFork (bytesToHex l) (bytesToHex r) + + _ -> error "invalid merkle node payload" + + +-- | Convert 32-byte ByteString back to hex Text +bytesToHex :: BS.ByteString -> Text +bytesToHex bs = T.pack $ concatMap byteToHexChars $ BS.unpack bs + where + byteToHexChars :: Word8 -> String + byteToHexChars w = [hexDigit (fromIntegral w `div` 16), hexDigit (fromIntegral w `mod` 16)] + hexDigit :: Int -> Char + hexDigit n + | n < 10 = chr (ord '0' + n) + | otherwise = chr (ord 'a' + n - 10) + +-- | Build a Merkle DAG from a Tree Calculus term. +buildMerkle :: T -> Node +buildMerkle Leaf = NLeaf +buildMerkle (Stem t) = NStem (nodeHash child) + where child = buildMerkle t +buildMerkle (Fork l r) = NFork (nodeHash left) (nodeHash right) + where + left = buildMerkle l + right = buildMerkle r + -- Tree Calculus Reduction Rules {- The t operator is left associative. diff --git a/tricu.cabal b/tricu.cabal index 992e9f8..20fc8a7 100644 --- a/tricu.cabal +++ b/tricu.cabal @@ -39,6 +39,7 @@ executable tricu , fsnotify , haskeline , megaparsec + , memory , mtl , sqlite-simple , tasty @@ -82,6 +83,7 @@ test-suite tricu-tests , fsnotify , haskeline , megaparsec + , memory , mtl , sqlite-simple , tasty