diff --git a/README.md b/README.md index 04ac7ce..696bc79 100644 --- a/README.md +++ b/README.md @@ -2,28 +2,13 @@ ## Introduction -tricu (pronounced "tree-shoe") is a purely functional interpreted language implemented in Haskell. It is fundamentally based on the application of [Tree Calculus](https://github.com/barry-jay-personal/typed_tree_calculus/blob/main/typed_program_analysis.pdf) terms, but minimal syntax sugar is included. - -*This experiment has concluded. tricu will see no further development or bugfixes.* +tricu (pronounced "tree-shoe") is a programming language experiment in Haskell. It is fundamentally based on the application of [Triage Calculus](https://olydis.medium.com/a-visual-introduction-to-tree-calculus-2f4a34ceffc2), an extended form of [Tree Calculus](https://github.com/barry-jay-personal/typed_tree_calculus/blob/main/typed_program_analysis.pdf), terms, but minimal syntax sugar is included. tricu is the word for "tree" in Lojban: `(x1) is a tree of species/cultivar (x2)`. ## Acknowledgements -Tree Calculus was discovered by [Barry Jay](https://github.com/barry-jay-personal/blog). - -[treecalcul.us](https://treecalcul.us) is an excellent website with an intuitive Tree Calculus code playground created by [Johannes Bader](https://johannes-bader.com/) that introduced me to Tree Calculus. - -## Features - -- Tree Calculus **operator**: `t` -- **Immutable definitions**: `x = t t` -- **Lambda abstraction**: `id = (a : a)` -- **List, Number, and String** literals: `[(2) ("Hello")]` -- **Function application**: `not (not false)` -- **Higher order/first-class functions**: `map (a : append a "!") [("Hello")]` -- **Intensionality** blurs the distinction between functions and data (see REPL examples) -- **Content-addressed store**: save, version, tag, and recall your tricu terms. +Tree Calculus was discovered by [Barry Jay](https://github.com/barry-jay-personal/blog). The addition of Triage rules were suggested by [Johannes Bader](https://johannes-bader.com/). Johannes is also the creator of [treecalcul.us](https://treecalcul.us) which has a great intuitive code playground using his language LambAda. ## REPL examples @@ -47,32 +32,17 @@ tricu < -- or calculate its size (/demos/size.tri) tricu < size not? tricu > 12 -tricu < !help -tricu version 0.20.0 -Available commands: - !exit - Exit the REPL - !clear - Clear the screen - !reset - Reset preferences for selected versions - !help - Show tricu version and available commands - !output - Change output format (tree|fsl|ast|ternary|ascii|decode) - !definitions - List all defined terms in the content store - !import - Import definitions from file (definitions are stored) - !watch - Watch a file for changes (definitions are stored) - !versions - Show all versions of a term by name - !select - Select a specific version of a term for subsequent lookups - !tag - Add or update a tag for a term by hash or name +tricu < -- REPL Commands: +tricu < !definitions -- Lists all available definitions +tricu < !output -- Change output format (Tree, FSL, AST, etc.) +tricu < !import -- Import definitions from a file +tricu < !exit -- Exit the REPL +tricu < !clear -- ANSI screen clear +tricu < !save -- Save all REPL definitions to a file that you can !import +tricu < !reset -- Clear all REPL definitions +tricu < !version -- Print tricu version ``` -## Content Store - -tricu uses a "content store" SQLite database that saves and versions your definitions persistently. - -* **Persistent definitions:** Any term you define in the REPL is automatically saved. -* **Content-addressed:** Terms are stored based on a SHA256 hash of their content. This means identical terms are stored only once, even if they have different names. -* **Versioning and history:** If you redefine a name, the Content Store keeps a record of previous definitions associated with that name. You can explore the history of a term and access older versions. -* **Tagging:** You can assign tags to versions of your terms to organize and quickly switch between related function versions. -* **Querying:** The store allows you to search for terms by name, hash, or tags. - ## Installation and Use You can easily build and run this project using [Nix](https://nixos.org/download/). diff --git a/src/ContentStore.hs b/src/ContentStore.hs index 24cfb73..83ba82e 100644 --- a/src/ContentStore.hs +++ b/src/ContentStore.hs @@ -4,10 +4,9 @@ import Research import Parser import Control.Monad (foldM, forM) -import Crypto.Hash (hash, SHA256, Digest) import Data.ByteString (ByteString) -import Data.List (intercalate, nub, sortBy, sort) -import Data.Maybe (catMaybes) +import Data.List (nub, sort) +import Data.Maybe (catMaybes, fromJust) import Data.Text (Text) import Database.SQLite.Simple import Database.SQLite.Simple.FromRow (FromRow(..), field) @@ -15,9 +14,7 @@ import System.Directory (createDirectoryIfMissing, getXdgDirectory import System.FilePath ((), takeDirectory) import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as LBS import qualified Data.Map as Map -import qualified Data.Serialize as Cereal import qualified Data.Text as T data StoredNode = StoredNode ByteString deriving (Show) @@ -28,22 +25,13 @@ instance FromRow StoredNode where data StoredTerm = StoredTerm { termHash :: Text , termNames :: Text - , termData :: ByteString , termMetadata :: Text , termCreatedAt :: Integer , termTags :: Text } deriving (Show) instance FromRow StoredTerm where - fromRow = StoredTerm <$> field <*> field <*> field <*> field <*> field <*> field - -tryDeserializeTerm :: ByteString -> IO (Maybe T) -tryDeserializeTerm bs = - case deserializeTerm bs of - Right t -> return $ Just t - Left err -> do - putStrLn $ "Error deserializing term: " ++ err - return Nothing + fromRow = StoredTerm <$> field <*> field <*> field <*> field <*> field parseNameList :: Text -> [Text] parseNameList = filter (not . T.null) . T.splitOn "," @@ -75,90 +63,70 @@ getContentStorePath = do dataDir <- getXdgDirectory XdgData "tricu" return $ dataDir "content-store.db" -instance Cereal.Serialize T where - put Leaf = Cereal.putWord8 0 - put (Stem t) = do - Cereal.putWord8 1 - Cereal.put t - put (Fork a b) = do - Cereal.putWord8 2 - Cereal.put a - Cereal.put b - get = do - tag <- Cereal.getWord8 - case tag of - 0 -> return Leaf - 1 -> Stem <$> Cereal.get - 2 -> Fork <$> Cereal.get <*> Cereal.get - _ -> fail $ "Invalid tag for T: " ++ show tag - -serializeTerm :: T -> ByteString -serializeTerm = LBS.toStrict . Cereal.encodeLazy - -deserializeTerm :: ByteString -> Either String T -deserializeTerm = Cereal.decodeLazy . LBS.fromStrict hashTerm :: T -> Text hashTerm = nodeHash . buildMerkle storeTerm :: Connection -> [String] -> T -> IO Text storeTerm conn newNamesStrList term = do - putStrLn $ "DEBUG: storeTerm called for names " ++ show newNamesStrList - let termBS = serializeTerm term - termHashText = hashTerm term + let termHashText = hashTerm term newNamesTextList = map T.pack newNamesStrList metadataText = T.pack "{}" - putStrLn $ "DEBUG: termHash = " ++ T.unpack termHashText -- Store all Merkle nodes for this term - putStrLn "DEBUG: storing merkle nodes" _ <- storeMerkleNodes conn term - putStrLn "DEBUG: merkle nodes stored, querying existing" existingNamesQuery <- query conn "SELECT names FROM terms WHERE hash = ?" (Only termHashText) :: IO [Only Text] - putStrLn $ "DEBUG: query result = " ++ show (length existingNamesQuery) ++ " results" case existingNamesQuery of [] -> do - putStrLn "DEBUG: inserting new term" let allNamesToStore = serializeNameList newNamesTextList execute conn "INSERT INTO terms (hash, names, term_data, metadata, tags) VALUES (?, ?, ?, ?, ?)" - (termHashText, allNamesToStore, termBS, metadataText, T.pack "") - putStrLn "DEBUG: insert complete" + (termHashText, allNamesToStore, BS.pack [], metadataText, T.pack "") [(Only currentNamesText)] -> do - putStrLn $ "DEBUG: updating existing term, current names = " ++ T.unpack currentNamesText let currentNamesList = parseNameList currentNamesText let combinedNamesList = currentNamesList ++ newNamesTextList let allNamesToStore = serializeNameList combinedNamesList execute conn "UPDATE terms SET names = ?, metadata = ? WHERE hash = ?" (allNamesToStore, metadataText, termHashText) - putStrLn "DEBUG: update complete" _ -> error $ "Multiple terms with same hash? " ++ show (length existingNamesQuery) - putStrLn "DEBUG: storeTerm returning" return termHashText +-- | Reconstruct a Tree Calculus term from its Merkle root hash. +-- Recursively loads nodes and rebuilds the T structure. +loadTree conn h + | h == nodeHash NLeaf = return (Just Leaf) -- NLeaf is implicit, not stored + | otherwise = do + maybeNode <- getNodeMerkle conn h + case maybeNode of + Nothing -> return Nothing + Just node -> Just <$> buildTree node + where + buildTree :: Node -> IO T + buildTree (NStem childHash) = do + child <- fromJust <$> loadTree conn childHash + return (Stem child) + buildTree (NFork lHash rHash) = do + left <- fromJust <$> loadTree conn lHash + right <- fromJust <$> loadTree conn rHash + return (Fork left right) + -- | Store all nodes of a Merkle DAG by traversing the Term and building/storing nodes. -- Returns the hash of the root node. --- This function builds the Merkle DAG while storing nodes, ensuring children are --- stored before their parents. storeMerkleNodes :: Connection -> T -> IO MerkleHash -storeMerkleNodes _ Leaf = return $ nodeHash NLeaf -- NLeaf hash is constant +storeMerkleNodes _ Leaf = return $ nodeHash NLeaf storeMerkleNodes conn (Stem t) = do - -- Recursively process child first and get its hash childHash <- storeMerkleNodes conn t - -- Store this node let thisNode = NStem childHash putMerkleNode conn thisNode return $ nodeHash thisNode storeMerkleNodes conn (Fork l r) = do - -- Recursively process both children first and get their hashes leftHash <- storeMerkleNodes conn l rightHash <- storeMerkleNodes conn r - -- Store this node let thisNode = NFork leftHash rightHash putMerkleNode conn thisNode return $ nodeHash thisNode @@ -177,6 +145,8 @@ getNodeMerkle conn h = Just (StoredNode bs) -> return $ Just (deserializeNode bs) Nothing -> return Nothing + + hashToTerm :: Connection -> Text -> IO (Maybe StoredTerm) hashToTerm conn hashText = queryMaybeOne conn (selectStoredTermFields <> " WHERE hash = ?") (Only hashText) @@ -204,7 +174,7 @@ loadTerm :: Connection -> String -> IO (Maybe T) loadTerm conn identifier = do result <- getTerm conn (T.pack identifier) case result of - Just storedTerm -> tryDeserializeTerm (termData storedTerm) + Just storedTerm -> loadTree conn (termHash storedTerm) Nothing -> return Nothing getTerm :: Connection -> Text -> IO (Maybe StoredTerm) @@ -219,7 +189,7 @@ loadEnvironment conn = do foldM addTermToEnv Map.empty terms where addTermToEnv env storedTerm = do - maybeT <- tryDeserializeTerm (termData storedTerm) + maybeT <- loadTree conn (termHash storedTerm) case maybeT of Just t -> do let namesList = parseNameList (termNames storedTerm) @@ -230,11 +200,11 @@ termVersions :: Connection -> String -> IO [(Text, T, Integer)] termVersions conn name = do let nameText = T.pack name results <- query conn - ("SELECT hash, term_data, created_at FROM terms WHERE (names = ? OR names LIKE ? OR names LIKE ? OR names LIKE ?) ORDER BY created_at DESC") + ("SELECT hash, created_at FROM terms WHERE (names = ? OR names LIKE ? OR names LIKE ? OR names LIKE ?) ORDER BY created_at DESC") (nameText, nameText <> T.pack ",%", T.pack "%," <> nameText <> T.pack ",%", T.pack "%," <> nameText) - catMaybes <$> mapM (\(hashVal, termDataVal, timestamp) -> do - maybeT <- tryDeserializeTerm termDataVal + catMaybes <$> mapM (\(hashVal, timestamp) -> do + maybeT <- loadTree conn hashVal return $ fmap (\t -> (hashVal, t, timestamp)) maybeT ) results @@ -276,7 +246,7 @@ allTermTags conn = do query_ conn (selectStoredTermFields <> " WHERE tags IS NOT NULL AND tags != '' ORDER BY created_at DESC") selectStoredTermFields :: Query -selectStoredTermFields = "SELECT hash, names, term_data, metadata, created_at, tags FROM terms" +selectStoredTermFields = "SELECT hash, names, metadata, created_at, tags FROM terms" queryMaybeOne :: (FromRow r, ToRow q) => Connection -> Query -> q -> IO (Maybe r) queryMaybeOne conn qry params = do diff --git a/src/Eval.hs b/src/Eval.hs index 28ccf0f..fdfc8d7 100644 --- a/src/Eval.hs +++ b/src/Eval.hs @@ -113,13 +113,7 @@ resolveTermFromStore conn selectedVersions name mhash = case mhash of [(_, term, _)] -> return $ Just term _ -> return Nothing -- Ambiguous or too many matches Nothing -> case Map.lookup name selectedVersions of - Just hash -> do - mterm <- hashToTerm conn hash - case mterm of - Just term -> case deserializeTerm (termData term) of - Right t -> return $ Just t - Left _ -> return Nothing - Nothing -> return Nothing + Just hash -> loadTree conn hash Nothing -> do versions <- termVersions conn name case versions of diff --git a/src/Research.hs b/src/Research.hs index 43683dd..1a0b5ec 100644 --- a/src/Research.hs +++ b/src/Research.hs @@ -87,16 +87,18 @@ nodeHash node = bytesToHex (sha256WithPrefix (serializeNode node)) convert . (hash :: BS.ByteString -> Digest SHA256) $ utf8Tag <> BS.pack [0x00] <> payload utf8Tag = BS.pack $ map fromIntegral $ BS.unpack "tricu.merkle.node.v1" --- | Convert a Hex Text hash into raw 32-byte ByteString +-- | Convert a Hex Text hash into raw ByteString (2 hex chars per byte) hexToBytes :: Text -> BS.ByteString -hexToBytes h = BS.pack $ map (uncurry combinePair) pairs +hexToBytes h = BS.pack $ map combinePair pairs where - pairs :: [(Char, Char)] - pairs = zip (unpack h) (drop 1 $ unpack h) - combinePair :: Char -> Char -> Word8 - combinePair c1 c2 = fromIntegral (nib1 * 16 + nib2) - where nib1 = hexDigitToInt c1 - nib2 = hexDigitToInt c2 + chars = unpack h + pairs = chunkPairs chars + chunkPairs :: String -> [(Char, Char)] + chunkPairs (c1:c2:rest) = (c1, c2) : chunkPairs rest + chunkPairs [] = [] + chunkPairs _ = error "hexToBytes: odd number of hex digits" + combinePair :: (Char, Char) -> Word8 + combinePair (c1, c2) = fromIntegral (hexDigitToInt c1 * 16 + hexDigitToInt c2) hexDigitToInt :: Char -> Int hexDigitToInt c | '0' <= c && c <= '9' = ord c - ord '0' @@ -106,12 +108,22 @@ hexToBytes h = BS.pack $ map (uncurry combinePair) pairs -- | Deserialize a Node from canonical bytes. deserializeNode :: BS.ByteString -> Node -deserializeNode bs - | BS.null bs || BS.head bs == 0x00 = NLeaf - | BS.head bs == 0x01 = NStem $ bytesToHex (BS.take 32 (BS.drop 1 bs)) - | BS.head bs == 0x02 = NFork (bytesToHex (BS.take 32 (BS.drop 1 bs))) - (bytesToHex (BS.take 32 (BS.drop 33 (BS.drop 1 bs)))) - | otherwise = error $ "Unknown node type tag: " ++ show (BS.head bs) +deserializeNode bs = + case BS.uncons bs of + Just (0x00, rest) + | BS.null rest -> NLeaf + + Just (0x01, rest) + | BS.length rest == 32 -> + NStem $ bytesToHex rest + + Just (0x02, rest) + | BS.length rest == 64 -> + let (l, r) = BS.splitAt 32 rest + in NFork (bytesToHex l) (bytesToHex r) + + _ -> error "invalid merkle node payload" + -- | Convert 32-byte ByteString back to hex Text bytesToHex :: BS.ByteString -> Text