Full Merkle tree resolution

This commit is contained in:
2026-05-05 12:43:03 -05:00
parent 72e5810ca9
commit 6b97b210ca
5 changed files with 182 additions and 98 deletions

View File

@@ -4,10 +4,9 @@ import Research
import Parser
import Control.Monad (foldM, forM)
import Crypto.Hash (hash, SHA256, Digest)
import Data.ByteString (ByteString)
import Data.List (intercalate, nub, sortBy, sort)
import Data.Maybe (catMaybes)
import Data.List (nub, sort)
import Data.Maybe (catMaybes, fromJust)
import Data.Text (Text)
import Database.SQLite.Simple
import Database.SQLite.Simple.FromRow (FromRow(..), field)
@@ -15,30 +14,24 @@ import System.Directory (createDirectoryIfMissing, getXdgDirectory
import System.FilePath ((</>), takeDirectory)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Map as Map
import qualified Data.Serialize as Cereal
import qualified Data.Text as T
data StoredNode = StoredNode ByteString deriving (Show)
instance FromRow StoredNode where
fromRow = StoredNode <$> field
data StoredTerm = StoredTerm
{ termHash :: Text
, termNames :: Text
, termData :: ByteString
, termMetadata :: Text
, termCreatedAt :: Integer
, termTags :: Text
} deriving (Show)
instance FromRow StoredTerm where
fromRow = StoredTerm <$> field <*> field <*> field <*> field <*> field <*> field
tryDeserializeTerm :: ByteString -> IO (Maybe T)
tryDeserializeTerm bs =
case deserializeTerm bs of
Right t -> return $ Just t
Left err -> do
putStrLn $ "Error deserializing term: " ++ err
return Nothing
fromRow = StoredTerm <$> field <*> field <*> field <*> field <*> field
parseNameList :: Text -> [Text]
parseNameList = filter (not . T.null) . T.splitOn ","
@@ -60,6 +53,9 @@ initContentStore = do
\tags TEXT DEFAULT '')"
execute_ conn "CREATE INDEX IF NOT EXISTS terms_names_idx ON terms(names)"
execute_ conn "CREATE INDEX IF NOT EXISTS terms_tags_idx ON terms(tags)"
execute_ conn "CREATE TABLE IF NOT EXISTS merkle_nodes (\
\hash TEXT PRIMARY KEY, \
\node_data BLOB NOT NULL)"
return conn
getContentStorePath :: IO FilePath
@@ -67,40 +63,18 @@ getContentStorePath = do
dataDir <- getXdgDirectory XdgData "tricu"
return $ dataDir </> "content-store.db"
instance Cereal.Serialize T where
put Leaf = Cereal.putWord8 0
put (Stem t) = do
Cereal.putWord8 1
Cereal.put t
put (Fork a b) = do
Cereal.putWord8 2
Cereal.put a
Cereal.put b
get = do
tag <- Cereal.getWord8
case tag of
0 -> return Leaf
1 -> Stem <$> Cereal.get
2 -> Fork <$> Cereal.get <*> Cereal.get
_ -> fail $ "Invalid tag for T: " ++ show tag
serializeTerm :: T -> ByteString
serializeTerm = LBS.toStrict . Cereal.encodeLazy
deserializeTerm :: ByteString -> Either String T
deserializeTerm = Cereal.decodeLazy . LBS.fromStrict
hashTerm :: T -> Text
hashTerm = T.pack . show . (hash :: ByteString -> Digest SHA256) . serializeTerm
hashTerm = nodeHash . buildMerkle
storeTerm :: Connection -> [String] -> T -> IO Text
storeTerm conn newNamesStrList term = do
let termBS = serializeTerm term
termHashText = hashTerm term
let termHashText = hashTerm term
newNamesTextList = map T.pack newNamesStrList
metadataText = T.pack "{}"
-- Store all Merkle nodes for this term
_ <- storeMerkleNodes conn term
existingNamesQuery <- query conn
"SELECT names FROM terms WHERE hash = ?"
(Only termHashText) :: IO [Only Text]
@@ -110,7 +84,7 @@ storeTerm conn newNamesStrList term = do
let allNamesToStore = serializeNameList newNamesTextList
execute conn
"INSERT INTO terms (hash, names, term_data, metadata, tags) VALUES (?, ?, ?, ?, ?)"
(termHashText, allNamesToStore, termBS, metadataText, T.pack "")
(termHashText, allNamesToStore, BS.pack [], metadataText, T.pack "")
[(Only currentNamesText)] -> do
let currentNamesList = parseNameList currentNamesText
let combinedNamesList = currentNamesList ++ newNamesTextList
@@ -118,9 +92,61 @@ storeTerm conn newNamesStrList term = do
execute conn
"UPDATE terms SET names = ?, metadata = ? WHERE hash = ?"
(allNamesToStore, metadataText, termHashText)
_ -> error $ "Multiple terms with same hash? " ++ show (length existingNamesQuery)
return termHashText
-- | Reconstruct a Tree Calculus term from its Merkle root hash.
-- Recursively loads nodes and rebuilds the T structure.
loadTree conn h
| h == nodeHash NLeaf = return (Just Leaf) -- NLeaf is implicit, not stored
| otherwise = do
maybeNode <- getNodeMerkle conn h
case maybeNode of
Nothing -> return Nothing
Just node -> Just <$> buildTree node
where
buildTree :: Node -> IO T
buildTree (NStem childHash) = do
child <- fromJust <$> loadTree conn childHash
return (Stem child)
buildTree (NFork lHash rHash) = do
left <- fromJust <$> loadTree conn lHash
right <- fromJust <$> loadTree conn rHash
return (Fork left right)
-- | Store all nodes of a Merkle DAG by traversing the Term and building/storing nodes.
-- Returns the hash of the root node.
storeMerkleNodes :: Connection -> T -> IO MerkleHash
storeMerkleNodes _ Leaf = return $ nodeHash NLeaf
storeMerkleNodes conn (Stem t) = do
childHash <- storeMerkleNodes conn t
let thisNode = NStem childHash
putMerkleNode conn thisNode
return $ nodeHash thisNode
storeMerkleNodes conn (Fork l r) = do
leftHash <- storeMerkleNodes conn l
rightHash <- storeMerkleNodes conn r
let thisNode = NFork leftHash rightHash
putMerkleNode conn thisNode
return $ nodeHash thisNode
-- | Insert a Merkle node into the store (idempotent).
putMerkleNode :: Connection -> Node -> IO ()
putMerkleNode conn node =
execute conn "INSERT OR IGNORE INTO merkle_nodes (hash, node_data) VALUES (?, ?)"
(nodeHash node, serializeNode node)
-- | Retrieve a Merkle node by its hash.
getNodeMerkle :: Connection -> MerkleHash -> IO (Maybe Node)
getNodeMerkle conn h =
queryMaybeOne conn "SELECT node_data FROM merkle_nodes WHERE hash = ?" (Only h) >>= \case
Just (StoredNode bs) -> return $ Just (deserializeNode bs)
Nothing -> return Nothing
hashToTerm :: Connection -> Text -> IO (Maybe StoredTerm)
hashToTerm conn hashText =
queryMaybeOne conn (selectStoredTermFields <> " WHERE hash = ?") (Only hashText)
@@ -148,7 +174,7 @@ loadTerm :: Connection -> String -> IO (Maybe T)
loadTerm conn identifier = do
result <- getTerm conn (T.pack identifier)
case result of
Just storedTerm -> tryDeserializeTerm (termData storedTerm)
Just storedTerm -> loadTree conn (termHash storedTerm)
Nothing -> return Nothing
getTerm :: Connection -> Text -> IO (Maybe StoredTerm)
@@ -163,7 +189,7 @@ loadEnvironment conn = do
foldM addTermToEnv Map.empty terms
where
addTermToEnv env storedTerm = do
maybeT <- tryDeserializeTerm (termData storedTerm)
maybeT <- loadTree conn (termHash storedTerm)
case maybeT of
Just t -> do
let namesList = parseNameList (termNames storedTerm)
@@ -174,11 +200,11 @@ termVersions :: Connection -> String -> IO [(Text, T, Integer)]
termVersions conn name = do
let nameText = T.pack name
results <- query conn
("SELECT hash, term_data, created_at FROM terms WHERE (names = ? OR names LIKE ? OR names LIKE ? OR names LIKE ?) ORDER BY created_at DESC")
("SELECT hash, created_at FROM terms WHERE (names = ? OR names LIKE ? OR names LIKE ? OR names LIKE ?) ORDER BY created_at DESC")
(nameText, nameText <> T.pack ",%", T.pack "%," <> nameText <> T.pack ",%", T.pack "%," <> nameText)
catMaybes <$> mapM (\(hashVal, termDataVal, timestamp) -> do
maybeT <- tryDeserializeTerm termDataVal
catMaybes <$> mapM (\(hashVal, timestamp) -> do
maybeT <- loadTree conn hashVal
return $ fmap (\t -> (hashVal, t, timestamp)) maybeT
) results
@@ -220,7 +246,7 @@ allTermTags conn = do
query_ conn (selectStoredTermFields <> " WHERE tags IS NOT NULL AND tags != '' ORDER BY created_at DESC")
selectStoredTermFields :: Query
selectStoredTermFields = "SELECT hash, names, term_data, metadata, created_at, tags FROM terms"
selectStoredTermFields = "SELECT hash, names, metadata, created_at, tags FROM terms"
queryMaybeOne :: (FromRow r, ToRow q) => Connection -> Query -> q -> IO (Maybe r)
queryMaybeOne conn qry params = do