Merge content store

This commit is contained in:
2025-05-22 16:13:57 -05:00
parent 3717942589
commit 43e83be9a4
13 changed files with 1000 additions and 217 deletions

228
src/ContentStore.hs Normal file
View File

@ -0,0 +1,228 @@
module ContentStore where
import Research
import Parser
import Control.Monad (foldM, forM)
import Crypto.Hash (hash, SHA256, Digest)
import Data.ByteString (ByteString)
import Data.List (intercalate, nub, sortBy, sort)
import Data.Maybe (catMaybes)
import Data.Text (Text)
import Database.SQLite.Simple
import Database.SQLite.Simple.FromRow (FromRow(..), field)
import System.Directory (createDirectoryIfMissing, getXdgDirectory, XdgDirectory(..))
import System.FilePath ((</>), takeDirectory)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Map as Map
import qualified Data.Serialize as Cereal
import qualified Data.Text as T
data StoredTerm = StoredTerm
{ termHash :: Text
, termNames :: Text
, termData :: ByteString
, termMetadata :: Text
, termCreatedAt :: Integer
, termTags :: Text
} deriving (Show)
instance FromRow StoredTerm where
fromRow = StoredTerm <$> field <*> field <*> field <*> field <*> field <*> field
tryDeserializeTerm :: ByteString -> IO (Maybe T)
tryDeserializeTerm bs =
case deserializeTerm bs of
Right t -> return $ Just t
Left err -> do
putStrLn $ "Error deserializing term: " ++ err
return Nothing
parseNameList :: Text -> [Text]
parseNameList = filter (not . T.null) . T.splitOn ","
serializeNameList :: [Text] -> Text
serializeNameList = T.intercalate "," . nub . sort
initContentStore :: IO Connection
initContentStore = do
dbPath <- getContentStorePath
createDirectoryIfMissing True (takeDirectory dbPath)
conn <- open dbPath
execute_ conn "CREATE TABLE IF NOT EXISTS terms (\
\hash TEXT PRIMARY KEY, \
\names TEXT, \
\term_data BLOB, \
\metadata TEXT, \
\created_at INTEGER DEFAULT (strftime('%s','now')), \
\tags TEXT DEFAULT '')"
execute_ conn "CREATE INDEX IF NOT EXISTS terms_names_idx ON terms(names)"
execute_ conn "CREATE INDEX IF NOT EXISTS terms_tags_idx ON terms(tags)"
return conn
getContentStorePath :: IO FilePath
getContentStorePath = do
dataDir <- getXdgDirectory XdgData "tricu"
return $ dataDir </> "content-store.db"
instance Cereal.Serialize T where
put Leaf = Cereal.putWord8 0
put (Stem t) = do
Cereal.putWord8 1
Cereal.put t
put (Fork a b) = do
Cereal.putWord8 2
Cereal.put a
Cereal.put b
get = do
tag <- Cereal.getWord8
case tag of
0 -> return Leaf
1 -> Stem <$> Cereal.get
2 -> Fork <$> Cereal.get <*> Cereal.get
_ -> fail $ "Invalid tag for T: " ++ show tag
serializeTerm :: T -> ByteString
serializeTerm = LBS.toStrict . Cereal.encodeLazy
deserializeTerm :: ByteString -> Either String T
deserializeTerm = Cereal.decodeLazy . LBS.fromStrict
hashTerm :: T -> Text
hashTerm = T.pack . show . (hash :: ByteString -> Digest SHA256) . serializeTerm
storeTerm :: Connection -> [String] -> T -> IO Text
storeTerm conn newNamesStrList term = do
let termBS = serializeTerm term
termHashText = hashTerm term
newNamesTextList = map T.pack newNamesStrList
metadataText = T.pack "{}"
existingNamesQuery <- query conn
"SELECT names FROM terms WHERE hash = ?"
(Only termHashText) :: IO [Only Text]
case existingNamesQuery of
[] -> do
let allNamesToStore = serializeNameList newNamesTextList
execute conn
"INSERT INTO terms (hash, names, term_data, metadata, tags) VALUES (?, ?, ?, ?, ?)"
(termHashText, allNamesToStore, termBS, metadataText, T.pack "")
[(Only currentNamesText)] -> do
let currentNamesList = parseNameList currentNamesText
let combinedNamesList = currentNamesList ++ newNamesTextList
let allNamesToStore = serializeNameList combinedNamesList
execute conn
"UPDATE terms SET names = ?, metadata = ? WHERE hash = ?"
(allNamesToStore, metadataText, termHashText)
return termHashText
hashToTerm :: Connection -> Text -> IO (Maybe StoredTerm)
hashToTerm conn hashText =
queryMaybeOne conn (selectStoredTermFields <> " WHERE hash = ?") (Only hashText)
nameToTerm :: Connection -> Text -> IO (Maybe StoredTerm)
nameToTerm conn nameText =
queryMaybeOne conn (selectStoredTermFields <> " WHERE names LIKE ? ORDER BY created_at DESC LIMIT 1") (Only $ "%" <> nameText <> "%")
listStoredTerms :: Connection -> IO [StoredTerm]
listStoredTerms conn =
query_ conn (selectStoredTermFields <> " ORDER BY created_at DESC")
storeEnvironment :: Connection -> Env -> IO [(String, Text)]
storeEnvironment conn env = do
let defs = Map.toList $ Map.delete "!result" env
let groupedDefs = Map.toList $ Map.fromListWith (++) [(term, [name]) | (name, term) <- defs]
forM groupedDefs $ \(term, namesList) -> do
hashVal <- storeTerm conn namesList term
return (head namesList, hashVal)
loadTerm :: Connection -> String -> IO (Maybe T)
loadTerm conn identifier = do
result <- getTerm conn (T.pack identifier)
case result of
Just storedTerm -> tryDeserializeTerm (termData storedTerm)
Nothing -> return Nothing
getTerm :: Connection -> Text -> IO (Maybe StoredTerm)
getTerm conn identifier = do
if '#' `elem` (T.unpack identifier)
then hashToTerm conn (T.pack $ drop 1 (T.unpack identifier))
else nameToTerm conn identifier
loadEnvironment :: Connection -> IO Env
loadEnvironment conn = do
terms <- listStoredTerms conn
foldM addTermToEnv Map.empty terms
where
addTermToEnv env storedTerm = do
maybeT <- tryDeserializeTerm (termData storedTerm)
case maybeT of
Just t -> do
let namesList = parseNameList (termNames storedTerm)
return $ foldl (\e name -> Map.insert (T.unpack name) t e) env namesList
Nothing -> return env
termVersions :: Connection -> String -> IO [(Text, T, Integer)]
termVersions conn name = do
let nameText = T.pack name
results <- query conn
"SELECT hash, term_data, created_at FROM terms WHERE names LIKE ? ORDER BY created_at DESC"
(Only $ "%" <> nameText <> "%")
catMaybes <$> mapM (\(hashVal, termDataVal, timestamp) -> do
maybeT <- tryDeserializeTerm termDataVal
return $ fmap (\t -> (hashVal, t, timestamp)) maybeT
) results
setTag :: Connection -> Text -> Text -> IO ()
setTag conn hash tagValue = do
exists <- termExists conn hash
if exists
then do
currentTagsQuery <- query conn "SELECT tags FROM terms WHERE hash = ?" (Only hash) :: IO [Only Text]
case currentTagsQuery of
[Only tagsText] -> do
let tagsList = parseNameList tagsText
newTagsList = tagValue : tagsList
newTags = serializeNameList newTagsList
execute conn "UPDATE terms SET tags = ? WHERE hash = ?" (newTags, hash)
_ -> putStrLn $ "Term with hash " ++ T.unpack hash ++ " not found (should not happen if exists is true)"
else
putStrLn $ "Term with hash " ++ T.unpack hash ++ " does not exist"
termExists :: Connection -> Text -> IO Bool
termExists conn hash = do
results <- query conn "SELECT 1 FROM terms WHERE hash = ? LIMIT 1" (Only hash) :: IO [[Int]]
return $ not (null results)
termToTags :: Connection -> Text -> IO [Text]
termToTags conn hash = do
tagsQuery <- query conn "SELECT tags FROM terms WHERE hash = ?" (Only hash) :: IO [Only Text]
case tagsQuery of
[Only tagsText] -> return $ parseNameList tagsText
_ -> return []
tagToTerm :: Connection -> Text -> IO [StoredTerm]
tagToTerm conn tagValue = do
let pattern = "%" <> tagValue <> "%"
query conn (selectStoredTermFields <> " WHERE tags LIKE ? ORDER BY created_at DESC") (Only pattern)
allTermTags :: Connection -> IO [StoredTerm]
allTermTags conn = do
query_ conn (selectStoredTermFields <> " WHERE tags IS NOT NULL AND tags != '' ORDER BY created_at DESC")
selectStoredTermFields :: Query
selectStoredTermFields = "SELECT hash, names, term_data, metadata, created_at, tags FROM terms"
queryMaybeOne :: (FromRow r, ToRow q) => Connection -> Query -> q -> IO (Maybe r)
queryMaybeOne conn qry params = do
results <- query conn qry params
case results of
[row] -> return $ Just row
_ -> return Nothing