From 0cdc0bfc34078ecd5cd785162158307583641983 Mon Sep 17 00:00:00 2001 From: James Eversole Date: Thu, 7 Aug 2025 15:16:44 -0500 Subject: [PATCH] "size" function nodes down from 454 to 321 --- src/Eval.hs | 394 ++++++++++++++++++++++++++++++++++++++++++++------- test/Spec.hs | 73 +++++++++- 2 files changed, 417 insertions(+), 50 deletions(-) diff --git a/src/Eval.hs b/src/Eval.hs index a69f210..deeb1bc 100644 --- a/src/Eval.hs +++ b/src/Eval.hs @@ -3,11 +3,30 @@ module Eval where import Parser import Research -import Data.List (partition, (\\)) -import Data.Map (Map) +import Data.List (partition, (\\), elemIndex) +import Data.Map (Map) +import Data.Set (Set) + +import qualified Data.Foldable as F import qualified Data.Map as Map import qualified Data.Set as Set +data DB + = BVar Int -- bound (0 = nearest binder) + | BFree String -- free/global + | BLam DB + | BApp DB DB + | BLeaf + | BStem DB + | BFork DB DB + | BStr String + | BInt Integer + | BList [DB] + | BEmpty + deriving (Eq, Show) + +type Uses = [Bool] + evalSingle :: Env -> TricuAST -> Env evalSingle env term | SDef name [] body <- term @@ -63,78 +82,74 @@ elimLambda :: TricuAST -> TricuAST elimLambda = go where go term - | etaReduction term = elimLambda $ etaReduceResult term + | etaReduction term = go (etaReduceResult term) | triagePattern term = _TRI | composePattern term = _B - | lambdaList term = elimLambda $ lambdaListResult term + | lambdaList term = go (lambdaListResult term) | nestedLambda term = nestedLambdaResult term | application term = applicationResult term | otherwise = term - etaReduction (SLambda [v] (SApp f (SVar x))) = v == x && not (isFree v f) + -- patterns (now DB-indexed where it matters) + etaReduction (SLambda [v] (SApp f (SVar x))) = v == x && not (usesBinder v f) etaReduction _ = False - etaReduceResult (SLambda [_] (SApp f _)) = f - triagePattern (SLambda [a] (SLambda [b] (SLambda [c] body))) = body == triageBody a b c + -- triage: \a b c -> TLeaf (TLeaf a b) c (checked in DB with a↦2, b↦1, c↦0) + triagePattern (SLambda [a] (SLambda [b] (SLambda [c] body))) = + toDB [c,b,a] body == triageBodyDB triagePattern _ = False - composePattern (SLambda [f] (SLambda [g] (SLambda [x] body))) = body == composeBody f g x + -- compose: \f g x -> f (g x) (checked in DB with f↦2, g↦1, x↦0) + composePattern (SLambda [f] (SLambda [g] (SLambda [x] body))) = + toDB [x,g,f] body == composeBodyDB composePattern _ = False lambdaList (SLambda [_] (SList _)) = True lambdaList _ = False - lambdaListResult (SLambda [v] (SList xs)) = SLambda [v] (foldr wrapTLeaf TLeaf xs) - wrapTLeaf m r = SApp (SApp TLeaf m) r nestedLambda (SLambda (_:_) _) = True nestedLambda _ = False - nestedLambdaResult (SLambda (v:vs) body) - | null vs = toSKI v (elimLambda body) - | otherwise = elimLambda (SLambda [v] (SLambda vs body)) application (SApp _ _) = True application _ = False - applicationResult (SApp f g) = SApp (elimLambda f) (elimLambda g) - toSKI x (SVar y) - | x == y = _I - | otherwise = SApp _K (SVar y) - toSKI x t@(SApp n u) - | not (isFree x t) = SApp _K t - | otherwise = SApp (SApp _S (toSKI x n)) (toSKI x u) - toSKI x (SList xs) - | not (isFree x (SList xs)) = SApp _K (SList xs) - | otherwise = SList (map (toSKI x) xs) - toSKI x t - | not (isFree x t) = SApp _K t - | otherwise = errorWithoutStackTrace "Unhandled toSKI conversion" + -- rewrites + etaReduceResult (SLambda [_] (SApp f _)) = f - -- Combinators and special forms - _S = parseSingle "t (t (t t t)) t" - _K = parseSingle "t t" - _I = parseSingle "t (t (t t)) t" - _B = parseSingle "t (t (t t (t (t (t t t)) t))) (t t)" - _TRI = parseSingle "t (t (t t (t (t (t t t))))) t" - - -- Pattern bodies - triageBody a b c = SApp (SApp TLeaf (SApp (SApp TLeaf (SVar a)) (SVar b))) (SVar c) - composeBody f g x = SApp (SVar f) (SApp (SVar g) (SVar x)) + lambdaListResult (SLambda [v] (SList xs)) = + SLambda [v] (foldr wrapTLeaf TLeaf xs) + where + wrapTLeaf m r = SApp (SApp TLeaf m) r + + -- The key change: use DB bracket abstraction for the final parameter. + nestedLambdaResult (SLambda (v:vs) body) + | null vs = + let body' = go body + db = toDB [v] body' + in toSKIKiselyov db + | otherwise = go (SLambda [v] (SLambda vs body)) + + applicationResult (SApp f g) = SApp (go f) (go g) + +-- combinators and special forms (unchanged) +_S = parseSingle "t (t (t t t)) t" +_K = parseSingle "t t" +_I = parseSingle "t (t (t t)) t" +_R = parseSingle "(t (t (t t (t (t (t (t (t (t (t t (t (t (t t t)) t))) (t (t (t t (t t))) (t (t (t t t)) t)))) (t t (t t))))))) (t t))" +_C = parseSingle "(t (t (t (t (t t (t (t (t t t)) t))) (t (t (t t (t t))) (t (t (t t t)) t)))) (t t (t t)))" +_B = parseSingle "t (t (t t (t (t (t t t)) t))) (t t)" +_T = SApp _C _I +_TRI = parseSingle "t (t (t t (t (t (t t t))))) t" + +-- pattern bodies (kept for reference; checks are now DB-based) +triageBody a b c = SApp (SApp TLeaf (SApp (SApp TLeaf (SVar a)) (SVar b))) (SVar c) +composeBody f g x = SApp (SVar f) (SApp (SVar g) (SVar x)) isFree :: String -> TricuAST -> Bool -isFree x = Set.member x . freeVars +isFree x t = Set.member x (freeVars t) -freeVars :: TricuAST -> Set.Set String -freeVars (SVar v ) = Set.singleton v -freeVars (SList s ) = foldMap freeVars s -freeVars (SLambda v b ) = foldr Set.delete (freeVars b) v -freeVars (SApp f a ) = freeVars f <> freeVars a -freeVars (TFork l r ) = freeVars l <> freeVars r -freeVars (SDef _ _ b) = freeVars b -freeVars (TStem t ) = freeVars t -freeVars (SInt _ ) = Set.empty -freeVars (SStr _ ) = Set.empty -freeVars TLeaf = Set.empty -freeVars _ = Set.empty +freeVars :: TricuAST -> Set String +freeVars = freeDBNames . toDB [] reorderDefs :: Env -> [TricuAST] -> [TricuAST] reorderDefs env defs @@ -215,3 +230,284 @@ mainResult :: Env -> T mainResult r = case Map.lookup "main" r of Just a -> a Nothing -> errorWithoutStackTrace "No valid definition for `main` found." + +-- Convert named TricuAST to De Bruijn form +toDB :: [String] -> TricuAST -> DB +toDB env = \case + SVar v -> maybe (BFree v) BVar (elemIndex v env) + SLambda vs b -> + let env' = reverse vs ++ env + body = toDB env' b + in foldr (\_ acc -> BLam acc) body vs + SApp f a -> BApp (toDB env f) (toDB env a) + TLeaf -> BLeaf + TStem t -> BStem (toDB env t) + TFork l r -> BFork (toDB env l) (toDB env r) + SStr s -> BStr s + SInt n -> BInt n + SList xs -> BList (map (toDB env) xs) + SEmpty -> BEmpty + SDef{} -> error "toDB: unexpected SDef at this stage" + SImport _ _ -> BEmpty + +-- Does a term depend on the current binder (level 0)? +dependsOnLevel :: Int -> DB -> Bool +dependsOnLevel lvl = \case + BVar k -> k == lvl + BLam t -> dependsOnLevel (lvl + 1) t + BApp f a -> dependsOnLevel lvl f || dependsOnLevel lvl a + BStem t -> dependsOnLevel lvl t + BFork l r -> dependsOnLevel lvl l || dependsOnLevel lvl r + BList xs -> any (dependsOnLevel lvl) xs + _ -> False + +-- Collect free *global* names (i.e., unbound) +freeDBNames :: DB -> Set String +freeDBNames = \case + BFree s -> Set.singleton s + BVar _ -> mempty + BLam t -> freeDBNames t + BApp f a -> freeDBNames f <> freeDBNames a + BLeaf -> mempty + BStem t -> freeDBNames t + BFork l r -> freeDBNames l <> freeDBNames r + BStr _ -> mempty + BInt _ -> mempty + BList xs -> foldMap freeDBNames xs + BEmpty -> mempty + +-- Helper: “is the binder named v used in body?” +usesBinder :: String -> TricuAST -> Bool +usesBinder v body = dependsOnLevel 0 (toDB [v] body) + +-- Expected DB bodies for the named special patterns (under env [a,b,c] -> indices 2,1,0) +triageBodyDB :: DB +triageBodyDB = + BApp (BApp BLeaf (BApp (BApp BLeaf (BVar 2)) (BVar 1))) (BVar 0) + +composeBodyDB :: DB +composeBodyDB = + BApp (BVar 2) (BApp (BVar 1) (BVar 0)) + +-- Convert DB -> TricuAST for subterms that contain NO binders (no BLam, no BVar) +fromDBClosed :: DB -> TricuAST +fromDBClosed = \case + BFree s -> SVar s + BApp f a -> SApp (fromDBClosed f) (fromDBClosed a) + BLeaf -> TLeaf + BStem t -> TStem (fromDBClosed t) + BFork l r -> TFork (fromDBClosed l) (fromDBClosed r) + BStr s -> SStr s + BInt n -> SInt n + BList xs -> SList (map fromDBClosed xs) + BEmpty -> SEmpty + -- Anything bound would be a logic error if we call this correctly. + BLam _ -> error "fromDBClosed: unexpected BLam" + BVar _ -> error "fromDBClosed: unexpected bound variable" + +-- DB-native bracket abstraction over the innermost binder (level 0). +-- This mirrors your old toSKI, but is purely index-driven. +toSKIDB :: DB -> TricuAST +toSKIDB t + | not (dependsOnLevel 0 t) = SApp _K (fromDBClosed t) +toSKIDB (BVar 0) = _I +toSKIDB (BApp n u) = SApp (SApp _S (toSKIDB n)) (toSKIDB u) +toSKIDB (BList xs) = + let anyUses = any (dependsOnLevel 0) xs + in if not anyUses + then SApp _K (SList (map fromDBClosed xs)) + else SList (map toSKIDB xs) +toSKIDB other = + errorWithoutStackTrace $ "Unhandled toSKI(DB) conversion: " ++ show other + +app2 :: TricuAST -> TricuAST -> TricuAST +app2 f x = SApp f x + +app3 :: TricuAST -> TricuAST -> TricuAST -> TricuAST +app3 f x y = SApp (SApp f x) y + +-- Core converter that *does not* perform the λ-step; it just returns (Γ, d). +-- Supported shapes: variables, applications, closed literals (Leaf/Int/Str/Empty), +-- closed lists. For anything where the binder occurs under structural nodes +-- (Stem/Fork/List-with-use), we deliberately bail so the caller can fall back. +kisConv :: DB -> Either String (Uses, TricuAST) +kisConv = \case + BVar 0 -> Right ([True], _I) + BVar n | n > 0 -> do + (g,d) <- kisConv (BVar (n - 1)) + Right (False:g, d) + BApp e1 e2 -> do + (g1,d1) <- kisConv e1 + (g2,d2) <- kisConv e2 + let g = zipWithDefault False (||) g1 g2 -- <— propagate Γ outside (#) + d = kisHash (g1,d1) (g2,d2) -- <— (#) yields only the term + Right (g, d) + -- Treat closed constants as free 'combinator leaves' (no binder use). + BLeaf -> Right ([], TLeaf) + BStr s -> Right ([], SStr s) + BInt n -> Right ([], SInt n) + BEmpty -> Right ([], SEmpty) + -- Closed list: allowed. If binder is used anywhere, we punt to fallback. + BList xs + | any (dependsOnLevel 0) xs -> Left "List with binder use: fallback" + | otherwise -> Right ([], SList (map fromDBClosed xs)) + -- For structural nodes, only allow if *closed* wrt the binder. + BStem t + | dependsOnLevel 0 t -> Left "Stem with binder use: fallback" + | otherwise -> Right ([], TStem (fromDBClosed t)) + BFork l r + | dependsOnLevel 0 l || dependsOnLevel 0 r -> Left "Fork with binder use: fallback" + | otherwise -> Right ([], TFork (fromDBClosed l) (fromDBClosed r)) + -- We shouldn’t see BLam under elim; treat as unsupported so we fallback. + BLam _ -> Left "Nested lambda under body: fallback" + BFree s -> Right ([], SVar s) + +-- Application combiner with K-optimization (lazy weakening). +-- Mirrors Lynn’s 'optK' rules: choose among S, B, C, R based on leading flags. +-- η-aware (#) with K-optimization (adapted from TS kiselyov_eta) +kisHash :: (Uses, TricuAST) -> (Uses, TricuAST) -> TricuAST +kisHash (g1, d1) (g2, d2) = + case g1 of + [] -> case g2 of + [] -> SApp d1 d2 + True:gs2 -> if isId2 (g2, d2) + then d1 + else kisHash ([], SApp _B d1) (gs2, d2) + False:gs2 -> kisHash ([], d1) (gs2, d2) + + True:gs1 -> case g2 of + [] -> if isId2 (g1, d1) + then SApp _T d2 + else kisHash ([], SApp _R d2) (gs1, d1) + _ -> + if isId2 (g1, d1) && case g2 of { False:_ -> True; _ -> False } + then kisHash ([], _T) (tail g2, d2) + else + -- NEW: coalesce the longest run of identical head pairs and apply bulk op once + let ((h1, h2), count) = headPairRun g1 g2 + g1' = drop count g1 + g2' = drop count g2 + in case (h1, h2) of + (False, False) -> + kisHash (g1', d1) (g2', d2) + (False, True) -> + let d1' = kisHash ([], bulkB count) (g1', d1) + in kisHash (g1', d1') (g2', d2) + (True, False) -> + let d1' = kisHash ([], bulkC count) (g1', d1) + in kisHash (g1', d1') (g2', d2) + (True, True) -> + let d1' = kisHash ([], bulkS count) (g1', d1) + in kisHash (g1', d1') (g2', d2) + + False:gs1 -> case g2 of + [] -> kisHash (gs1, d1) ([], d2) + _ -> + if isId2 (g1, d1) && case g2 of { False:_ -> True; _ -> False } + then kisHash ([], _T) (tail g2, d2) + else case g2 of + True:gs2 -> + let d1' = kisHash ([], _B) (gs1, d1) + in kisHash (gs1, d1') (gs2, d2) + False:gs2 -> + kisHash (gs1, d1) (gs2, d2) + where + tail (_:xs) = xs + tail [] = [] + + +toSKIKiselyov :: DB -> TricuAST +toSKIKiselyov body = + case kisConv body of + Right ([], d) -> SApp _K d + Right (True:_ , d) -> d + Right (False:g, d) -> kisHash ([], _K) (g, d) -- no snd + Left _ -> starSKIBCOpEtaDB body -- was: toSKIDB body + +zipWithDefault :: a -> (a -> a -> a) -> [a] -> [a] -> [a] +zipWithDefault d f [] ys = map (f d) ys +zipWithDefault d f xs [] = map (\x -> f x d) xs +zipWithDefault d f (x:xs) (y:ys) = f x y : zipWithDefault d f xs ys + +isNode :: TricuAST -> Bool +isNode t = case t of + TLeaf -> True + _ -> False + +isApp2 :: TricuAST -> Maybe (TricuAST, TricuAST) +isApp2 (SApp a b) = Just (a, b) +isApp2 _ = Nothing + +isKop :: TricuAST -> Bool +isKop t = case isApp2 t of + Just (a,b) -> isNode a && isNode b + _ -> False + +-- detects the two canonical I-shapes in the tree calculus: +-- △ (△ (△ △)) x OR △ (△ △ △) △ +isId :: TricuAST -> Bool +isId t = case isApp2 t of + Just (ab, c) -> case isApp2 ab of + Just (a, b) | isNode a -> + case isApp2 b of + Just (b1, b2) -> + (isNode b1 && isKop b2) || + (isKop b1 && isNode b2 && isNode c) + _ -> False + _ -> False + _ -> False + +-- head-True only, tail empty, and term is identity +isId2 :: (Uses, TricuAST) -> Bool +isId2 (True:[], t) = isId t +isId2 _ = False + +-- Bulk helpers built from SKI (no new primitives) +bPrime :: TricuAST +bPrime = SApp _B _B -- B' = B B + +cPrime :: TricuAST +cPrime = SApp (SApp _B (SApp _B _C)) _B -- C' = B (B C) B + +sPrime :: TricuAST +sPrime = SApp (SApp _B (SApp _B _S)) _B -- S' = B (B S) B + +bulkB :: Int -> TricuAST +bulkB n | n <= 1 = _B + | otherwise = SApp bPrime (bulkB (n - 1)) + +bulkC :: Int -> TricuAST +bulkC n | n <= 1 = _C + | otherwise = SApp cPrime (bulkC (n - 1)) + +bulkS :: Int -> TricuAST +bulkS n | n <= 1 = _S + | otherwise = SApp sPrime (bulkS (n - 1)) + +-- Count how many leading pairs (a,b) repeat at the head of zip g1 g2 +headPairRun :: [Bool] -> [Bool] -> ((Bool, Bool), Int) +headPairRun g1 g2 = + case zip g1 g2 of + [] -> ((False, False), 0) + (h:rest) -> (h, 1 + length (takeWhile (== h) rest)) + +-- DB-native star_skibc_op_eta (adapted from strategies.mts), binder = level 0 +starSKIBCOpEtaDB :: DB -> TricuAST +starSKIBCOpEtaDB t + | not (dependsOnLevel 0 t) = SApp _K (fromDBClosed t) +starSKIBCOpEtaDB (BVar 0) = _I +starSKIBCOpEtaDB (BApp e1 e2) + -- if binder not in right: use C + | not (dependsOnLevel 0 e2) + = SApp (SApp _C (starSKIBCOpEtaDB e1)) (fromDBClosed e2) + -- if binder not in left: + | not (dependsOnLevel 0 e1) + = case e2 of + -- η case: \x. f x ==> f + BVar 0 -> fromDBClosed e1 + _ -> SApp (SApp _B (fromDBClosed e1)) (starSKIBCOpEtaDB e2) + -- otherwise: S + | otherwise + = SApp (SApp _S (starSKIBCOpEtaDB e1)) (starSKIBCOpEtaDB e2) +-- Structural nodes with binder underneath: fall back to plain SKI (rare) +starSKIBCOpEtaDB other = toSKIDB other diff --git a/test/Spec.hs b/test/Spec.hs index c3af997..9ddfc7a 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -35,6 +35,8 @@ tests = testGroup "Tricu Tests" , modules , demos , decoding + , elimLambdaSingle + , stressElimLambda ] lexer :: TestTree @@ -533,7 +535,7 @@ demos = testGroup "Test provided demo functionality" decodeResult res @?= "\"(t (t (t t) (t t t)) (t t (t t t)))\"" , testCase "Determining the size of functions" $ do res <- liftIO $ evaluateFileResult "./demos/size.tri" - decodeResult res @?= "454" + decodeResult res @?= "321" , testCase "Level Order Traversal demo" $ do res <- liftIO $ evaluateFileResult "./demos/levelOrderTraversal.tri" decodeResult res @?= "\"\n1 \n2 3 \n4 5 6 7 \n8 11 10 9 12 \"" @@ -570,3 +572,72 @@ decoding = testGroup "Decoding Tests" let input = ofList [ofList [ofString "nested"], ofString "string"] decodeResult input @?= "[[\"nested\"], \"string\"]" ] + +elimLambdaSingle :: TestTree +elimLambdaSingle = testCase "elimLambda preserves eval, fires eta, and SDef binds" $ do + -- 1) eta reduction, purely structural and parsed from source + let [etaIn] = parseTricu "x : f x" + [fRef ] = parseTricu "f" + elimLambda etaIn @?= fRef + + -- 2) SDef binds its own name and parameters + let [defFXY] = parseTricu "f x y : f x" + fv = freeVars defFXY + assertBool "f should be bound in SDef" ("f" `Set.notMember` fv) + assertBool "x should be bound in SDef" ("x" `Set.notMember` fv) + assertBool "y should be bound in SDef" ("y" `Set.notMember` fv) + + -- 3) semantics preserved on a small program that exercises compose and triage + let src = + unlines + [ "false = t" + , "_ = t" + , "true = t t" + , "id = a : a" + , "const = a b : a" + , "compose = f g x : f (g x)" + , "triage = leaf stem fork : t (t leaf stem) fork" + , "test = triage \"Leaf\" (_ : \"Stem\") (_ _ : \"Fork\")" + , "main = compose id id test" + ] + prog = parseTricu src + progElim = map elimLambda prog + evalBefore = result (evalTricu Map.empty prog) + evalAfter = result (evalTricu Map.empty progElim) + evalAfter @?= evalBefore + +stressElimLambda :: TestTree +stressElimLambda = testCase "stress elimLambda on wide list under deep curried lambda" $ do + let numVars = 200 + numBody = 800 + vars = [ "x" ++ show i | i <- [1..numVars] ] + body = "(" ++ unwords (replicate numBody "t") ++ ")" + etaOne = "h : f h" + etaTwo = "k : id k" + defId = "id = a : a" + lambda = unwords vars ++ " : " ++ body + src = unlines + [ defId + , etaOne + , "compose = f g x : f (g x)" + , "f = t t" + , etaTwo + , lambda + , "main = compose id id (" ++ head vars ++ " : f " ++ head vars ++ ")" + ] + prog = parseTricu src + + let out = map elimLambda prog + let noLambda term = case term of + SLambda _ _ -> False + SApp f g -> noLambda f && noLambda g + SList xs -> all noLambda xs + TFork l r -> noLambda l && noLambda r + TStem u -> noLambda u + _ -> True + + assertBool "all lambdas eliminated" (all noLambda out) + + let before = result (evalTricu Map.empty prog) + after = result (evalTricu Map.empty out) + after @?= before