From aab2ec1c33a4e92abb6f40451dc5ec5d18f685cc Mon Sep 17 00:00:00 2001
From: Reid 'arrdem' McKenzie <me@arrdem.com>
Date: Mon, 11 Oct 2021 00:09:51 -0600
Subject: [PATCH] More improvements and an execution optimizer

---
 projects/cram/src/python/cram/__main__.py | 93 +++++++++++++++++++----
 projects/vfs/src/python/vfs/impl.py       | 14 +++-
 2 files changed, 91 insertions(+), 16 deletions(-)

diff --git a/projects/cram/src/python/cram/__main__.py b/projects/cram/src/python/cram/__main__.py
index 6ec29aa..a6584bc 100644
--- a/projects/cram/src/python/cram/__main__.py
+++ b/projects/cram/src/python/cram/__main__.py
@@ -2,6 +2,7 @@
 
 import argparse
 from itertools import chain
+import pickle
 import logging
 import os
 from pathlib import Path
@@ -16,6 +17,9 @@ log = logging.getLogger(__name__)
 parser = argparse.ArgumentParser()
 parser.add_argument("-x", "--execute", dest="execute", action="store_true", default=False)
 parser.add_argument("-d", "--dry-run", dest="execute", action="store_false")
+parser.add_argument("-s", "--state-file", dest="statefile", default=".cram.log")
+parser.add_argument("--optimize", dest="optimize", default=False, action="store_true")
+parser.add_argument("--no-optimize", dest="optimize", action="store_false")
 parser.add_argument("confdir", type=Path)
 parser.add_argument("destdir", type=Path)
 
@@ -35,6 +39,7 @@ def stow(fs: Vfs, src_dir: Path, dest_dir: Path, skip=[]):
         if src.is_dir():
             fs.mkdir(dest)
             fs.chmod(dest, src.stat().st_mode)
+
         elif src.is_file():
             fs.link(src, dest)
 
@@ -76,18 +81,9 @@ class PackageV0(NamedTuple):
             fs.exec(self.root, ["bash", str(postf)])
 
 
-def main():
-    """The entry point of cram."""
-
-    opts, args = parser.parse_known_args()
-
-    logging.basicConfig(
-        level=logging.DEBUG,
-        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    )
-
-    root = opts.confdir
-
+def build_fs(root: Path, dest: Path) -> Vfs:
+    """Build a VFS by configuring dest from the given config root."""
+    
     packages = {str(p.relative_to(root)): PackageV0(p, str(p))
                 for p in chain((root / "packages.d").glob("*"),
                                (root / "profiles.d").glob("*"),
@@ -114,11 +110,80 @@ def main():
     requirements = {r: packages[r].requires() for r in requirements}
     fs = Vfs()
 
+    # Abstractly execute the current packages
     for r in toposort_flatten(requirements):
         r = packages[r]
-        r.install(fs, opts.destdir)
+        r.install(fs, dest)
 
-    fs.execute(opts.execute)
+    return fs
+
+
+def load_fs(statefile: Path) -> Vfs:
+    """Return a Vfs whose log is unpickled from statefile, or an empty Vfs if the file is absent."""
+
+    oldfs = Vfs()  # returned unchanged when there is no state file yet
+
+    if statefile.exists():
+        with open(statefile, "rb") as fp:
+            oldfs._log = pickle.load(fp)  # NOTE(review): unpickling can run arbitrary code; trusted files only
+
+    return oldfs
+    
+
+def simplify(old_fs: Vfs, new_fs: Vfs) -> Vfs:
+    """Return a copy of new_fs minus the steps old_fs already holds, plus unlinks for stale old entries."""
+
+    # Work on copies so neither caller-owned log is mutated.
+    old_fs = old_fs.copy()
+    new_fs = new_fs.copy()
+    # Scrub anything in the new log that's in the old log
+    for txn in list(old_fs._log):
+        # Skip execs (stateful, so they stay in the new log) and old txns the new
+        # log lacks: list.remove() raises ValueError when the element is missing.
+        if txn[0] == "exec" or txn not in new_fs._log:
+            continue
+        new_fs._log.remove(txn)
+        old_fs._log.remove(txn)
+
+    # Whatever is left in the old log is no longer present in the new log; queue its removal
+    for txn in old_fs._log:
+        if txn[0] == "link" and txn not in new_fs._log:
+            new_fs.unlink(txn[2])
+        elif txn[0] == "mkdir" and txn not in new_fs._log:
+            new_fs.unlink(txn[1])  # NOTE(review): runs dest.unlink() on a directory - likely needs an rmdir op
+
+    return new_fs
+
+
+def main():
+    """The entry point of cram."""
+
+    opts, args = parser.parse_known_args()  # unrecognized argv entries land in args, which is not used
+
+    logging.basicConfig(
+        level=logging.DEBUG,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+
+    # Resolve the two input paths to absolutes
+    root = opts.confdir.resolve()
+    dest = opts.destdir.resolve()
+    statef = root / opts.statefile  # joined onto the config root; an absolute -s value replaces it
+
+    new_fs = build_fs(root, dest)
+    old_fs = load_fs(statef)
+
+    if opts.optimize:
+        fast_fs = simplify(old_fs, new_fs)
+        fast_fs.execute(opts.execute)
+    else:
+        new_fs.execute(opts.execute)
+
+    # Dump the new state, but only after a real (non-dry-run) execution.
+    # Note that we dump the UNOPTIMIZED log, because simplify() diffs one complete state against another.
+    if opts.execute:
+        with open(statef, "wb") as fp:
+            pickle.dump(new_fs._log, fp)
 
 
 if __name__ == "__main__" or 1:
diff --git a/projects/vfs/src/python/vfs/impl.py b/projects/vfs/src/python/vfs/impl.py
index 82138cc..3f451f0 100644
--- a/projects/vfs/src/python/vfs/impl.py
+++ b/projects/vfs/src/python/vfs/impl.py
@@ -12,8 +12,8 @@ _log = logging.getLogger(__name__)
 class Vfs(object):
     """An abstract filesystem device which can accumulate changes, and apply them in a batch."""
 
-    def __init__(self):
-        self._log = []
+    def __init__(self, log=None):
+        self._log = log or []  # a None (or empty) log falls back to a fresh list
 
     def execute(self, execute=False):
         for e in self._log:
@@ -46,6 +46,10 @@ class Vfs(object):
                 _, dest = e
                 dest.mkdir(exist_ok=True)
 
+            elif e[0] == "unlink":
+                _, dest = e
+                dest.unlink()
+
     def _append(self, msg):
         self._log.append(msg)
 
@@ -63,3 +67,9 @@ class Vfs(object):
 
     def exec(self, dest, cmd):
         self._append(("exec", dest, cmd))
+
+    def unlink(self, dest):  # queue an ("unlink", dest) entry; execute() applies it via dest.unlink()
+        self._append(("unlink", dest))
+
+    def copy(self):  # new Vfs over a shallow copy of the log list; the entries themselves are shared
+        return Vfs(list(self._log))