From 71d9b64e3dc97183c8ab20f10638223426a7bfa2 Mon Sep 17 00:00:00 2001
From: Jade Lovelace <lix@jade.fyi>
Date: Mon, 1 Jul 2024 13:20:55 -0700
Subject: [PATCH] wip

Change-Id: I3ddb26d2cd96670c111a4af773d19163f1f48382
---
 src/libstore/gc.cc                | 241 ++++++++++++++++++++++++++++--
 src/libstore/path.hh              |   5 +
 src/libutil/file-system.cc        |  14 +-
 src/libutil/file-system.hh        | 106 ++++++++++++-
 src/libutil/sync.hh               |   2 +-
 tests/functional/flakes/flakes.sh |   2 +-
 6 files changed, 348 insertions(+), 22 deletions(-)

diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc
index a088d633f..ae5024acb 100644
--- a/src/libstore/gc.cc
+++ b/src/libstore/gc.cc
@@ -1,11 +1,17 @@
+#include "file-descriptor.hh"
+#include "file-system.hh"
 #include "globals.hh"
+#include "hash.hh"
 #include "local-store.hh"
+#include "path.hh"
 #include "pathlocks.hh"
 #include "processes.hh"
 #include "signals.hh"
 #include "finally.hh"
+#include "thread-pool.hh"
 #include "unix-domain-socket.hh"
 
+#include <limits>
 #include <queue>
 #include <regex>
 
@@ -24,6 +30,9 @@ namespace nix {
 
 constexpr static const std::string_view gcSocketPath = "/gc-socket/socket";
 constexpr static const std::string_view gcRootsDir = "gcroots";
+/** /nix/store/.garbage-pending-deletion */
+constexpr static const std::string_view gcGarbageBin = "/.garbage-pending-deletion";
+constexpr static const std::string_view gcGarbageBinName = gcGarbageBin.substr(1);
 
 
 static void makeSymlink(const Path & link, const Path & target)
@@ -358,14 +367,212 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor)
     }
 }
 
+/*
+ * Note [Deletion]
+ * ~~~~~~~~~~~~~~~
+ *
+ * Lix generally tries to maintain the invariant that an extant ("valid") store
+ * path doesn't have any missing dependencies in the store. This means that
+ * when deleting the store path, it needs to be done in an appropriate
+ * topological order that store paths disappear before their dependencies.
+ *
+ * This, however, conflicts with the desire to delete paths faster without
+ * caring about ordering. However, nobody said that we have to *delete* the
+ * store paths to make them disappear; we do in fact only care about making
+ * them disappear. This leads to the following design to implement a fast GC
+ * without rewriting the entire thing:
+ * 1. Keep topologically "deleting" store paths as before.
+ * 2. Make the deletion operation *much* faster. In particular, we can achieve
+ *    this by using renameat into a garbage bin instead of actually deleting
+ *    the store path synchronously.
+ * 3. After renaming the paths, dispatch them to a thread pool to delete the
+ *    garbage bin asynchronously.
+ *
+ * This effectively transitions the GC from interleaving deletion and marking
+ * to exclusively marking, with deletion happening asynchronously on other
+ * threads.
+ *
+ * There are some edge cases to a garbage bin, however:
+ * - How do you avoid the "begin deleting a store path, get interrupted, it
+ *   reappears, and gets deleted again" problem? That is, the rename might fail
+ *   on the second iteration. We can fix this by adding entropy into the file
+ *   names in addition to the actual store paths.
+ * - How do you deal with getting interrupted mid way through the deletion
+ *   after the marking is complete? The simplest solution here is to just list
+ *   the garbage bin at startup and add all items in it to the deletion queue.
+ */
 
 struct GCLimitReached : std::exception { };
 
+/**
+ * Class that deletes paths from the Nix store in a fast manner.
+ *
+ * Has loose semantics for the deleted amount; will delete the amount requested
+ * and try to not go over by too much.
+ *
+ * See Note [Deletion] for the design of the concurrent deleter.
+ */
+class GCDeleter
+{
+    std::unique_ptr<ThreadPool> pool;
+    AutoCloseDir storeDir;
+    AutoCloseDir garbageBinDir;
+    /** Path to the garbage bin, only used in errors */
+    std::string const garbageBinDirName;
+
+    std::optional<uint64_t> const deletionLimit;
+
+    struct State
+    {
+        uint64_t deletedSoFar = 0;
+        bool printed = false;
+    };
+
+    Sync<State> _state;
+
+    void noteDeletionAmount(uint64_t bytesFreed)
+    {
+        auto state{_state.lock()};
+        state->deletedSoFar += bytesFreed;
+        if (deletionLimit.has_value() && state->deletedSoFar >= *deletionLimit && !state->printed) {
+            state->printed = true;
+            printInfo("deleted more than %d bytes; stopping", *deletionLimit);
+        }
+    }
+
+    bool reachedDeletionLimit()
+    {
+        if (!deletionLimit.has_value()) {
+            return false;
+        }
+        auto state{_state.lock()};
+        return state->deletedSoFar >= *deletionLimit;
+    }
+
+public:
+    GCDeleter(Path store, uint64_t deletionLimit)
+        : pool{std::make_unique<ThreadPool>()}
+        , storeDir(store)
+        , garbageBinDirName(store + gcGarbageBin)
+        , deletionLimit(
+              deletionLimit == std::numeric_limits<uint64_t>::max() ? std::nullopt
+                                                                    : std::optional(deletionLimit)
+          )
+        , _state{State{}}
+    {
+        if (mkdir(garbageBinDirName.c_str(), 0775) == -1 && errno != EEXIST) {
+            throw SysError("creating directory '%1%'", garbageBinDirName);
+        }
+        garbageBinDir = AutoCloseDir(garbageBinDirName);
+    }
+
+    /** Deletes the existing set of garbage that is already in the garbage bin. */
+    bool deleteExistingGarbage()
+    {
+        auto entries = readDirectory(garbageBinDir.get(), garbageBinDirName);
+        for (auto & entry : entries) {
+            if (submitAlreadyBinnedPathForDeletion(entry.name)) {
+                return true;
+            }
+        }
+
+        // Synchronously wait for all the deletion to finish if we care about
+        // deletion limits. If we don't care, we can just keep going.
+        if (deletionLimit.has_value()) {
+            pool->process();
+            pool = std::make_unique<ThreadPool>();
+            return reachedDeletionLimit();
+        } else {
+            return false;
+        }
+    }
+
+    bool submitAlreadyBinnedPathForDeletion(std::string const nameInGarbageBin)
+    {
+        if (reachedDeletionLimit()) {
+            pool->process();
+            return true;
+        }
+
+        pool->enqueue([this, nameInGarbageBin]() {
+            // This leaves paths in the garbage bin if we need to exit early.
+            // This is somewhat unfortunate, but it ensures that we exit
+            // promptly after we collect enough garbage.
+            if (reachedDeletionLimit()) {
+                return;
+            }
+            uint64_t freed = 0;
+
+            printInfo("actually deleting path %1%/%2%", this->garbageBinDirName, nameInGarbageBin);
+            deletePath(garbageBinDir.dirfd(), nameInGarbageBin.c_str(), freed);
+
+            this->noteDeletionAmount(freed);
+        });
+        return false;
+    }
+
+    /** Submit a path to be deleted. Will do nothing and return true if enough has already been
+     * deleted.
+     *
+     * \return whether we have already deleted enough to satisfy the target amount
+     */
+    bool submitStorePathForDeletion(StorePath storePath)
+    {
+        return submitPathForDeletion(std::string(storePath.name()), storePath.asStringRef());
+    }
+
+    bool submitPathForDeletion(const std::string & name, const std::string & fileNameInStore)
+    {
+        // Randomize the store path so it will never collide.
+        StorePath randomStorePath = StorePath::random(name);
+
+        struct stat64 st;
+        if (fstatat64(storeDir.dirfd(), fileNameInStore.c_str(), &st, AT_SYMLINK_NOFOLLOW) != 0) {
+            throw SysError("failure to fstatat '%1%' in the store", fileNameInStore);
+        }
+
+        // You cannot fchmodat a symbolic link, apparently.
+        if (!S_ISLNK(st.st_mode)) {
+            if (fchmodat(storeDir.dirfd(), fileNameInStore.c_str(), 0775, 0) == -1) {
+                throw SysError("give write permission to put '%1%' in the garbage bin", fileNameInStore);
+            }
+        }
+
+        if (renameat(
+                storeDir.dirfd(),
+                fileNameInStore.c_str(),
+                garbageBinDir.dirfd(),
+                randomStorePath.asStringRef().c_str()
+            )
+            == -1)
+        {
+            throw SysError("putting '%1%' in the garbage bin '%2%' as '%3%'", fileNameInStore, garbageBinDirName, randomStorePath.asStringRef());
+        }
+
+        return submitAlreadyBinnedPathForDeletion(randomStorePath.asStringRef());
+    }
+
+    uint64_t deletedSoFar()
+    {
+        auto state{_state.lock()};
+        return state->deletedSoFar;
+    }
+
+    ~GCDeleter() noexcept(false)
+    {
+        pool->process();
+        if (unlinkat(storeDir.dirfd(), std::string(gcGarbageBinName).c_str(), AT_REMOVEDIR) == -1) {
+            warn("ignored an error removing the garbage bin %s: %s", garbageBinDirName, strerror(errno));
+        }
+    }
+};
+
 
 /**
  * Delegate class to expose just the operations required to perform GC on a store.
  */
-class GCStoreDelegate {
+class GCStoreDelegate
+{
     LocalStore const & store;
 
     public:
@@ -380,7 +587,8 @@ class GCStoreDelegate {
 /**
  * Class holding a server to receive new GC roots.
  */
-class GCOperation {
+class GCOperation
+{
     const GCStoreDelegate store;
 
     std::thread serverThread;
@@ -584,6 +792,8 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
 
     GCOperation gcServer {*this, stateDir.get()};
 
+    GCDeleter deleter{realStoreDir, options.maxFreed};
+
     /* Find the roots.  Since we've grabbed the GC lock, the set of
        permanent roots cannot increase now. */
     printInfo("finding garbage collector roots...");
@@ -608,7 +818,7 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
 
     /* Helper function that deletes a path from the store and throws
        GCLimitReached if we've deleted enough garbage. */
-    auto deleteFromStore = [&](std::string_view baseName)
+    auto deleteFromStore = [&](std::string const & storePathName, std::string const & baseName)
     {
         Path path = storeDir + "/" + std::string(baseName);
         Path realPath = realStoreDir + "/" + std::string(baseName);
@@ -625,15 +835,11 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
         }
 
         printInfo("deleting '%1%'", path);
+        bool reachedLimit = deleter.submitPathForDeletion(storePathName, baseName);
 
         results.paths.insert(path);
 
-        uint64_t bytesFreed;
-        deletePath(realPath, bytesFreed);
-        results.bytesFreed += bytesFreed;
-
-        if (results.bytesFreed > options.maxFreed) {
-            printInfo("deleted more than %d bytes; stopping", options.maxFreed);
+        if (reachedLimit) {
             throw GCLimitReached();
         }
     };
@@ -739,7 +945,7 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
             if (!dead.insert(path).second) continue;
             if (shouldDelete) {
                 invalidatePathChecked(path);
-                deleteFromStore(path.to_string());
+                deleteFromStore(std::string(path.name()), path.asStringRef());
                 referrersCache.erase(path);
             }
         }
@@ -761,10 +967,17 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
 
     } else if (options.maxFreed > 0) {
 
-        if (shouldDelete)
+        if (shouldDelete) {
             printInfo("deleting garbage...");
-        else
+            // Check if we can satisfy the deletion request by deleting garbage
+            // we already have marked but which didn't get deleted due to a
+            // previously interrupted process.
+            if (deleter.deleteExistingGarbage()) {
+                return;
+            }
+        } else {
             printInfo("determining live/dead paths...");
+        }
 
         try {
             AutoCloseDir dir(opendir(realStoreDir.get().c_str()));
@@ -779,12 +992,12 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
             while (errno = 0, dirent = readdir(dir.get())) {
                 checkInterrupt();
                 std::string name = dirent->d_name;
-                if (name == "." || name == ".." || name == linksName) continue;
+                if (name == "." || name == ".." || name == linksName || name == gcGarbageBinName) continue;
 
                 if (auto storePath = maybeParseStorePath(storeDir + "/" + name))
                     deleteReferrersClosure(*storePath);
                 else
-                    deleteFromStore(name);
+                    deleteFromStore(name, name);
 
             }
         } catch (GCLimitReached & e) {
diff --git a/src/libstore/path.hh b/src/libstore/path.hh
index 4ca6747b3..67d727cfd 100644
--- a/src/libstore/path.hh
+++ b/src/libstore/path.hh
@@ -40,6 +40,11 @@ public:
         return baseName;
     }
 
+    std::string const & asStringRef() const
+    {
+        return baseName;
+    }
+
     bool operator < (const StorePath & other) const
     {
         return baseName < other.baseName;
diff --git a/src/libutil/file-system.cc b/src/libutil/file-system.cc
index f0199d36f..460ce0811 100644
--- a/src/libutil/file-system.cc
+++ b/src/libutil/file-system.cc
@@ -282,8 +282,7 @@ DirEntries readDirectory(DIR *dir, const Path & path)
 
 DirEntries readDirectory(const Path & path)
 {
-    AutoCloseDir dir(opendir(path.c_str()));
-    if (!dir) throw SysError("opening directory '%1%'", path);
+    AutoCloseDir dir(path);
 
     return readDirectory(dir.get(), path);
 }
@@ -380,6 +379,7 @@ static void _deletePath(int parentfd, const Path & path, uint64_t & bytesFreed)
 
     std::string name(baseNameOf(path));
 
+    // FIXME: this needs to be completely redone to open the damn thing first so it is not TOCTOUable
     struct stat st;
     if (fstatat(parentfd, name.c_str(), &st, AT_SYMLINK_NOFOLLOW) == -1) {
         if (errno == ENOENT) return;
@@ -418,14 +418,14 @@ static void _deletePath(int parentfd, const Path & path, uint64_t & bytesFreed)
                 throw SysError("chmod '%1%'", path);
         }
 
-        int fd = openat(parentfd, path.c_str(), O_RDONLY);
+        int fd = openat(parentfd, name.c_str(), O_RDONLY | O_DIRECTORY);
         if (fd == -1)
             throw SysError("opening directory '%1%'", path);
         AutoCloseDir dir(fdopendir(fd));
         if (!dir)
             throw SysError("opening directory '%1%'", path);
         for (auto & i : readDirectory(dir.get(), path))
-            _deletePath(dirfd(dir.get()), path + "/" + i.name, bytesFreed);
+            _deletePath(dir.dirfd(), path + "/" + i.name, bytesFreed);
     }
 
     int flags = S_ISDIR(st.st_mode) ? AT_REMOVEDIR : 0;
@@ -465,6 +465,12 @@ void deletePath(const Path & path, uint64_t & bytesFreed)
     _deletePath(path, bytesFreed);
 }
 
+void deletePath(int dirfd, const std::string & name, uint64_t & bytesFreed)
+{
+    assert(name.find("/") == std::string::npos);
+    _deletePath(dirfd, name, bytesFreed);
+}
+
 Paths createDirs(const Path & path)
 {
     Paths created;
diff --git a/src/libutil/file-system.hh b/src/libutil/file-system.hh
index e49323e84..ee3422f50 100644
--- a/src/libutil/file-system.hh
+++ b/src/libutil/file-system.hh
@@ -149,8 +149,16 @@ struct DirEntry
         : name(std::move(name)), ino(ino), type(type) { }
 };
 
-typedef std::vector<DirEntry> DirEntries;
+using DirEntries = std::vector<DirEntry>;
 
+/** Reads the directory entries of the provided directory as a vector
+ * \param dir Directory fd to read the entries of.
+ * \param path Human readable path name of the directory for use in error messages.
+ */
+DirEntries readDirectory(DIR *dir, const Path & path);
+/** Reads the directory entries of the provided directory as a vector
+ * \param path Path to the directory.
+ */
 DirEntries readDirectory(const Path & path);
 
 unsigned char getFileType(const Path & path);
@@ -182,6 +190,14 @@ void deletePath(const Path & path);
 
 void deletePath(const Path & path, uint64_t & bytesFreed);
 
+/**
+ * Recursively deletes a path by name from a dirfd.
+ * \param dirfd dirfd of the parent directory
+ * \param name Name of the file, must not contain any slashes
+ * \param bytesFreed Out parameter for the bytes freed by this operation
+ */
+void deletePath(int dirfd, const std::string & name, uint64_t & bytesFreed);
+
 /**
  * Create a directory and all its parents, if necessary.  Returns the
  * list of created directories, in order of creation.
@@ -261,7 +277,93 @@ struct DIRDeleter
     }
 };
 
-typedef std::unique_ptr<DIR, DIRDeleter> AutoCloseDir;
+/** A smart pointer for a `DIR *` object */
+class AutoCloseDir
+{
+    using InnerT = std::unique_ptr<DIR, DIRDeleter>;
+    InnerT inner;
+
+    public:
+    AutoCloseDir(AutoCloseDir const & other) = delete;
+    AutoCloseDir(AutoCloseDir && other)
+    {
+        auto tmp = std::move(other.inner);
+        other.inner = nullptr;
+        this->inner = std::move(tmp);
+    }
+
+    explicit operator bool()
+    {
+        return bool(this->inner);
+    }
+
+    AutoCloseDir & operator=(AutoCloseDir && other)
+    {
+        inner = std::move(other.inner);
+        return *this;
+    }
+
+    DIR const * operator->() const
+    {
+        return inner.get();
+    }
+
+    DIR * operator->()
+    {
+        return inner.get();
+    }
+
+    DIR const * get() const
+    {
+        return inner.get();
+    }
+
+    DIR * get()
+    {
+        return inner.get();
+    }
+
+    /** Releases the inner pointer of the AutoCloseDir, giving an owning pointer. */
+    [[nodiscard("leaking an AutoCloseDir")]]
+    DIR * release()
+    {
+        return inner.release();
+    }
+
+    void reset(InnerT::pointer newValue = nullptr)
+    {
+        inner.reset(std::move(newValue));
+    }
+
+    int dirfd() const
+    {
+        assert(inner);
+        return ::dirfd(inner.get());
+    }
+
+    /** Make an AutoCloseDir that points to nothing. */
+    AutoCloseDir() {}
+    /** Make an AutoCloseDir of an existing DIR *.
+     * Sets the provided pointer to nullptr as it takes ownership.
+     */
+    AutoCloseDir(DIR * & dir) : inner(dir)
+    {
+        dir = nullptr;
+    }
+    /** Makes an AutoCloseDir of a DIR * temporary. */
+    AutoCloseDir(DIR * && dir) : inner(dir) {}
+
+    /** Open the given directory by path.
+     * \throws SysError if opening failed
+     */
+    AutoCloseDir(std::string const & path)
+    {
+        AutoCloseDir dir(opendir(path.c_str()));
+        if (!dir.inner) throw SysError("opening directory '%1%'", path);
+
+        new (this) AutoCloseDir(std::move(dir));
+    }
+};
 
 /**
  * Create a temporary directory.
diff --git a/src/libutil/sync.hh b/src/libutil/sync.hh
index 2c5424f2a..e4ba52f45 100644
--- a/src/libutil/sync.hh
+++ b/src/libutil/sync.hh
@@ -33,7 +33,7 @@ private:
 
 public:
 
-    Sync() { }
+    Sync() : data{} { }
     Sync(const T & data) : data(data) { }
     Sync(T && data) noexcept : data(std::move(data)) { }
 
diff --git a/tests/functional/flakes/flakes.sh b/tests/functional/flakes/flakes.sh
index 97dc6d818..01a9d4d73 100644
--- a/tests/functional/flakes/flakes.sh
+++ b/tests/functional/flakes/flakes.sh
@@ -209,7 +209,7 @@ git -C $flake3Dir commit -m 'Add lockfile'
 # Test whether registry caching works.
 nix registry list --flake-registry file://$registry | grepQuiet flake3
 mv $registry $registry.tmp
-nix store gc
+_RR_TRACE_DIR=/home/jade/.local/share/rr rr record -- nix store gc
 nix registry list --flake-registry file://$registry --refresh | grepQuiet flake3
 mv $registry.tmp $registry