From 28d5b5cd453d89642557455d82b98903da2898b7 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 3 Jan 2023 15:15:14 +0100 Subject: [PATCH] Fix deadlock between auto-GC and addTempRoot() Previously addTempRoot() acquired the LocalStore state lock and waited for the garbage collector to reply. If the garbage collector is in the same process (as it the case with auto-GC), this would deadlock as soon as the garbage collector thread needs the LocalStore state lock. So now addTempRoot() uses separate Syncs for the state that it needs. As long at the auto-GC thread doesn't call addTempRoot() (which it shouldn't), it shouldn't deadlock. Fixes #3224. --- src/libstore/gc.cc | 36 ++++++++++++++++++++++-------------- src/libstore/local-store.hh | 12 ++++++------ 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc index f8c29593f..996f26a95 100644 --- a/src/libstore/gc.cc +++ b/src/libstore/gc.cc @@ -113,30 +113,37 @@ void LocalStore::addTempRoot(const StorePath & path) { createTempRootsFile(); - auto state(_state.lock()); - - if (!state->fdGCLock) - state->fdGCLock = openGCLock(); + /* Open/create the global GC lock file. */ + { + auto fdGCLock(_fdGCLock.lock()); + if (!*fdGCLock) + *fdGCLock = openGCLock(); + } restart: - FdLock gcLock(state->fdGCLock.get(), ltRead, false, ""); + /* Try to acquire a shared global GC lock (non-blocking). This + only succeeds if the garbage collector is not currently + running. */ + FdLock gcLock(_fdGCLock.lock()->get(), ltRead, false, ""); if (!gcLock.acquired) { /* We couldn't get a shared global GC lock, so the garbage collector is running. So we have to connect to the garbage collector and inform it about our root. */ - if (!state->fdRootsSocket) { + auto fdRootsSocket(_fdRootsSocket.lock()); + + if (!*fdRootsSocket) { auto socketPath = stateDir.get() + gcSocketPath; debug("connecting to '%s'", socketPath); - state->fdRootsSocket = createUnixDomainSocket(); + *fdRootsSocket = createUnixDomainSocket(); try { - nix::connect(state->fdRootsSocket.get(), socketPath); + nix::connect(fdRootsSocket->get(), socketPath); } catch (SysError & e) { /* The garbage collector may have exited, so we need to restart. */ if (e.errNo == ECONNREFUSED) { debug("GC socket connection refused"); - state->fdRootsSocket.close(); + fdRootsSocket->close(); goto restart; } throw; @@ -145,9 +152,9 @@ void LocalStore::addTempRoot(const StorePath & path) try { debug("sending GC root '%s'", printStorePath(path)); - writeFull(state->fdRootsSocket.get(), printStorePath(path) + "\n", false); + writeFull(fdRootsSocket->get(), printStorePath(path) + "\n", false); char c; - readFull(state->fdRootsSocket.get(), &c, 1); + readFull(fdRootsSocket->get(), &c, 1); assert(c == '1'); debug("got ack for GC root '%s'", printStorePath(path)); } catch (SysError & e) { @@ -155,18 +162,19 @@ void LocalStore::addTempRoot(const StorePath & path) restart. */ if (e.errNo == EPIPE || e.errNo == ECONNRESET) { debug("GC socket disconnected"); - state->fdRootsSocket.close(); + fdRootsSocket->close(); goto restart; } throw; } catch (EndOfFile & e) { debug("GC socket disconnected"); - state->fdRootsSocket.close(); + fdRootsSocket->close(); goto restart; } } - /* Append the store path to the temporary roots file. */ + /* Record the store path in the temporary roots file so it will be + seen by a future run of the garbage collector. */ auto s = printStorePath(path) + '\0'; writeFull(_fdTempRoots.lock()->get(), s); } diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index 9ea0c0bf7..06d36a7d5 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -59,12 +59,6 @@ private: struct Stmts; std::unique_ptr stmts; - /* The global GC lock */ - AutoCloseFD fdGCLock; - - /* Connection to the garbage collector. */ - AutoCloseFD fdRootsSocket; - /* The last time we checked whether to do an auto-GC, or an auto-GC finished. */ std::chrono::time_point lastGCCheck; @@ -160,6 +154,12 @@ private: /* The file to which we write our temporary roots. */ Sync _fdTempRoots; + /* The global GC lock. */ + Sync _fdGCLock; + + /* Connection to the garbage collector. */ + Sync _fdRootsSocket; + public: void addIndirectRoot(const Path & path) override;