Fix deadlock between auto-GC and addTempRoot()

Previously addTempRoot() acquired the LocalStore state lock and waited
for the garbage collector to reply. If the garbage collector is in the
same process (as it the case with auto-GC), this would deadlock as
soon as the garbage collector thread needs the LocalStore state lock.

So now addTempRoot() uses separate Syncs for the state that it
needs. As long at the auto-GC thread doesn't call addTempRoot() (which
it shouldn't), it shouldn't deadlock.

Fixes #3224.
This commit is contained in:
Eelco Dolstra 2023-01-03 15:15:14 +01:00
parent 224b56f10e
commit 28d5b5cd45
2 changed files with 28 additions and 20 deletions

View file

@ -113,30 +113,37 @@ void LocalStore::addTempRoot(const StorePath & path)
{
createTempRootsFile();
auto state(_state.lock());
if (!state->fdGCLock)
state->fdGCLock = openGCLock();
/* Open/create the global GC lock file. */
{
auto fdGCLock(_fdGCLock.lock());
if (!*fdGCLock)
*fdGCLock = openGCLock();
}
restart:
FdLock gcLock(state->fdGCLock.get(), ltRead, false, "");
/* Try to acquire a shared global GC lock (non-blocking). This
only succeeds if the garbage collector is not currently
running. */
FdLock gcLock(_fdGCLock.lock()->get(), ltRead, false, "");
if (!gcLock.acquired) {
/* We couldn't get a shared global GC lock, so the garbage
collector is running. So we have to connect to the garbage
collector and inform it about our root. */
if (!state->fdRootsSocket) {
auto fdRootsSocket(_fdRootsSocket.lock());
if (!*fdRootsSocket) {
auto socketPath = stateDir.get() + gcSocketPath;
debug("connecting to '%s'", socketPath);
state->fdRootsSocket = createUnixDomainSocket();
*fdRootsSocket = createUnixDomainSocket();
try {
nix::connect(state->fdRootsSocket.get(), socketPath);
nix::connect(fdRootsSocket->get(), socketPath);
} catch (SysError & e) {
/* The garbage collector may have exited, so we need to
restart. */
if (e.errNo == ECONNREFUSED) {
debug("GC socket connection refused");
state->fdRootsSocket.close();
fdRootsSocket->close();
goto restart;
}
throw;
@ -145,9 +152,9 @@ void LocalStore::addTempRoot(const StorePath & path)
try {
debug("sending GC root '%s'", printStorePath(path));
writeFull(state->fdRootsSocket.get(), printStorePath(path) + "\n", false);
writeFull(fdRootsSocket->get(), printStorePath(path) + "\n", false);
char c;
readFull(state->fdRootsSocket.get(), &c, 1);
readFull(fdRootsSocket->get(), &c, 1);
assert(c == '1');
debug("got ack for GC root '%s'", printStorePath(path));
} catch (SysError & e) {
@ -155,18 +162,19 @@ void LocalStore::addTempRoot(const StorePath & path)
restart. */
if (e.errNo == EPIPE || e.errNo == ECONNRESET) {
debug("GC socket disconnected");
state->fdRootsSocket.close();
fdRootsSocket->close();
goto restart;
}
throw;
} catch (EndOfFile & e) {
debug("GC socket disconnected");
state->fdRootsSocket.close();
fdRootsSocket->close();
goto restart;
}
}
/* Append the store path to the temporary roots file. */
/* Record the store path in the temporary roots file so it will be
seen by a future run of the garbage collector. */
auto s = printStorePath(path) + '\0';
writeFull(_fdTempRoots.lock()->get(), s);
}

View file

@ -59,12 +59,6 @@ private:
struct Stmts;
std::unique_ptr<Stmts> stmts;
/* The global GC lock */
AutoCloseFD fdGCLock;
/* Connection to the garbage collector. */
AutoCloseFD fdRootsSocket;
/* The last time we checked whether to do an auto-GC, or an
auto-GC finished. */
std::chrono::time_point<std::chrono::steady_clock> lastGCCheck;
@ -160,6 +154,12 @@ private:
/* The file to which we write our temporary roots. */
Sync<AutoCloseFD> _fdTempRoots;
/* The global GC lock. */
Sync<AutoCloseFD> _fdGCLock;
/* Connection to the garbage collector. */
Sync<AutoCloseFD> _fdRootsSocket;
public:
void addIndirectRoot(const Path & path) override;