Merge pull request #7542 from edolstra/gc-deadlock

Fix deadlock between auto-GC and addTempRoot()
This commit is contained in:
Eelco Dolstra 2023-01-05 17:08:23 +01:00 committed by GitHub
commit 3a98107170
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 47 deletions

View file

@ -77,60 +77,73 @@ Path LocalFSStore::addPermRoot(const StorePath & storePath, const Path & _gcRoot
}
void LocalStore::addTempRoot(const StorePath & path)
void LocalStore::createTempRootsFile()
{
auto state(_state.lock());
auto fdTempRoots(_fdTempRoots.lock());
/* Create the temporary roots file for this process. */
if (!state->fdTempRoots) {
if (*fdTempRoots) return;
while (1) {
if (pathExists(fnTempRoots))
/* It *must* be stale, since there can be no two
processes with the same pid. */
unlink(fnTempRoots.c_str());
while (1) {
if (pathExists(fnTempRoots))
/* It *must* be stale, since there can be no two
processes with the same pid. */
unlink(fnTempRoots.c_str());
state->fdTempRoots = openLockFile(fnTempRoots, true);
*fdTempRoots = openLockFile(fnTempRoots, true);
debug("acquiring write lock on '%s'", fnTempRoots);
lockFile(state->fdTempRoots.get(), ltWrite, true);
debug("acquiring write lock on '%s'", fnTempRoots);
lockFile(fdTempRoots->get(), ltWrite, true);
/* Check whether the garbage collector didn't get in our
way. */
struct stat st;
if (fstat(state->fdTempRoots.get(), &st) == -1)
throw SysError("statting '%1%'", fnTempRoots);
if (st.st_size == 0) break;
/* Check whether the garbage collector didn't get in our
way. */
struct stat st;
if (fstat(fdTempRoots->get(), &st) == -1)
throw SysError("statting '%1%'", fnTempRoots);
if (st.st_size == 0) break;
/* The garbage collector deleted this file before we could
get a lock. (It won't delete the file after we get a
lock.) Try again. */
}
/* The garbage collector deleted this file before we could get
a lock. (It won't delete the file after we get a lock.)
Try again. */
}
}
void LocalStore::addTempRoot(const StorePath & path)
{
createTempRootsFile();
/* Open/create the global GC lock file. */
{
auto fdGCLock(_fdGCLock.lock());
if (!*fdGCLock)
*fdGCLock = openGCLock();
}
if (!state->fdGCLock)
state->fdGCLock = openGCLock();
restart:
FdLock gcLock(state->fdGCLock.get(), ltRead, false, "");
/* Try to acquire a shared global GC lock (non-blocking). This
only succeeds if the garbage collector is not currently
running. */
FdLock gcLock(_fdGCLock.lock()->get(), ltRead, false, "");
if (!gcLock.acquired) {
/* We couldn't get a shared global GC lock, so the garbage
collector is running. So we have to connect to the garbage
collector and inform it about our root. */
if (!state->fdRootsSocket) {
auto fdRootsSocket(_fdRootsSocket.lock());
if (!*fdRootsSocket) {
auto socketPath = stateDir.get() + gcSocketPath;
debug("connecting to '%s'", socketPath);
state->fdRootsSocket = createUnixDomainSocket();
*fdRootsSocket = createUnixDomainSocket();
try {
nix::connect(state->fdRootsSocket.get(), socketPath);
nix::connect(fdRootsSocket->get(), socketPath);
} catch (SysError & e) {
/* The garbage collector may have exited, so we need to
restart. */
if (e.errNo == ECONNREFUSED) {
debug("GC socket connection refused");
state->fdRootsSocket.close();
fdRootsSocket->close();
goto restart;
}
throw;
@ -139,9 +152,9 @@ void LocalStore::addTempRoot(const StorePath & path)
try {
debug("sending GC root '%s'", printStorePath(path));
writeFull(state->fdRootsSocket.get(), printStorePath(path) + "\n", false);
writeFull(fdRootsSocket->get(), printStorePath(path) + "\n", false);
char c;
readFull(state->fdRootsSocket.get(), &c, 1);
readFull(fdRootsSocket->get(), &c, 1);
assert(c == '1');
debug("got ack for GC root '%s'", printStorePath(path));
} catch (SysError & e) {
@ -149,20 +162,21 @@ void LocalStore::addTempRoot(const StorePath & path)
restart. */
if (e.errNo == EPIPE || e.errNo == ECONNRESET) {
debug("GC socket disconnected");
state->fdRootsSocket.close();
fdRootsSocket->close();
goto restart;
}
throw;
} catch (EndOfFile & e) {
debug("GC socket disconnected");
state->fdRootsSocket.close();
fdRootsSocket->close();
goto restart;
}
}
/* Append the store path to the temporary roots file. */
/* Record the store path in the temporary roots file so it will be
seen by a future run of the garbage collector. */
auto s = printStorePath(path) + '\0';
writeFull(state->fdTempRoots.get(), s);
writeFull(_fdTempRoots.lock()->get(), s);
}

View file

@ -441,9 +441,9 @@ LocalStore::~LocalStore()
}
try {
auto state(_state.lock());
if (state->fdTempRoots) {
state->fdTempRoots = -1;
auto fdTempRoots(_fdTempRoots.lock());
if (*fdTempRoots) {
*fdTempRoots = -1;
unlink(fnTempRoots.c_str());
}
} catch (...) {

View file

@ -59,15 +59,6 @@ private:
struct Stmts;
std::unique_ptr<Stmts> stmts;
/* The global GC lock */
AutoCloseFD fdGCLock;
/* The file to which we write our temporary roots. */
AutoCloseFD fdTempRoots;
/* Connection to the garbage collector. */
AutoCloseFD fdRootsSocket;
/* The last time we checked whether to do an auto-GC, or an
auto-GC finished. */
std::chrono::time_point<std::chrono::steady_clock> lastGCCheck;
@ -156,6 +147,21 @@ public:
void addTempRoot(const StorePath & path) override;
private:
void createTempRootsFile();
/* The file to which we write our temporary roots. */
Sync<AutoCloseFD> _fdTempRoots;
/* The global GC lock. */
Sync<AutoCloseFD> _fdGCLock;
/* Connection to the garbage collector. */
Sync<AutoCloseFD> _fdRootsSocket;
public:
void addIndirectRoot(const Path & path) override;
private: