Merge pull request #7542 from edolstra/gc-deadlock

Fix deadlock between auto-GC and addTempRoot()
This commit is contained in:
Eelco Dolstra 2023-01-05 17:08:23 +01:00 committed by GitHub
commit 3a98107170
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 47 deletions

View file

@ -77,60 +77,73 @@ Path LocalFSStore::addPermRoot(const StorePath & storePath, const Path & _gcRoot
} }
void LocalStore::addTempRoot(const StorePath & path) void LocalStore::createTempRootsFile()
{ {
auto state(_state.lock()); auto fdTempRoots(_fdTempRoots.lock());
/* Create the temporary roots file for this process. */ /* Create the temporary roots file for this process. */
if (!state->fdTempRoots) { if (*fdTempRoots) return;
while (1) { while (1) {
if (pathExists(fnTempRoots)) if (pathExists(fnTempRoots))
/* It *must* be stale, since there can be no two /* It *must* be stale, since there can be no two
processes with the same pid. */ processes with the same pid. */
unlink(fnTempRoots.c_str()); unlink(fnTempRoots.c_str());
state->fdTempRoots = openLockFile(fnTempRoots, true); *fdTempRoots = openLockFile(fnTempRoots, true);
debug("acquiring write lock on '%s'", fnTempRoots); debug("acquiring write lock on '%s'", fnTempRoots);
lockFile(state->fdTempRoots.get(), ltWrite, true); lockFile(fdTempRoots->get(), ltWrite, true);
/* Check whether the garbage collector didn't get in our /* Check whether the garbage collector didn't get in our
way. */ way. */
struct stat st; struct stat st;
if (fstat(state->fdTempRoots.get(), &st) == -1) if (fstat(fdTempRoots->get(), &st) == -1)
throw SysError("statting '%1%'", fnTempRoots); throw SysError("statting '%1%'", fnTempRoots);
if (st.st_size == 0) break; if (st.st_size == 0) break;
/* The garbage collector deleted this file before we could /* The garbage collector deleted this file before we could get
get a lock. (It won't delete the file after we get a a lock. (It won't delete the file after we get a lock.)
lock.) Try again. */ Try again. */
} }
}
void LocalStore::addTempRoot(const StorePath & path)
{
createTempRootsFile();
/* Open/create the global GC lock file. */
{
auto fdGCLock(_fdGCLock.lock());
if (!*fdGCLock)
*fdGCLock = openGCLock();
} }
if (!state->fdGCLock)
state->fdGCLock = openGCLock();
restart: restart:
FdLock gcLock(state->fdGCLock.get(), ltRead, false, ""); /* Try to acquire a shared global GC lock (non-blocking). This
only succeeds if the garbage collector is not currently
running. */
FdLock gcLock(_fdGCLock.lock()->get(), ltRead, false, "");
if (!gcLock.acquired) { if (!gcLock.acquired) {
/* We couldn't get a shared global GC lock, so the garbage /* We couldn't get a shared global GC lock, so the garbage
collector is running. So we have to connect to the garbage collector is running. So we have to connect to the garbage
collector and inform it about our root. */ collector and inform it about our root. */
if (!state->fdRootsSocket) { auto fdRootsSocket(_fdRootsSocket.lock());
if (!*fdRootsSocket) {
auto socketPath = stateDir.get() + gcSocketPath; auto socketPath = stateDir.get() + gcSocketPath;
debug("connecting to '%s'", socketPath); debug("connecting to '%s'", socketPath);
state->fdRootsSocket = createUnixDomainSocket(); *fdRootsSocket = createUnixDomainSocket();
try { try {
nix::connect(state->fdRootsSocket.get(), socketPath); nix::connect(fdRootsSocket->get(), socketPath);
} catch (SysError & e) { } catch (SysError & e) {
/* The garbage collector may have exited, so we need to /* The garbage collector may have exited, so we need to
restart. */ restart. */
if (e.errNo == ECONNREFUSED) { if (e.errNo == ECONNREFUSED) {
debug("GC socket connection refused"); debug("GC socket connection refused");
state->fdRootsSocket.close(); fdRootsSocket->close();
goto restart; goto restart;
} }
throw; throw;
@ -139,9 +152,9 @@ void LocalStore::addTempRoot(const StorePath & path)
try { try {
debug("sending GC root '%s'", printStorePath(path)); debug("sending GC root '%s'", printStorePath(path));
writeFull(state->fdRootsSocket.get(), printStorePath(path) + "\n", false); writeFull(fdRootsSocket->get(), printStorePath(path) + "\n", false);
char c; char c;
readFull(state->fdRootsSocket.get(), &c, 1); readFull(fdRootsSocket->get(), &c, 1);
assert(c == '1'); assert(c == '1');
debug("got ack for GC root '%s'", printStorePath(path)); debug("got ack for GC root '%s'", printStorePath(path));
} catch (SysError & e) { } catch (SysError & e) {
@ -149,20 +162,21 @@ void LocalStore::addTempRoot(const StorePath & path)
restart. */ restart. */
if (e.errNo == EPIPE || e.errNo == ECONNRESET) { if (e.errNo == EPIPE || e.errNo == ECONNRESET) {
debug("GC socket disconnected"); debug("GC socket disconnected");
state->fdRootsSocket.close(); fdRootsSocket->close();
goto restart; goto restart;
} }
throw; throw;
} catch (EndOfFile & e) { } catch (EndOfFile & e) {
debug("GC socket disconnected"); debug("GC socket disconnected");
state->fdRootsSocket.close(); fdRootsSocket->close();
goto restart; goto restart;
} }
} }
/* Append the store path to the temporary roots file. */ /* Record the store path in the temporary roots file so it will be
seen by a future run of the garbage collector. */
auto s = printStorePath(path) + '\0'; auto s = printStorePath(path) + '\0';
writeFull(state->fdTempRoots.get(), s); writeFull(_fdTempRoots.lock()->get(), s);
} }

View file

@ -441,9 +441,9 @@ LocalStore::~LocalStore()
} }
try { try {
auto state(_state.lock()); auto fdTempRoots(_fdTempRoots.lock());
if (state->fdTempRoots) { if (*fdTempRoots) {
state->fdTempRoots = -1; *fdTempRoots = -1;
unlink(fnTempRoots.c_str()); unlink(fnTempRoots.c_str());
} }
} catch (...) { } catch (...) {

View file

@ -59,15 +59,6 @@ private:
struct Stmts; struct Stmts;
std::unique_ptr<Stmts> stmts; std::unique_ptr<Stmts> stmts;
/* The global GC lock */
AutoCloseFD fdGCLock;
/* The file to which we write our temporary roots. */
AutoCloseFD fdTempRoots;
/* Connection to the garbage collector. */
AutoCloseFD fdRootsSocket;
/* The last time we checked whether to do an auto-GC, or an /* The last time we checked whether to do an auto-GC, or an
auto-GC finished. */ auto-GC finished. */
std::chrono::time_point<std::chrono::steady_clock> lastGCCheck; std::chrono::time_point<std::chrono::steady_clock> lastGCCheck;
@ -156,6 +147,21 @@ public:
void addTempRoot(const StorePath & path) override; void addTempRoot(const StorePath & path) override;
private:
void createTempRootsFile();
/* The file to which we write our temporary roots. */
Sync<AutoCloseFD> _fdTempRoots;
/* The global GC lock. */
Sync<AutoCloseFD> _fdGCLock;
/* Connection to the garbage collector. */
Sync<AutoCloseFD> _fdRootsSocket;
public:
void addIndirectRoot(const Path & path) override; void addIndirectRoot(const Path & path) override;
private: private: