Merge pull request #8544 from edolstra/handle-missing-gc-socket

LocalStore::addTempRoot(): Handle ENOENT
This commit is contained in:
Eelco Dolstra 2024-01-16 16:05:58 +01:00 committed by GitHub
commit 7115edc85a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 60 additions and 24 deletions

View file

@ -149,11 +149,12 @@ void LocalStore::addTempRoot(const StorePath & path)
try {
nix::connect(fdRootsSocket->get(), socketPath);
} catch (SysError & e) {
/* The garbage collector may have exited, so we need to
restart. */
if (e.errNo == ECONNREFUSED) {
debug("GC socket connection refused");
/* The garbage collector may have exited or not
created the socket yet, so we need to restart. */
if (e.errNo == ECONNREFUSED || e.errNo == ENOENT) {
debug("GC socket connection refused: %s", e.msg());
fdRootsSocket->close();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
goto restart;
}
throw;
@ -509,6 +510,11 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
auto fdGCLock = openGCLock();
FdLock gcLock(fdGCLock.get(), ltWrite, true, "waiting for the big garbage collector lock...");
/* Synchronisation point to test ENOENT handling in
addTempRoot(), see tests/functional/gc-non-blocking.sh. */
if (auto p = getEnv("_NIX_TEST_GC_SYNC_1"))
readFile(*p);
/* Start the server for receiving new roots. */
auto socketPath = stateDir.get() + gcSocketPath;
createDirs(dirOf(socketPath));
@ -632,6 +638,10 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
roots.insert(root.first);
}
/* Synchronisation point for testing, see tests/functional/gc-non-blocking.sh. */
if (auto p = getEnv("_NIX_TEST_GC_SYNC_2"))
readFile(*p);
/* Helper function that deletes a path from the store and throws
GCLimitReached if we've deleted enough garbage. */
auto deleteFromStore = [&](std::string_view baseName)
@ -778,10 +788,6 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
}
};
/* Synchronisation point for testing, see tests/functional/gc-concurrent.sh. */
if (auto p = getEnv("_NIX_TEST_GC_SYNC"))
readFile(*p);
/* Either delete all garbage paths, or just the specified
paths (for gcDeleteSpecific). */
if (options.action == GCOptions::gcDeleteSpecific) {

View file

@ -1,6 +1,7 @@
#include "file-system.hh"
#include "processes.hh"
#include "unix-domain-socket.hh"
#include "util.hh"
#include <sys/socket.h>
#include <sys/un.h>
@ -75,21 +76,35 @@ void connect(int fd, const std::string & path)
addr.sun_family = AF_UNIX;
if (path.size() + 1 >= sizeof(addr.sun_path)) {
Pipe pipe;
pipe.create();
Pid pid = startProcess([&]() {
Path dir = dirOf(path);
if (chdir(dir.c_str()) == -1)
throw SysError("chdir to '%s' failed", dir);
std::string base(baseNameOf(path));
if (base.size() + 1 >= sizeof(addr.sun_path))
throw Error("socket path '%s' is too long", base);
memcpy(addr.sun_path, base.c_str(), base.size() + 1);
if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1)
throw SysError("cannot connect to socket at '%s'", path);
_exit(0);
try {
pipe.readSide.close();
Path dir = dirOf(path);
if (chdir(dir.c_str()) == -1)
throw SysError("chdir to '%s' failed", dir);
std::string base(baseNameOf(path));
if (base.size() + 1 >= sizeof(addr.sun_path))
throw Error("socket path '%s' is too long", base);
memcpy(addr.sun_path, base.c_str(), base.size() + 1);
if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1)
throw SysError("cannot connect to socket at '%s'", path);
writeFull(pipe.writeSide.get(), "0\n");
} catch (SysError & e) {
writeFull(pipe.writeSide.get(), fmt("%d\n", e.errNo));
} catch (...) {
writeFull(pipe.writeSide.get(), "-1\n");
}
});
int status = pid.wait();
if (status != 0)
pipe.writeSide.close();
auto errNo = string2Int<int>(chomp(drainFD(pipe.readSide.get())));
if (!errNo || *errNo == -1)
throw Error("cannot connect to socket at '%s'", path);
else if (*errNo > 0) {
errno = *errNo;
throw SysError("cannot connect to socket at '%s'", path);
}
} else {
memcpy(addr.sun_path, path.c_str(), path.size() + 1);
if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1)

View file

@ -6,27 +6,42 @@ needLocalStore "the GC test needs a synchronisation point"
clearStore
fifo=$TEST_ROOT/test.fifo
mkfifo "$fifo"
# This FIFO is read just after the global GC lock has been acquired,
# but before the root server is started.
fifo1=$TEST_ROOT/test2.fifo
mkfifo "$fifo1"
# This FIFO is read just after the roots have been read, but before
# the actual GC starts.
fifo2=$TEST_ROOT/test.fifo
mkfifo "$fifo2"
dummy=$(nix store add-path ./simple.nix)
running=$TEST_ROOT/running
touch $running
(_NIX_TEST_GC_SYNC=$fifo nix-store --gc -vvvvv; rm $running) &
# Start GC.
(_NIX_TEST_GC_SYNC_1=$fifo1 _NIX_TEST_GC_SYNC_2=$fifo2 nix-store --gc -vvvvv; rm $running) &
pid=$!
sleep 2
# Delay the start of the root server to check that the build below
# correctly handles ENOENT when connecting to the root server.
(sleep 1; echo > $fifo1) &
pid2=$!
# Start a build. This should not be blocked by the GC in progress.
outPath=$(nix-build --max-silent-time 60 -o "$TEST_ROOT/result" -E "
with import ./config.nix;
mkDerivation {
name = \"non-blocking\";
buildCommand = \"set -x; test -e $running; mkdir \$out; echo > $fifo\";
buildCommand = \"set -x; test -e $running; mkdir \$out; echo > $fifo2\";
}")
wait $pid
wait $pid2
(! test -e $running)
(! test -e $dummy)