Merge pull request #8544 from edolstra/handle-missing-gc-socket

LocalStore::addTempRoot(): Handle ENOENT
This commit is contained in:
Eelco Dolstra 2024-01-16 16:05:58 +01:00 committed by GitHub
commit 7115edc85a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 60 additions and 24 deletions

View file

@ -149,11 +149,12 @@ void LocalStore::addTempRoot(const StorePath & path)
try { try {
nix::connect(fdRootsSocket->get(), socketPath); nix::connect(fdRootsSocket->get(), socketPath);
} catch (SysError & e) { } catch (SysError & e) {
/* The garbage collector may have exited, so we need to /* The garbage collector may have exited or not
restart. */ created the socket yet, so we need to restart. */
if (e.errNo == ECONNREFUSED) { if (e.errNo == ECONNREFUSED || e.errNo == ENOENT) {
debug("GC socket connection refused"); debug("GC socket connection refused: %s", e.msg());
fdRootsSocket->close(); fdRootsSocket->close();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
goto restart; goto restart;
} }
throw; throw;
@ -509,6 +510,11 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
auto fdGCLock = openGCLock(); auto fdGCLock = openGCLock();
FdLock gcLock(fdGCLock.get(), ltWrite, true, "waiting for the big garbage collector lock..."); FdLock gcLock(fdGCLock.get(), ltWrite, true, "waiting for the big garbage collector lock...");
/* Synchronisation point to test ENOENT handling in
addTempRoot(), see tests/functional/gc-non-blocking.sh. */
if (auto p = getEnv("_NIX_TEST_GC_SYNC_1"))
readFile(*p);
/* Start the server for receiving new roots. */ /* Start the server for receiving new roots. */
auto socketPath = stateDir.get() + gcSocketPath; auto socketPath = stateDir.get() + gcSocketPath;
createDirs(dirOf(socketPath)); createDirs(dirOf(socketPath));
@ -632,6 +638,10 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
roots.insert(root.first); roots.insert(root.first);
} }
/* Synchronisation point for testing, see tests/functional/gc-non-blocking.sh. */
if (auto p = getEnv("_NIX_TEST_GC_SYNC_2"))
readFile(*p);
/* Helper function that deletes a path from the store and throws /* Helper function that deletes a path from the store and throws
GCLimitReached if we've deleted enough garbage. */ GCLimitReached if we've deleted enough garbage. */
auto deleteFromStore = [&](std::string_view baseName) auto deleteFromStore = [&](std::string_view baseName)
@ -778,10 +788,6 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
} }
}; };
/* Synchronisation point for testing, see tests/functional/gc-concurrent.sh. */
if (auto p = getEnv("_NIX_TEST_GC_SYNC"))
readFile(*p);
/* Either delete all garbage paths, or just the specified /* Either delete all garbage paths, or just the specified
paths (for gcDeleteSpecific). */ paths (for gcDeleteSpecific). */
if (options.action == GCOptions::gcDeleteSpecific) { if (options.action == GCOptions::gcDeleteSpecific) {

View file

@ -1,6 +1,7 @@
#include "file-system.hh" #include "file-system.hh"
#include "processes.hh" #include "processes.hh"
#include "unix-domain-socket.hh" #include "unix-domain-socket.hh"
#include "util.hh"
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/un.h> #include <sys/un.h>
@ -75,21 +76,35 @@ void connect(int fd, const std::string & path)
addr.sun_family = AF_UNIX; addr.sun_family = AF_UNIX;
if (path.size() + 1 >= sizeof(addr.sun_path)) { if (path.size() + 1 >= sizeof(addr.sun_path)) {
Pipe pipe;
pipe.create();
Pid pid = startProcess([&]() { Pid pid = startProcess([&]() {
Path dir = dirOf(path); try {
if (chdir(dir.c_str()) == -1) pipe.readSide.close();
throw SysError("chdir to '%s' failed", dir); Path dir = dirOf(path);
std::string base(baseNameOf(path)); if (chdir(dir.c_str()) == -1)
if (base.size() + 1 >= sizeof(addr.sun_path)) throw SysError("chdir to '%s' failed", dir);
throw Error("socket path '%s' is too long", base); std::string base(baseNameOf(path));
memcpy(addr.sun_path, base.c_str(), base.size() + 1); if (base.size() + 1 >= sizeof(addr.sun_path))
if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) throw Error("socket path '%s' is too long", base);
throw SysError("cannot connect to socket at '%s'", path); memcpy(addr.sun_path, base.c_str(), base.size() + 1);
_exit(0); if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1)
throw SysError("cannot connect to socket at '%s'", path);
writeFull(pipe.writeSide.get(), "0\n");
} catch (SysError & e) {
writeFull(pipe.writeSide.get(), fmt("%d\n", e.errNo));
} catch (...) {
writeFull(pipe.writeSide.get(), "-1\n");
}
}); });
int status = pid.wait(); pipe.writeSide.close();
if (status != 0) auto errNo = string2Int<int>(chomp(drainFD(pipe.readSide.get())));
if (!errNo || *errNo == -1)
throw Error("cannot connect to socket at '%s'", path); throw Error("cannot connect to socket at '%s'", path);
else if (*errNo > 0) {
errno = *errNo;
throw SysError("cannot connect to socket at '%s'", path);
}
} else { } else {
memcpy(addr.sun_path, path.c_str(), path.size() + 1); memcpy(addr.sun_path, path.c_str(), path.size() + 1);
if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1)

View file

@ -6,27 +6,42 @@ needLocalStore "the GC test needs a synchronisation point"
clearStore clearStore
fifo=$TEST_ROOT/test.fifo # This FIFO is read just after the global GC lock has been acquired,
mkfifo "$fifo" # but before the root server is started.
fifo1=$TEST_ROOT/test2.fifo
mkfifo "$fifo1"
# This FIFO is read just after the roots have been read, but before
# the actual GC starts.
fifo2=$TEST_ROOT/test.fifo
mkfifo "$fifo2"
dummy=$(nix store add-path ./simple.nix) dummy=$(nix store add-path ./simple.nix)
running=$TEST_ROOT/running running=$TEST_ROOT/running
touch $running touch $running
(_NIX_TEST_GC_SYNC=$fifo nix-store --gc -vvvvv; rm $running) & # Start GC.
(_NIX_TEST_GC_SYNC_1=$fifo1 _NIX_TEST_GC_SYNC_2=$fifo2 nix-store --gc -vvvvv; rm $running) &
pid=$! pid=$!
sleep 2 sleep 2
# Delay the start of the root server to check that the build below
# correctly handles ENOENT when connecting to the root server.
(sleep 1; echo > $fifo1) &
pid2=$!
# Start a build. This should not be blocked by the GC in progress.
outPath=$(nix-build --max-silent-time 60 -o "$TEST_ROOT/result" -E " outPath=$(nix-build --max-silent-time 60 -o "$TEST_ROOT/result" -E "
with import ./config.nix; with import ./config.nix;
mkDerivation { mkDerivation {
name = \"non-blocking\"; name = \"non-blocking\";
buildCommand = \"set -x; test -e $running; mkdir \$out; echo > $fifo\"; buildCommand = \"set -x; test -e $running; mkdir \$out; echo > $fifo2\";
}") }")
wait $pid wait $pid
wait $pid2
(! test -e $running) (! test -e $running)
(! test -e $dummy) (! test -e $dummy)