From 06e92450bd87baa9a1cc06e09f59e5d79bb4b707 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Thu, 7 Mar 2024 06:15:32 +0100 Subject: [PATCH] Merge pull request #8544 from edolstra/handle-missing-gc-socket LocalStore::addTempRoot(): Handle ENOENT (cherry picked from commit 7115edc85af060ef235ac0270245ab46cc828f7c) Change-Id: Ie6b1596049c3fde09b98f2f0727899f98e48e6b1 --- src/libstore/gc.cc | 22 +++++++++++------ src/libutil/util.cc | 38 ++++++++++++++++++++--------- tests/functional/gc-non-blocking.sh | 23 ++++++++++++++--- 3 files changed, 59 insertions(+), 24 deletions(-) diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc index 7c7273012..ac61f7f53 100644 --- a/src/libstore/gc.cc +++ b/src/libstore/gc.cc @@ -142,11 +142,12 @@ void LocalStore::addTempRoot(const StorePath & path) try { nix::connect(fdRootsSocket->get(), socketPath); } catch (SysError & e) { - /* The garbage collector may have exited, so we need to - restart. */ - if (e.errNo == ECONNREFUSED) { - debug("GC socket connection refused"); + /* The garbage collector may have exited or not + created the socket yet, so we need to restart. */ + if (e.errNo == ECONNREFUSED || e.errNo == ENOENT) { + debug("GC socket connection refused: %s", e.msg()); fdRootsSocket->close(); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); goto restart; } throw; @@ -502,6 +503,11 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results) auto fdGCLock = openGCLock(); FdLock gcLock(fdGCLock.get(), ltWrite, true, "waiting for the big garbage collector lock..."); + /* Synchronisation point to test ENOENT handling in + addTempRoot(), see tests/gc-non-blocking.sh. */ + if (auto p = getEnv("_NIX_TEST_GC_SYNC_1")) + readFile(*p); + /* Start the server for receiving new roots. 
*/ auto socketPath = stateDir.get() + gcSocketPath; createDirs(dirOf(socketPath)); @@ -625,6 +631,10 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results) roots.insert(root.first); } + /* Synchronisation point for testing, see tests/functional/gc-non-blocking.sh. */ + if (auto p = getEnv("_NIX_TEST_GC_SYNC_2")) + readFile(*p); + /* Helper function that deletes a path from the store and throws GCLimitReached if we've deleted enough garbage. */ auto deleteFromStore = [&](std::string_view baseName) @@ -771,10 +781,6 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results) } }; - /* Synchronisation point for testing, see tests/functional/gc-concurrent.sh. */ - if (auto p = getEnv("_NIX_TEST_GC_SYNC")) - readFile(*p); - /* Either delete all garbage paths, or just the specified paths (for gcDeleteSpecific). */ if (options.action == GCOptions::gcDeleteSpecific) { diff --git a/src/libutil/util.cc b/src/libutil/util.cc index bbd57434d..6bcb069ba 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -2042,21 +2042,35 @@ void connect(int fd, const std::string & path) addr.sun_family = AF_UNIX; if (path.size() + 1 >= sizeof(addr.sun_path)) { + Pipe pipe; + pipe.create(); Pid pid = startProcess([&]() { - Path dir = dirOf(path); - if (chdir(dir.c_str()) == -1) - throw SysError("chdir to '%s' failed", dir); - std::string base(baseNameOf(path)); - if (base.size() + 1 >= sizeof(addr.sun_path)) - throw Error("socket path '%s' is too long", base); - memcpy(addr.sun_path, base.c_str(), base.size() + 1); - if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) - throw SysError("cannot connect to socket at '%s'", path); - _exit(0); + try { + pipe.readSide.close(); + Path dir = dirOf(path); + if (chdir(dir.c_str()) == -1) + throw SysError("chdir to '%s' failed", dir); + std::string base(baseNameOf(path)); + if (base.size() + 1 >= sizeof(addr.sun_path)) + throw Error("socket path '%s' is too long", base); + 
memcpy(addr.sun_path, base.c_str(), base.size() + 1); + if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) + throw SysError("cannot connect to socket at '%s'", path); + writeFull(pipe.writeSide.get(), "0\n"); + } catch (SysError & e) { + writeFull(pipe.writeSide.get(), fmt("%d\n", e.errNo)); + } catch (...) { + writeFull(pipe.writeSide.get(), "-1\n"); + } }); - int status = pid.wait(); - if (status != 0) + pipe.writeSide.close(); + auto errNo = string2Int(chomp(drainFD(pipe.readSide.get()))); + if (!errNo || *errNo == -1) throw Error("cannot connect to socket at '%s'", path); + else if (*errNo > 0) { + errno = *errNo; + throw SysError("cannot connect to socket at '%s'", path); + } } else { memcpy(addr.sun_path, path.c_str(), path.size() + 1); if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) diff --git a/tests/functional/gc-non-blocking.sh b/tests/functional/gc-non-blocking.sh index 0d781485d..ec280badb 100644 --- a/tests/functional/gc-non-blocking.sh +++ b/tests/functional/gc-non-blocking.sh @@ -6,27 +6,42 @@ needLocalStore "the GC test needs a synchronisation point" clearStore -fifo=$TEST_ROOT/test.fifo -mkfifo "$fifo" +# This FIFO is read just after the global GC lock has been acquired, +# but before the root server is started. +fifo1=$TEST_ROOT/test2.fifo +mkfifo "$fifo1" + +# This FIFO is read just after the roots have been read, but before +# the actual GC starts. +fifo2=$TEST_ROOT/test.fifo +mkfifo "$fifo2" dummy=$(nix store add-path ./simple.nix) running=$TEST_ROOT/running touch $running -(_NIX_TEST_GC_SYNC=$fifo nix-store --gc -vvvvv; rm $running) & +# Start GC. +(_NIX_TEST_GC_SYNC_1=$fifo1 _NIX_TEST_GC_SYNC_2=$fifo2 nix-store --gc -vvvvv; rm $running) & pid=$! sleep 2 +# Delay the start of the root server to check that the build below +# correctly handles ENOENT when connecting to the root server. +(sleep 1; echo > $fifo1) & +pid2=$! + +# Start a build. This should not be blocked by the GC in progress. 
outPath=$(nix-build --max-silent-time 60 -o "$TEST_ROOT/result" -E " with import ./config.nix; mkDerivation { name = \"non-blocking\"; - buildCommand = \"set -x; test -e $running; mkdir \$out; echo > $fifo\"; + buildCommand = \"set -x; test -e $running; mkdir \$out; echo > $fifo2\"; }") wait $pid +wait $pid2 (! test -e $running) (! test -e $dummy)