From 19326ac2979f0d989835105a5d816a943a6bc7f2 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 14 Mar 2023 18:58:12 +0100 Subject: [PATCH 1/5] Simplify commonChildInit() --- src/libstore/build/hook-instance.cc | 2 +- src/libstore/build/local-derivation-goal.cc | 2 +- src/libutil/util.cc | 4 ++-- src/libutil/util.hh | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libstore/build/hook-instance.cc b/src/libstore/build/hook-instance.cc index cb58a1f02..ea2ae210e 100644 --- a/src/libstore/build/hook-instance.cc +++ b/src/libstore/build/hook-instance.cc @@ -35,7 +35,7 @@ HookInstance::HookInstance() /* Fork the hook. */ pid = startProcess([&]() { - commonChildInit(fromHook); + commonChildInit(fromHook.writeSide.get()); if (chdir("/") == -1) throw SysError("changing into /"); diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 6fb9b86e0..61c1cbccf 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -1649,7 +1649,7 @@ void LocalDerivationGoal::runChild() try { /* child */ - commonChildInit(builderOut); + commonChildInit(builderOut.writeSide.get()); try { setupSeccomp(); diff --git a/src/libutil/util.cc b/src/libutil/util.cc index c1de4fb33..c605a33e6 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -1968,7 +1968,7 @@ std::string showBytes(uint64_t bytes) // FIXME: move to libstore/build -void commonChildInit(Pipe & logPipe) +void commonChildInit(int stderrFd) { logger = makeSimpleLogger(); @@ -1983,7 +1983,7 @@ void commonChildInit(Pipe & logPipe) throw SysError("creating a new session"); /* Dup the write side of the logger pipe into stderr. */ - if (dup2(logPipe.writeSide.get(), STDERR_FILENO) == -1) + if (dup2(stderrFd, STDERR_FILENO) == -1) throw SysError("cannot pipe standard error into log file"); /* Dup stderr to stdout. */ diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 326c6b143..52ca36fd1 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -704,7 +704,7 @@ typedef std::function PathFilter; extern PathFilter defaultPathFilter; /* Common initialisation performed in child processes. */ -void commonChildInit(Pipe & logPipe); +void commonChildInit(int stderrFd); /* Create a Unix domain socket. */ AutoCloseFD createUnixDomainSocket(); From c536e00c9deeac58bc4b3299dbc702604c32adbe Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 14 Mar 2023 19:10:59 +0100 Subject: [PATCH 2/5] Move pseudoterminal slave open to child Hopefully this fixes "unexpected EOF" failures on macOS (#3137, #3605, #7242, #7702). The problem appears to be that under some circumstances, macOS discards the output written to the slave side of the pseudoterminal. Hence the parent never sees the "sandbox initialized" message from the child, even though it succeeded. The conditions are: * The child finishes very quickly. That's why this bug is likely to trigger in nix-env tests, since that uses a builtin builder. Adding a short sleep before the child exits makes the problem go away. * The parent has closed its duplicate of the slave file descriptor. This shouldn't matter, since the child has a duplicate as well, but it does. E.g. moving the close to the bottom of startBuilder() makes the problem go away. However, that's not a solution because it would make Nix hang if the child dies before sending the "sandbox initialized" message. * The system is under high load. E.g. "make installcheck -j16" makes the issue pretty reproducible, while it's very rare under "make installcheck -j1". As a fix/workaround, we now open the pseudoterminal slave in the child, rather than the parent. This removes the second condition (i.e. the parent no longer needs to close the slave fd) and I haven't been able to reproduce the "unexpected EOF" with this. --- src/libstore/build/local-derivation-goal.cc | 58 ++++++++++----------- src/libstore/build/local-derivation-goal.hh | 4 ++ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 61c1cbccf..84a5a79b0 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -802,15 +802,13 @@ void LocalDerivationGoal::startBuilder() /* Create the log file. */ Path logFile = openLogFile(); - /* Create a pipe to get the output of the builder. */ - //builderOut.create(); - + /* Create a pseudoterminal to get the output of the builder. */ builderOut.readSide = posix_openpt(O_RDWR | O_NOCTTY); if (!builderOut.readSide) throw SysError("opening pseudoterminal master"); // FIXME: not thread-safe, use ptsname_r - std::string slaveName(ptsname(builderOut.readSide.get())); + slaveName = ptsname(builderOut.readSide.get()); if (buildUser) { if (chmod(slaveName.c_str(), 0600)) @@ -826,30 +824,9 @@ void LocalDerivationGoal::startBuilder() } #endif - #if 0 - // Mount the pt in the sandbox so that the "tty" command works. - // FIXME: this doesn't work with the new devpts in the sandbox. - if (useChroot) - dirsInChroot[slaveName] = {slaveName, false}; - #endif - if (unlockpt(builderOut.readSide.get())) throw SysError("unlocking pseudoterminal"); - builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); - if (!builderOut.writeSide) - throw SysError("opening pseudoterminal slave"); - - // Put the pt into raw mode to prevent \n -> \r\n translation. - struct termios term; - if (tcgetattr(builderOut.writeSide.get(), &term)) - throw SysError("getting pseudoterminal attributes"); - - cfmakeraw(&term); - - if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term)) - throw SysError("putting pseudoterminal into raw mode"); - buildResult.startTime = time(0); /* Fork a child to build the package. */ @@ -897,7 +874,11 @@ void LocalDerivationGoal::startBuilder() usingUserNamespace = userNamespacesSupported(); + Pipe sendPid; + sendPid.create(); + Pid helper = startProcess([&]() { + sendPid.readSide.close(); /* Drop additional groups here because we can't do it after we've created the new user namespace. FIXME: @@ -919,11 +900,12 @@ void LocalDerivationGoal::startBuilder() pid_t child = startProcess([&]() { runChild(); }, options); - writeFull(builderOut.writeSide.get(), - fmt("%d %d\n", usingUserNamespace, child)); + writeFull(sendPid.writeSide.get(), fmt("%d\n", child)); _exit(0); }); + sendPid.writeSide.close(); + if (helper.wait() != 0) throw Error("unable to start build process"); @@ -935,10 +917,9 @@ void LocalDerivationGoal::startBuilder() userNamespaceSync.writeSide = -1; }); - auto ss = tokenizeString>(readLine(builderOut.readSide.get())); - assert(ss.size() == 2); - usingUserNamespace = ss[0] == "1"; - pid = string2Int(ss[1]).value(); + auto ss = tokenizeString>(readLine(sendPid.readSide.get())); + assert(ss.size() == 1); + pid = string2Int(ss[0]).value(); if (usingUserNamespace) { /* Set the UID/GID mapping of the builder's user namespace @@ -1649,6 +1630,21 @@ void LocalDerivationGoal::runChild() try { /* child */ + /* Open the slave side of the pseudoterminal. */ + builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); + if (!builderOut.writeSide) + throw SysError("opening pseudoterminal slave"); + + // Put the pt into raw mode to prevent \n -> \r\n translation. + struct termios term; + if (tcgetattr(builderOut.writeSide.get(), &term)) + throw SysError("getting pseudoterminal attributes"); + + cfmakeraw(&term); + + if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term)) + throw SysError("putting pseudoterminal into raw mode"); + commonChildInit(builderOut.writeSide.get()); try { diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index 34c4e9187..c8537c7e5 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -27,6 +27,10 @@ struct LocalDerivationGoal : public DerivationGoal /* Pipe for the builder's standard output/error. */ Pipe builderOut; + /* Slave side of the pseudoterminal used for the builder's + standard output/error. */ + Path slaveName; + /* Pipe for synchronising updates to the builder namespaces. */ Pipe userNamespaceSync; From cb28e4fe2a552000272793e248ea3f6a69d625d6 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 15 Mar 2023 10:33:02 +0100 Subject: [PATCH 3/5] Remove "unexpected EOF" retry hack --- mk/run-test.sh | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/mk/run-test.sh b/mk/run-test.sh index 305396c11..1a1d65930 100755 --- a/mk/run-test.sh +++ b/mk/run-test.sh @@ -27,18 +27,6 @@ run_test () { run_test -# Hack: Retry the test if it fails with “unexpected EOF reading a line” as these -# appear randomly without anyone knowing why. -# See https://github.com/NixOS/nix/issues/3605 for more info -if [[ $status -ne 0 && $status -ne 99 && \ - "$(uname)" == "Darwin" && \ - "$log" =~ "unexpected EOF reading a line" \ -]]; then - echo "$post_run_msg [${yellow}FAIL$normal] (possibly flaky, so will be retried)" - echo "$log" | sed 's/^/ /' - run_test -fi - if [ $status -eq 0 ]; then echo "$post_run_msg [${green}PASS$normal]" elif [ $status -eq 99 ]; then From 6029c763c2c5998dc3265152425c8ff0ce01b1a0 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 15 Mar 2023 10:37:39 +0100 Subject: [PATCH 4/5] Change builderOut from Pipe to AutoCloseFD --- src/libstore/build/local-derivation-goal.cc | 31 ++++++++++----------- src/libstore/build/local-derivation-goal.hh | 5 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 84a5a79b0..f1ddf753f 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -292,7 +292,7 @@ void LocalDerivationGoal::closeReadPipes() if (hook) { DerivationGoal::closeReadPipes(); } else - builderOut.readSide = -1; + builderOut.close(); } @@ -803,12 +803,12 @@ void LocalDerivationGoal::startBuilder() Path logFile = openLogFile(); /* Create a pseudoterminal to get the output of the builder. */ - builderOut.readSide = posix_openpt(O_RDWR | O_NOCTTY); - if (!builderOut.readSide) + builderOut = posix_openpt(O_RDWR | O_NOCTTY); + if (!builderOut) throw SysError("opening pseudoterminal master"); // FIXME: not thread-safe, use ptsname_r - slaveName = ptsname(builderOut.readSide.get()); + slaveName = ptsname(builderOut.get()); if (buildUser) { if (chmod(slaveName.c_str(), 0600)) @@ -819,12 +819,12 @@ void LocalDerivationGoal::startBuilder() } #if __APPLE__ else { - if (grantpt(builderOut.readSide.get())) + if (grantpt(builderOut.get())) throw SysError("granting access to pseudoterminal slave"); } #endif - if (unlockpt(builderOut.readSide.get())) + if (unlockpt(builderOut.get())) throw SysError("unlocking pseudoterminal"); buildResult.startTime = time(0); @@ -980,15 +980,14 @@ void LocalDerivationGoal::startBuilder() /* parent */ pid.setSeparatePG(true); - builderOut.writeSide = -1; - worker.childStarted(shared_from_this(), {builderOut.readSide.get()}, true, true); + worker.childStarted(shared_from_this(), {builderOut.get()}, true, true); /* Check if setting up the build environment failed. */ std::vector msgs; while (true) { std::string msg = [&]() { try { - return readLine(builderOut.readSide.get()); + return readLine(builderOut.get()); } catch (Error & e) { auto status = pid.wait(); e.addTrace({}, "while waiting for the build environment for '%s' to initialize (%s, previous messages: %s)", @@ -1000,7 +999,7 @@ void LocalDerivationGoal::startBuilder() }(); if (msg.substr(0, 1) == "\2") break; if (msg.substr(0, 1) == "\1") { - FdSource source(builderOut.readSide.get()); + FdSource source(builderOut.get()); auto ex = readError(source); ex.addTrace({}, "while setting up the build environment"); throw ex; @@ -1631,21 +1630,21 @@ void LocalDerivationGoal::runChild() try { /* child */ /* Open the slave side of the pseudoterminal. */ - builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); - if (!builderOut.writeSide) + AutoCloseFD builderOut = open(slaveName.c_str(), O_RDWR | O_NOCTTY); + if (!builderOut) throw SysError("opening pseudoterminal slave"); // Put the pt into raw mode to prevent \n -> \r\n translation. struct termios term; - if (tcgetattr(builderOut.writeSide.get(), &term)) + if (tcgetattr(builderOut.get(), &term)) throw SysError("getting pseudoterminal attributes"); cfmakeraw(&term); - if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term)) + if (tcsetattr(builderOut.get(), TCSANOW, &term)) throw SysError("putting pseudoterminal into raw mode"); - commonChildInit(builderOut.writeSide.get()); + commonChildInit(builderOut.get()); try { setupSeccomp(); @@ -2887,7 +2886,7 @@ void LocalDerivationGoal::deleteTmpDir(bool force) bool LocalDerivationGoal::isReadDesc(int fd) { return (hook && DerivationGoal::isReadDesc(fd)) || - (!hook && fd == builderOut.readSide.get()); + (!hook && fd == builderOut.get()); } diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index c8537c7e5..c3e8562ae 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -24,8 +24,9 @@ struct LocalDerivationGoal : public DerivationGoal /* The path of the temporary directory in the sandbox. */ Path tmpDirInSandbox; - /* Pipe for the builder's standard output/error. */ - Pipe builderOut; + /* Master side of the pseudoterminal used for the builder's + standard output/error. */ + AutoCloseFD builderOut; /* Slave side of the pseudoterminal used for the builder's standard output/error. */ From 5eb8bfd0f17f950ec181d59fb9fbb2330f778935 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 15 Mar 2023 10:41:38 +0100 Subject: [PATCH 5/5] Remove a variable in LocalDerivationGoal --- src/libstore/build/local-derivation-goal.cc | 8 ++++---- src/libstore/build/local-derivation-goal.hh | 6 +----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index f1ddf753f..538edef74 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -808,7 +808,7 @@ void LocalDerivationGoal::startBuilder() throw SysError("opening pseudoterminal master"); // FIXME: not thread-safe, use ptsname_r - slaveName = ptsname(builderOut.get()); + std::string slaveName = ptsname(builderOut.get()); if (buildUser) { if (chmod(slaveName.c_str(), 0600)) @@ -898,7 +898,7 @@ void LocalDerivationGoal::startBuilder() if (usingUserNamespace) options.cloneFlags |= CLONE_NEWUSER; - pid_t child = startProcess([&]() { runChild(); }, options); + pid_t child = startProcess([&]() { runChild(slaveName); }, options); writeFull(sendPid.writeSide.get(), fmt("%d\n", child)); _exit(0); @@ -974,7 +974,7 @@ void LocalDerivationGoal::startBuilder() #endif { pid = startProcess([&]() { - runChild(); + runChild(slaveName); }); } @@ -1620,7 +1620,7 @@ void setupSeccomp() } -void LocalDerivationGoal::runChild() +void LocalDerivationGoal::runChild(const Path & slaveName) { /* Warning: in the child we should absolutely not make any SQLite calls! */ diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index c3e8562ae..4d2f1ac28 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -28,10 +28,6 @@ struct LocalDerivationGoal : public DerivationGoal standard output/error. */ AutoCloseFD builderOut; - /* Slave side of the pseudoterminal used for the builder's - standard output/error. */ - Path slaveName; - /* Pipe for synchronising updates to the builder namespaces. */ Pipe userNamespaceSync; @@ -173,7 +169,7 @@ struct LocalDerivationGoal : public DerivationGoal int getChildStatus() override; /* Run the builder's process. */ - void runChild(); + void runChild(const std::string & slaveName); /* Check that the derivation outputs all exist and register them as valid. */