From c536e00c9deeac58bc4b3299dbc702604c32adbe Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 14 Mar 2023 19:10:59 +0100 Subject: [PATCH] Move pseudoterminal slave open to child Hopefully this fixes "unexpected EOF" failures on macOS (#3137, #3605, #7242, #7702). The problem appears to be that under some circumstances, macOS discards the output written to the slave side of the pseudoterminal. Hence the parent never sees the "sandbox initialized" message from the child, even though it succeeded. The conditions are: * The child finishes very quickly. That's why this bug is likely to trigger in nix-env tests, since that uses a builtin builder. Adding a short sleep before the child exits makes the problem go away. * The parent has closed its duplicate of the slave file descriptor. This shouldn't matter, since the child has a duplicate as well, but it does. E.g. moving the close to the bottom of startBuilder() makes the problem go away. However, that's not a solution because it would make Nix hang if the child dies before sending the "sandbox initialized" message. * The system is under high load. E.g. "make installcheck -j16" makes the issue pretty reproducible, while it's very rare under "make installcheck -j1". As a fix/workaround, we now open the pseudoterminal slave in the child, rather than the parent. This removes the second condition (i.e. the parent no longer needs to close the slave fd) and I haven't been able to reproduce the "unexpected EOF" with this. --- src/libstore/build/local-derivation-goal.cc | 58 ++++++++++----------- src/libstore/build/local-derivation-goal.hh | 4 ++ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 61c1cbccf..84a5a79b0 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -802,15 +802,13 @@ void LocalDerivationGoal::startBuilder() /* Create the log file. */ Path logFile = openLogFile(); - /* Create a pipe to get the output of the builder. */ - //builderOut.create(); - + /* Create a pseudoterminal to get the output of the builder. */ builderOut.readSide = posix_openpt(O_RDWR | O_NOCTTY); if (!builderOut.readSide) throw SysError("opening pseudoterminal master"); // FIXME: not thread-safe, use ptsname_r - std::string slaveName(ptsname(builderOut.readSide.get())); + slaveName = ptsname(builderOut.readSide.get()); if (buildUser) { if (chmod(slaveName.c_str(), 0600)) @@ -826,30 +824,9 @@ void LocalDerivationGoal::startBuilder() } #endif - #if 0 - // Mount the pt in the sandbox so that the "tty" command works. - // FIXME: this doesn't work with the new devpts in the sandbox. - if (useChroot) - dirsInChroot[slaveName] = {slaveName, false}; - #endif - if (unlockpt(builderOut.readSide.get())) throw SysError("unlocking pseudoterminal"); - builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); - if (!builderOut.writeSide) - throw SysError("opening pseudoterminal slave"); - - // Put the pt into raw mode to prevent \n -> \r\n translation. - struct termios term; - if (tcgetattr(builderOut.writeSide.get(), &term)) - throw SysError("getting pseudoterminal attributes"); - - cfmakeraw(&term); - - if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term)) - throw SysError("putting pseudoterminal into raw mode"); - buildResult.startTime = time(0); /* Fork a child to build the package. */ @@ -897,7 +874,11 @@ void LocalDerivationGoal::startBuilder() usingUserNamespace = userNamespacesSupported(); + Pipe sendPid; + sendPid.create(); + Pid helper = startProcess([&]() { + sendPid.readSide.close(); /* Drop additional groups here because we can't do it after we've created the new user namespace. FIXME: @@ -919,11 +900,12 @@ void LocalDerivationGoal::startBuilder() pid_t child = startProcess([&]() { runChild(); }, options); - writeFull(builderOut.writeSide.get(), - fmt("%d %d\n", usingUserNamespace, child)); + writeFull(sendPid.writeSide.get(), fmt("%d\n", child)); _exit(0); }); + sendPid.writeSide.close(); + if (helper.wait() != 0) throw Error("unable to start build process"); @@ -935,10 +917,9 @@ void LocalDerivationGoal::startBuilder() userNamespaceSync.writeSide = -1; }); - auto ss = tokenizeString>(readLine(builderOut.readSide.get())); - assert(ss.size() == 2); - usingUserNamespace = ss[0] == "1"; - pid = string2Int(ss[1]).value(); + auto ss = tokenizeString>(readLine(sendPid.readSide.get())); + assert(ss.size() == 1); + pid = string2Int(ss[0]).value(); if (usingUserNamespace) { /* Set the UID/GID mapping of the builder's user namespace @@ -1649,6 +1630,21 @@ void LocalDerivationGoal::runChild() try { /* child */ + /* Open the slave side of the pseudoterminal. */ + builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); + if (!builderOut.writeSide) + throw SysError("opening pseudoterminal slave"); + + // Put the pt into raw mode to prevent \n -> \r\n translation. + struct termios term; + if (tcgetattr(builderOut.writeSide.get(), &term)) + throw SysError("getting pseudoterminal attributes"); + + cfmakeraw(&term); + + if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term)) + throw SysError("putting pseudoterminal into raw mode"); + commonChildInit(builderOut.writeSide.get()); try { diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index 34c4e9187..c8537c7e5 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -27,6 +27,10 @@ struct LocalDerivationGoal : public DerivationGoal /* Pipe for the builder's standard output/error. */ Pipe builderOut; + /* Slave side of the pseudoterminal used for the builder's + standard output/error. */ + Path slaveName; + /* Pipe for synchronising updates to the builder namespaces. */ Pipe userNamespaceSync;