// Patch summary and review notes. These lines sit before the first
// "diff --git" header and belong to no hunk.
//
// Purpose: replace the separate mountNamespacesSupported() and
// pidNamespacesSupported() probes with a single
// mountAndPidNamespacesSupported() probe, and let startProcess() create the
// child with clone() when ProcessOptions::cloneFlags is non-zero.
//
// src/libstore/build/local-derivation-goal.cc
//   tryLocalBuild(): the sandbox prerequisite check now calls
//   mountAndPidNamespacesSupported(). The sandboxFallback handling and the
//   Error and debug messages are unchanged context.
//
// src/libutil/namespaces.cc
//   userNamespacesSupported(): the probe child is started with
//   .cloneFlags = CLONE_NEWUSER and only calls _exit(0). A SysError is logged
//   through debug() and yields false. The wait status is checked with
//   assert(!r).
//   mountAndPidNamespacesSupported(): the probe child is started with
//   CLONE_NEWNS | CLONE_NEWPID, plus CLONE_NEWUSER when
//   userNamespacesSupported() returns true. The child marks "/" as
//   MS_PRIVATE | MS_REC (exit code 1 on failure), then mounts a proc
//   filesystem on /proc (exit code 2 on failure). Any non-zero wait status
//   yields false with the "cannot remount /proc" debug message, so exit code 1
//   is reported with that same message. A SysError yields false with the
//   "mount namespaces do not work" debug message.
//   The removed pidNamespacesSupported() scanned /proc/mounts with getmntent()
//   for mount directories starting with "/proc/".
//
// src/libutil/namespaces.hh
//   The two removed declarations are replaced by
//   bool mountAndPidNamespacesSupported().
//
// src/libutil/util.cc
//   childEntry(): entry point handed to clone(). It casts its argument to a
//   std::function pointer, invokes it, and returns 1 if that call returns.
//   startProcess(): wrapper becomes a named std::function so that its address
//   can be passed to clone(). When options.cloneFlags is non-zero the function
//   asserts that CLONE_VM is not set, maps a 1 MiB anonymous stack with mmap(),
//   passes stack + stackSize and cloneFlags | SIGCHLD to clone(), and unmaps
//   the stack through a Finally when that branch ends. Otherwise it calls
//   doFork() as before.
//
// src/libutil/util.hh
//   ProcessOptions gains the field int cloneFlags = 0.
//
// NOTE(review): in this copy the #include lines carry no header names and
//   std::function carries no template arguments. Presumably the <...> text was
//   stripped by whatever produced this file. Confirm against the original
//   patch before applying.
// NOTE(review): the clone() and mmap() branch in startProcess() has no
//   platform guard inside this hunk, while the include added to util.cc sits
//   inside #ifdef __linux__ and the new field is commented as Linux only.
//   Confirm that non-Linux builds still compile.
// NOTE(review): the Finally that unmaps the stack runs before the
//   pid == -1 check that throws SysError("unable to fork"). Confirm that the
//   errno value set by clone() cannot be overwritten in between.
diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index e1cc504f8..e5ba3ac0d 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -209,7 +209,7 @@ void LocalDerivationGoal::tryLocalBuild() #if __linux__ if (useChroot) { - if (!mountNamespacesSupported() || !pidNamespacesSupported()) { + if (!mountAndPidNamespacesSupported()) { if (!settings.sandboxFallback) throw Error("this system does not support the kernel namespaces that are required for sandboxing; use '--no-sandbox' to disable sandboxing"); debug("auto-disabling sandboxing because the prerequisite namespaces are not available"); diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index fdd52d92b..f66accb10 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -4,7 +4,7 @@ #include "util.hh" #include "finally.hh" -#include +#include namespace nix { @@ -33,63 +33,60 @@ bool userNamespacesSupported() return false; } - Pid pid = startProcess([&]() - { - auto res = unshare(CLONE_NEWUSER); - _exit(res ? 1 : 0); - }); + try { + Pid pid = startProcess([&]() + { + _exit(0); + }, { + .cloneFlags = CLONE_NEWUSER + }); - bool supported = pid.wait() == 0; + auto r = pid.wait(); + assert(!r); + } catch (SysError & e) { + debug("user namespaces do not work on this system: %s", e.msg()); + return false; + } - if (!supported) - debug("user namespaces do not work on this system"); - - return supported; + return true; }(); return res; } -bool mountNamespacesSupported() +bool mountAndPidNamespacesSupported() { static auto res = [&]() -> bool { - bool useUserNamespace = userNamespacesSupported(); + try { - Pid pid = startProcess([&]() - { - auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0)); - _exit(res ? 1 : 0); - }); + Pid pid = startProcess([&]() + { + /* Make sure we don't remount the parent's /proc. 
*/ + if (mount(0, "/", 0, MS_PRIVATE | MS_REC, 0) == -1) + _exit(1); - bool supported = pid.wait() == 0; + /* Test whether we can remount /proc. The kernel disallows + this if /proc is not fully visible, i.e. if there are + filesystems mounted on top of files inside /proc. See + https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. */ + if (mount("none", "/proc", "proc", 0, 0) == -1) + _exit(2); - if (!supported) - debug("mount namespaces do not work on this system"); + _exit(0); + }, { + .cloneFlags = CLONE_NEWNS | CLONE_NEWPID | (userNamespacesSupported() ? CLONE_NEWUSER : 0) + }); - return supported; - }(); - return res; -} - -bool pidNamespacesSupported() -{ - static auto res = [&]() -> bool - { - /* Check whether /proc is fully visible, i.e. there are no - filesystems mounted on top of files inside /proc. If this - is not the case, then we cannot mount a new /proc inside - the sandbox that matches the sandbox's PID namespace. - See https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. 
*/ - auto fp = fopen("/proc/mounts", "r"); - if (!fp) return false; - Finally delFP = [&]() { fclose(fp); }; - - while (auto ent = getmntent(fp)) - if (hasPrefix(std::string_view(ent->mnt_dir), "/proc/")) { - debug("PID namespaces do not work because /proc is not fully visible; disabling sandboxing"); + if (pid.wait()) { + debug("PID namespaces do not work on this system: cannot remount /proc"); return false; } + } catch (SysError & e) { + debug("mount namespaces do not work on this system: %s", e.msg()); + return false; + } + return true; }(); return res; diff --git a/src/libutil/namespaces.hh b/src/libutil/namespaces.hh index 34e54d5ad..e82379b9c 100644 --- a/src/libutil/namespaces.hh +++ b/src/libutil/namespaces.hh @@ -6,9 +6,7 @@ namespace nix { bool userNamespacesSupported(); -bool mountNamespacesSupported(); - -bool pidNamespacesSupported(); +bool mountAndPidNamespacesSupported(); #endif diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 40faa4bf2..94da37561 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -36,6 +36,7 @@ #ifdef __linux__ #include #include +#include #include #endif @@ -1051,9 +1052,17 @@ static pid_t doFork(bool allowVfork, std::function fun) } +static int childEntry(void * arg) +{ + auto main = (std::function *) arg; + (*main)(); + return 1; +} + + pid_t startProcess(std::function fun, const ProcessOptions & options) { - auto wrapper = [&]() { + std::function wrapper = [&]() { if (!options.allowVfork) logger = makeSimpleLogger(); try { @@ -1073,7 +1082,23 @@ pid_t startProcess(std::function fun, const ProcessOptions & options) _exit(1); }; - pid_t pid = doFork(options.allowVfork, wrapper); + pid_t pid = -1; + + if (options.cloneFlags) { + // Not supported, since then we don't know when to free the stack. 
+ assert(!(options.cloneFlags & CLONE_VM)); + + size_t stackSize = 1 * 1024 * 1024; + auto stack = (char *) mmap(0, stackSize, + PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (stack == MAP_FAILED) throw SysError("allocating stack"); + + Finally freeStack([&]() { munmap(stack, stackSize); }); + + pid = clone(childEntry, stack + stackSize, options.cloneFlags | SIGCHLD, &wrapper); + } else + pid = doFork(options.allowVfork, wrapper); + if (pid == -1) throw SysError("unable to fork"); return pid; diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 266da0ae3..95562280e 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -301,6 +301,7 @@ struct ProcessOptions bool dieWithParent = true; bool runExitHandlers = false; bool allowVfork = false; + int cloneFlags = 0; // use clone() with the specified flags (Linux only) }; pid_t startProcess(std::function fun, const ProcessOptions & options = ProcessOptions());