From fb2f7f5dcc6b37a4f39f59d9f477d3fa57d79095 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 25 Jan 2023 17:31:27 +0100 Subject: [PATCH 1/6] Fix auto-uid-allocation in Docker containers This didn't work because sandboxing doesn't work in Docker. However, the sandboxing check is done lazily - after clone(CLONE_NEWNS) fails, we retry with sandboxing disabled. But at that point, we've already done UID allocation under the assumption that user namespaces are enabled. So let's get rid of the "goto fallback" logic and just detect early whether user / mount namespaces are enabled. This commit also gets rid of a compatibility hack for some ancient Linux kernels (<2.13). --- src/libstore/build/local-derivation-goal.cc | 82 +++++---------------- src/libutil/namespaces.cc | 63 ++++++++++++++++ src/libutil/namespaces.hh | 9 +++ 3 files changed, 91 insertions(+), 63 deletions(-) create mode 100644 src/libutil/namespaces.cc create mode 100644 src/libutil/namespaces.hh diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 8ff83f748..16955e326 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -16,6 +16,7 @@ #include "json-utils.hh" #include "cgroup.hh" #include "personality.hh" +#include "namespaces.hh" #include #include @@ -167,7 +168,8 @@ void LocalDerivationGoal::killSandbox(bool getStats) } -void LocalDerivationGoal::tryLocalBuild() { +void LocalDerivationGoal::tryLocalBuild() +{ unsigned int curBuilds = worker.getNrLocalBuilds(); if (curBuilds >= settings.maxBuildJobs) { state = &DerivationGoal::tryToBuild; @@ -205,6 +207,17 @@ void LocalDerivationGoal::tryLocalBuild() { #endif } + #if __linux__ + if (useChroot) { + if (!mountNamespacesSupported()) { + if (!settings.sandboxFallback) + throw Error("this system does not support mount namespaces, which are required for sandboxing"); + debug("auto-disabling sandboxing because mount namespaces are not 
available"); + useChroot = false; + } + } + #endif + if (useBuildUsers()) { if (!buildUser) buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot); @@ -888,12 +901,7 @@ void LocalDerivationGoal::startBuilder() userNamespaceSync.create(); - Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces"; - static bool userNamespacesEnabled = - pathExists(maxUserNamespaces) - && trim(readFile(maxUserNamespaces)) != "0"; - - usingUserNamespace = userNamespacesEnabled; + usingUserNamespace = userNamespacesSupported(); Pid helper = startProcess([&]() { @@ -920,64 +928,15 @@ void LocalDerivationGoal::startBuilder() flags |= CLONE_NEWUSER; pid_t child = clone(childEntry, stack + stackSize, flags, this); - if (child == -1 && errno == EINVAL) { - /* Fallback for Linux < 2.13 where CLONE_NEWPID and - CLONE_PARENT are not allowed together. */ - flags &= ~CLONE_NEWPID; - child = clone(childEntry, stack + stackSize, flags, this); - } - if (usingUserNamespace && child == -1 && (errno == EPERM || errno == EINVAL)) { - /* Some distros patch Linux to not allow unprivileged - * user namespaces. If we get EPERM or EINVAL, try - * without CLONE_NEWUSER and see if that works. 
- * Details: https://salsa.debian.org/kernel-team/linux/-/commit/d98e00eda6bea437e39b9e80444eee84a32438a6 - */ - usingUserNamespace = false; - flags &= ~CLONE_NEWUSER; - child = clone(childEntry, stack + stackSize, flags, this); - } - if (child == -1) { - switch(errno) { - case EPERM: - case EINVAL: { - int errno_ = errno; - if (!userNamespacesEnabled && errno==EPERM) - notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/user/max_user_namespaces"); - if (userNamespacesEnabled) { - Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone"; - if (pathExists(procSysKernelUnprivilegedUsernsClone) - && trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") { - notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/kernel/unprivileged_userns_clone"); - } - } - Path procSelfNsUser = "/proc/self/ns/user"; - if (!pathExists(procSelfNsUser)) - notice("/proc/self/ns/user does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing"); - /* Otherwise exit with EPERM so we can handle this in the - parent. This is only done when sandbox-fallback is set - to true (the default). 
*/ - if (settings.sandboxFallback) - _exit(1); - /* Mention sandbox-fallback in the error message so the user - knows that having it disabled contributed to the - unrecoverability of this failure */ - throw SysError(errno_, "creating sandboxed builder process using clone(), without sandbox-fallback"); - } - default: - throw SysError("creating sandboxed builder process using clone()"); - } - } + + if (child == -1) + throw SysError("creating sandboxed builder process using clone()"); writeFull(builderOut.writeSide.get(), fmt("%d %d\n", usingUserNamespace, child)); _exit(0); }); - int res = helper.wait(); - if (res != 0 && settings.sandboxFallback) { - useChroot = false; - initTmpDir(); - goto fallback; - } else if (res != 0) + if (helper.wait() != 0) throw Error("unable to start build process"); userNamespaceSync.readSide = -1; @@ -1045,9 +1004,6 @@ void LocalDerivationGoal::startBuilder() } else #endif { -#if __linux__ - fallback: -#endif pid = startProcess([&]() { runChild(); }); diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc new file mode 100644 index 000000000..0c3c3cbdd --- /dev/null +++ b/src/libutil/namespaces.cc @@ -0,0 +1,63 @@ +#include "namespaces.hh" +#include "util.hh" + +#if __linux__ + +namespace nix { + +bool userNamespacesSupported() +{ + static bool res = [&]() -> bool + { + if (!pathExists("/proc/self/ns/user")) { + notice("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing"); + return false; + } + + Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces"; + if (!pathExists(maxUserNamespaces) || + trim(readFile(maxUserNamespaces)) == "0") + { + notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/user/max_user_namespaces'"); + return false; + } + + Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone"; + if (pathExists(procSysKernelUnprivilegedUsernsClone) + && 
trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") + { + notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/kernel/unprivileged_userns_clone'"); + return false; + } + + Pid pid = startProcess([&]() + { + auto res = unshare(CLONE_NEWUSER); + _exit(res ? 1 : 0); + }); + + return pid.wait() == 0; + }(); + return res; +} + +bool mountNamespacesSupported() +{ + static bool res = [&]() -> bool + { + bool useUserNamespace = userNamespacesSupported(); + + Pid pid = startProcess([&]() + { + auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0)); + _exit(res ? 1 : 0); + }); + + return pid.wait() == 0; + }(); + return res; +} + +} + +#endif diff --git a/src/libutil/namespaces.hh b/src/libutil/namespaces.hh new file mode 100644 index 000000000..4ed6cb683 --- /dev/null +++ b/src/libutil/namespaces.hh @@ -0,0 +1,9 @@ +#pragma once + +namespace nix { + +bool userNamespacesSupported(); + +bool mountNamespacesSupported(); + +} From bc1d9fd8b5a14334af1d0455e6b4d595cae959d5 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 27 Jan 2023 15:25:56 +0100 Subject: [PATCH 2/6] Check whether we can use PID namespaces In unprivileged podman containers, /proc is not fully visible (there are other filesystems mounted on subdirectories of /proc). Therefore we can't mount a new /proc in the sandbox that matches the PID namespace of the sandbox. So this commit automatically disables sandboxing if /proc is not fully visible. 
--- src/libstore/build/local-derivation-goal.cc | 6 ++-- src/libutil/namespaces.cc | 37 ++++++++++++++++++--- src/libutil/namespaces.hh | 2 ++ 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 16955e326..a99da0b5f 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -209,10 +209,10 @@ void LocalDerivationGoal::tryLocalBuild() #if __linux__ if (useChroot) { - if (!mountNamespacesSupported()) { + if (!mountNamespacesSupported() || !pidNamespacesSupported()) { if (!settings.sandboxFallback) - throw Error("this system does not support mount namespaces, which are required for sandboxing"); - debug("auto-disabling sandboxing because mount namespaces are not available"); + throw Error("this system does not support the kernel namespaces that are required for sandboxing"); + debug("auto-disabling sandboxing because the prerequisite namespaces are not available"); useChroot = false; } } diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index 0c3c3cbdd..222f0d11b 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -1,5 +1,8 @@ #include "namespaces.hh" #include "util.hh" +#include "finally.hh" + +#include <mntent.h> #if __linux__ @@ -7,10 +10,10 @@ namespace nix { bool userNamespacesSupported() { - static bool res = [&]() -> bool + static auto res = [&]() -> bool { if (!pathExists("/proc/self/ns/user")) { - notice("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing"); + debug("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y"); return false; } @@ -18,7 +21,7 @@ bool userNamespacesSupported() if (!pathExists(maxUserNamespaces) || trim(readFile(maxUserNamespaces)) == "0") { - notice("user namespaces appear to be disabled; they are required for sandboxing; check 
'/proc/sys/user/max_user_namespaces'"); + debug("user namespaces appear to be disabled; check '/proc/sys/user/max_user_namespaces'"); return false; } @@ -26,7 +29,7 @@ bool userNamespacesSupported() if (pathExists(procSysKernelUnprivilegedUsernsClone) && trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") { - notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/kernel/unprivileged_userns_clone'"); + debug("user namespaces appear to be disabled; check '/proc/sys/kernel/unprivileged_userns_clone'"); return false; } @@ -43,7 +46,7 @@ bool userNamespacesSupported() bool mountNamespacesSupported() { - static bool res = [&]() -> bool + static auto res = [&]() -> bool { bool useUserNamespace = userNamespacesSupported(); @@ -58,6 +61,30 @@ bool mountNamespacesSupported() return res; } +bool pidNamespacesSupported() +{ + static auto res = [&]() -> bool + { + /* Check whether /proc is fully visible, i.e. there are no + filesystems mounted on top of files inside /proc. If this + is not the case, then we cannot mount a new /proc inside + the sandbox that matches the sandbox's PID namespace. + See https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. 
*/ + auto fp = fopen("/proc/mounts", "r"); + if (!fp) return false; + Finally delFP = [&]() { fclose(fp); }; + + while (auto ent = getmntent(fp)) + if (hasPrefix(std::string_view(ent->mnt_dir), "/proc/")) { + debug("PID namespaces do not work because /proc is not fully visible; disabling sandboxing"); + return false; + } + + return true; + }(); + return res; +} + } #endif diff --git a/src/libutil/namespaces.hh b/src/libutil/namespaces.hh index 4ed6cb683..ad7bb559e 100644 --- a/src/libutil/namespaces.hh +++ b/src/libutil/namespaces.hh @@ -6,4 +6,6 @@ bool userNamespacesSupported(); bool mountNamespacesSupported(); +bool pidNamespacesSupported(); + } From d834de2894b5addc5a4a8c5088debd56a8517db1 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 27 Jan 2023 16:52:01 +0100 Subject: [PATCH 3/6] Fix macOS build --- src/libutil/namespaces.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index 222f0d11b..b1cdbfe03 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -1,11 +1,11 @@ +#if __linux__ + #include "namespaces.hh" #include "util.hh" #include "finally.hh" #include <mntent.h> -#if __linux__ - namespace nix { bool userNamespacesSupported() From 4e61877b5c64096fa3ea63bf5ead7e17e1ddef66 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 27 Jan 2023 16:52:31 +0100 Subject: [PATCH 4/6] More #ifdef --- src/libutil/namespaces.hh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libutil/namespaces.hh b/src/libutil/namespaces.hh index ad7bb559e..34e54d5ad 100644 --- a/src/libutil/namespaces.hh +++ b/src/libutil/namespaces.hh @@ -2,10 +2,14 @@ namespace nix { +#if __linux__ + bool userNamespacesSupported(); bool mountNamespacesSupported(); bool pidNamespacesSupported(); +#endif + } From c5c0617d6fe6810c35ec56d3116ef523f3f38904 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 7 Feb 2023 22:59:46 +0100 Subject: [PATCH 5/6] Mention --no-sandbox if sandboxing is 
unsupported --- src/libstore/build/local-derivation-goal.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index a99da0b5f..e1cc504f8 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -211,7 +211,7 @@ void LocalDerivationGoal::tryLocalBuild() if (useChroot) { if (!mountNamespacesSupported() || !pidNamespacesSupported()) { if (!settings.sandboxFallback) - throw Error("this system does not support the kernel namespaces that are required for sandboxing"); + throw Error("this system does not support the kernel namespaces that are required for sandboxing; use '--no-sandbox' to disable sandboxing"); debug("auto-disabling sandboxing because the prerequisite namespaces are not available"); useChroot = false; } From 0a70b411e1afaa22d8b01560de908246042daf10 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 7 Feb 2023 23:01:39 +0100 Subject: [PATCH 6/6] Print debug message if a namespace test fails --- src/libutil/namespaces.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index b1cdbfe03..fdd52d92b 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -39,7 +39,12 @@ bool userNamespacesSupported() _exit(res ? 1 : 0); }); - return pid.wait() == 0; + bool supported = pid.wait() == 0; + + if (!supported) + debug("user namespaces do not work on this system"); + + return supported; }(); return res; } @@ -56,7 +61,12 @@ bool mountNamespacesSupported() _exit(res ? 1 : 0); }); - return pid.wait() == 0; + bool supported = pid.wait() == 0; + + if (!supported) + debug("mount namespaces do not work on this system"); + + return supported; }(); return res; }