Fix auto-uid-allocation in Docker containers

Automatic UID allocation didn't work in Docker because sandboxing
doesn't work there. However, the sandboxing check was done lazily:
only after clone(CLONE_NEWNS) failed did we retry with sandboxing
disabled. By that point, we had already done UID allocation under
the assumption that user namespaces are enabled.

So let's get rid of the "goto fallback" logic and just detect early
whether user / mount namespaces are enabled.

This commit also gets rid of a compatibility hack for some ancient
Linux kernels (<2.13).
This commit is contained in:
Eelco Dolstra 2023-01-25 17:31:27 +01:00
parent 1ba13b17db
commit fb2f7f5dcc
3 changed files with 91 additions and 63 deletions

View file

@ -16,6 +16,7 @@
#include "json-utils.hh" #include "json-utils.hh"
#include "cgroup.hh" #include "cgroup.hh"
#include "personality.hh" #include "personality.hh"
#include "namespaces.hh"
#include <regex> #include <regex>
#include <queue> #include <queue>
@ -167,7 +168,8 @@ void LocalDerivationGoal::killSandbox(bool getStats)
} }
void LocalDerivationGoal::tryLocalBuild() { void LocalDerivationGoal::tryLocalBuild()
{
unsigned int curBuilds = worker.getNrLocalBuilds(); unsigned int curBuilds = worker.getNrLocalBuilds();
if (curBuilds >= settings.maxBuildJobs) { if (curBuilds >= settings.maxBuildJobs) {
state = &DerivationGoal::tryToBuild; state = &DerivationGoal::tryToBuild;
@ -205,6 +207,17 @@ void LocalDerivationGoal::tryLocalBuild() {
#endif #endif
} }
#if __linux__
if (useChroot) {
if (!mountNamespacesSupported()) {
if (!settings.sandboxFallback)
throw Error("this system does not support mount namespaces, which are required for sandboxing");
debug("auto-disabling sandboxing because mount namespaces are not available");
useChroot = false;
}
}
#endif
if (useBuildUsers()) { if (useBuildUsers()) {
if (!buildUser) if (!buildUser)
buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot); buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot);
@ -888,12 +901,7 @@ void LocalDerivationGoal::startBuilder()
userNamespaceSync.create(); userNamespaceSync.create();
Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces"; usingUserNamespace = userNamespacesSupported();
static bool userNamespacesEnabled =
pathExists(maxUserNamespaces)
&& trim(readFile(maxUserNamespaces)) != "0";
usingUserNamespace = userNamespacesEnabled;
Pid helper = startProcess([&]() { Pid helper = startProcess([&]() {
@ -920,64 +928,15 @@ void LocalDerivationGoal::startBuilder()
flags |= CLONE_NEWUSER; flags |= CLONE_NEWUSER;
pid_t child = clone(childEntry, stack + stackSize, flags, this); pid_t child = clone(childEntry, stack + stackSize, flags, this);
if (child == -1 && errno == EINVAL) {
/* Fallback for Linux < 2.13 where CLONE_NEWPID and if (child == -1)
CLONE_PARENT are not allowed together. */ throw SysError("creating sandboxed builder process using clone()");
flags &= ~CLONE_NEWPID;
child = clone(childEntry, stack + stackSize, flags, this);
}
if (usingUserNamespace && child == -1 && (errno == EPERM || errno == EINVAL)) {
/* Some distros patch Linux to not allow unprivileged
* user namespaces. If we get EPERM or EINVAL, try
* without CLONE_NEWUSER and see if that works.
* Details: https://salsa.debian.org/kernel-team/linux/-/commit/d98e00eda6bea437e39b9e80444eee84a32438a6
*/
usingUserNamespace = false;
flags &= ~CLONE_NEWUSER;
child = clone(childEntry, stack + stackSize, flags, this);
}
if (child == -1) {
switch(errno) {
case EPERM:
case EINVAL: {
int errno_ = errno;
if (!userNamespacesEnabled && errno==EPERM)
notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/user/max_user_namespaces");
if (userNamespacesEnabled) {
Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone";
if (pathExists(procSysKernelUnprivilegedUsernsClone)
&& trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") {
notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/kernel/unprivileged_userns_clone");
}
}
Path procSelfNsUser = "/proc/self/ns/user";
if (!pathExists(procSelfNsUser))
notice("/proc/self/ns/user does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing");
/* Otherwise exit with EPERM so we can handle this in the
parent. This is only done when sandbox-fallback is set
to true (the default). */
if (settings.sandboxFallback)
_exit(1);
/* Mention sandbox-fallback in the error message so the user
knows that having it disabled contributed to the
unrecoverability of this failure */
throw SysError(errno_, "creating sandboxed builder process using clone(), without sandbox-fallback");
}
default:
throw SysError("creating sandboxed builder process using clone()");
}
}
writeFull(builderOut.writeSide.get(), writeFull(builderOut.writeSide.get(),
fmt("%d %d\n", usingUserNamespace, child)); fmt("%d %d\n", usingUserNamespace, child));
_exit(0); _exit(0);
}); });
int res = helper.wait(); if (helper.wait() != 0)
if (res != 0 && settings.sandboxFallback) {
useChroot = false;
initTmpDir();
goto fallback;
} else if (res != 0)
throw Error("unable to start build process"); throw Error("unable to start build process");
userNamespaceSync.readSide = -1; userNamespaceSync.readSide = -1;
@ -1045,9 +1004,6 @@ void LocalDerivationGoal::startBuilder()
} else } else
#endif #endif
{ {
#if __linux__
fallback:
#endif
pid = startProcess([&]() { pid = startProcess([&]() {
runChild(); runChild();
}); });

63
src/libutil/namespaces.cc Normal file
View file

@ -0,0 +1,63 @@
#include "namespaces.hh"
#include "util.hh"
#if __linux__
namespace nix {
bool userNamespacesSupported()
{
static bool res = [&]() -> bool
{
if (!pathExists("/proc/self/ns/user")) {
notice("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing");
return false;
}
Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces";
if (!pathExists(maxUserNamespaces) ||
trim(readFile(maxUserNamespaces)) == "0")
{
notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/user/max_user_namespaces'");
return false;
}
Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone";
if (pathExists(procSysKernelUnprivilegedUsernsClone)
&& trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0")
{
notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/kernel/unprivileged_userns_clone'");
return false;
}
Pid pid = startProcess([&]()
{
auto res = unshare(CLONE_NEWUSER);
_exit(res ? 1 : 0);
});
return pid.wait() == 0;
}();
return res;
}
bool mountNamespacesSupported()
{
static bool res = [&]() -> bool
{
bool useUserNamespace = userNamespacesSupported();
Pid pid = startProcess([&]()
{
auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0));
_exit(res ? 1 : 0);
});
return pid.wait() == 0;
}();
return res;
}
}
#endif

View file

@ -0,0 +1,9 @@
#pragma once
namespace nix {
/* Whether user namespaces can be used on this system. The result is
   determined once, on the first call, by inspecting
   '/proc/self/ns/user', '/proc/sys/user/max_user_namespaces' and
   '/proc/sys/kernel/unprivileged_userns_clone', and finally by trying
   unshare(CLONE_NEWUSER) in a helper process; later calls return the
   cached value.
   NOTE(review): the definition in namespaces.cc is guarded by
   '#if __linux__' - confirm callers are guarded likewise. */
bool userNamespacesSupported();
/* Whether mount namespaces can be used on this system. The result is
   determined once, on the first call, by trying unshare(CLONE_NEWNS)
   in a helper process (together with CLONE_NEWUSER when
   userNamespacesSupported() is true); later calls return the cached
   value.
   NOTE(review): the definition in namespaces.cc is guarded by
   '#if __linux__' - confirm callers are guarded likewise. */
bool mountNamespacesSupported();
}