forked from lix-project/lix
Fix auto-uid-allocation in Docker containers
This didn't work because sandboxing doesn't work in Docker. However, the sandboxing check is done lazily - after clone(CLONE_NEWNS) fails, we retry with sandboxing disabled. But at that point, we've already done UID allocation under the assumption that user namespaces are enabled. So let's get rid of the "goto fallback" logic and just detect early whether user / mount namespaces are enabled. This commit also gets rid of a compatibility hack for some ancient Linux kernels (<2.13).
This commit is contained in:
parent
1ba13b17db
commit
fb2f7f5dcc
3 changed files with 91 additions and 63 deletions
|
@ -16,6 +16,7 @@
|
|||
#include "json-utils.hh"
|
||||
#include "cgroup.hh"
|
||||
#include "personality.hh"
|
||||
#include "namespaces.hh"
|
||||
|
||||
#include <regex>
|
||||
#include <queue>
|
||||
|
@ -167,7 +168,8 @@ void LocalDerivationGoal::killSandbox(bool getStats)
|
|||
}
|
||||
|
||||
|
||||
void LocalDerivationGoal::tryLocalBuild() {
|
||||
void LocalDerivationGoal::tryLocalBuild()
|
||||
{
|
||||
unsigned int curBuilds = worker.getNrLocalBuilds();
|
||||
if (curBuilds >= settings.maxBuildJobs) {
|
||||
state = &DerivationGoal::tryToBuild;
|
||||
|
@ -205,6 +207,17 @@ void LocalDerivationGoal::tryLocalBuild() {
|
|||
#endif
|
||||
}
|
||||
|
||||
#if __linux__
|
||||
if (useChroot) {
|
||||
if (!mountNamespacesSupported()) {
|
||||
if (!settings.sandboxFallback)
|
||||
throw Error("this system does not support mount namespaces, which are required for sandboxing");
|
||||
debug("auto-disabling sandboxing because mount namespaces are not available");
|
||||
useChroot = false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (useBuildUsers()) {
|
||||
if (!buildUser)
|
||||
buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot);
|
||||
|
@ -888,12 +901,7 @@ void LocalDerivationGoal::startBuilder()
|
|||
|
||||
userNamespaceSync.create();
|
||||
|
||||
Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces";
|
||||
static bool userNamespacesEnabled =
|
||||
pathExists(maxUserNamespaces)
|
||||
&& trim(readFile(maxUserNamespaces)) != "0";
|
||||
|
||||
usingUserNamespace = userNamespacesEnabled;
|
||||
usingUserNamespace = userNamespacesSupported();
|
||||
|
||||
Pid helper = startProcess([&]() {
|
||||
|
||||
|
@ -920,64 +928,15 @@ void LocalDerivationGoal::startBuilder()
|
|||
flags |= CLONE_NEWUSER;
|
||||
|
||||
pid_t child = clone(childEntry, stack + stackSize, flags, this);
|
||||
if (child == -1 && errno == EINVAL) {
|
||||
/* Fallback for Linux < 2.13 where CLONE_NEWPID and
|
||||
CLONE_PARENT are not allowed together. */
|
||||
flags &= ~CLONE_NEWPID;
|
||||
child = clone(childEntry, stack + stackSize, flags, this);
|
||||
}
|
||||
if (usingUserNamespace && child == -1 && (errno == EPERM || errno == EINVAL)) {
|
||||
/* Some distros patch Linux to not allow unprivileged
|
||||
* user namespaces. If we get EPERM or EINVAL, try
|
||||
* without CLONE_NEWUSER and see if that works.
|
||||
* Details: https://salsa.debian.org/kernel-team/linux/-/commit/d98e00eda6bea437e39b9e80444eee84a32438a6
|
||||
*/
|
||||
usingUserNamespace = false;
|
||||
flags &= ~CLONE_NEWUSER;
|
||||
child = clone(childEntry, stack + stackSize, flags, this);
|
||||
}
|
||||
if (child == -1) {
|
||||
switch(errno) {
|
||||
case EPERM:
|
||||
case EINVAL: {
|
||||
int errno_ = errno;
|
||||
if (!userNamespacesEnabled && errno==EPERM)
|
||||
notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/user/max_user_namespaces");
|
||||
if (userNamespacesEnabled) {
|
||||
Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone";
|
||||
if (pathExists(procSysKernelUnprivilegedUsernsClone)
|
||||
&& trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") {
|
||||
notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/kernel/unprivileged_userns_clone");
|
||||
}
|
||||
}
|
||||
Path procSelfNsUser = "/proc/self/ns/user";
|
||||
if (!pathExists(procSelfNsUser))
|
||||
notice("/proc/self/ns/user does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing");
|
||||
/* Otherwise exit with EPERM so we can handle this in the
|
||||
parent. This is only done when sandbox-fallback is set
|
||||
to true (the default). */
|
||||
if (settings.sandboxFallback)
|
||||
_exit(1);
|
||||
/* Mention sandbox-fallback in the error message so the user
|
||||
knows that having it disabled contributed to the
|
||||
unrecoverability of this failure */
|
||||
throw SysError(errno_, "creating sandboxed builder process using clone(), without sandbox-fallback");
|
||||
}
|
||||
default:
|
||||
|
||||
if (child == -1)
|
||||
throw SysError("creating sandboxed builder process using clone()");
|
||||
}
|
||||
}
|
||||
writeFull(builderOut.writeSide.get(),
|
||||
fmt("%d %d\n", usingUserNamespace, child));
|
||||
_exit(0);
|
||||
});
|
||||
|
||||
int res = helper.wait();
|
||||
if (res != 0 && settings.sandboxFallback) {
|
||||
useChroot = false;
|
||||
initTmpDir();
|
||||
goto fallback;
|
||||
} else if (res != 0)
|
||||
if (helper.wait() != 0)
|
||||
throw Error("unable to start build process");
|
||||
|
||||
userNamespaceSync.readSide = -1;
|
||||
|
@ -1045,9 +1004,6 @@ void LocalDerivationGoal::startBuilder()
|
|||
} else
|
||||
#endif
|
||||
{
|
||||
#if __linux__
|
||||
fallback:
|
||||
#endif
|
||||
pid = startProcess([&]() {
|
||||
runChild();
|
||||
});
|
||||
|
|
63
src/libutil/namespaces.cc
Normal file
63
src/libutil/namespaces.cc
Normal file
|
@ -0,0 +1,63 @@
|
|||
#include "namespaces.hh"
|
||||
#include "util.hh"
|
||||
|
||||
#if __linux__
|
||||
|
||||
namespace nix {
|
||||
|
||||
bool userNamespacesSupported()
|
||||
{
|
||||
static bool res = [&]() -> bool
|
||||
{
|
||||
if (!pathExists("/proc/self/ns/user")) {
|
||||
notice("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing");
|
||||
return false;
|
||||
}
|
||||
|
||||
Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces";
|
||||
if (!pathExists(maxUserNamespaces) ||
|
||||
trim(readFile(maxUserNamespaces)) == "0")
|
||||
{
|
||||
notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/user/max_user_namespaces'");
|
||||
return false;
|
||||
}
|
||||
|
||||
Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone";
|
||||
if (pathExists(procSysKernelUnprivilegedUsernsClone)
|
||||
&& trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0")
|
||||
{
|
||||
notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/kernel/unprivileged_userns_clone'");
|
||||
return false;
|
||||
}
|
||||
|
||||
Pid pid = startProcess([&]()
|
||||
{
|
||||
auto res = unshare(CLONE_NEWUSER);
|
||||
_exit(res ? 1 : 0);
|
||||
});
|
||||
|
||||
return pid.wait() == 0;
|
||||
}();
|
||||
return res;
|
||||
}
|
||||
|
||||
bool mountNamespacesSupported()
|
||||
{
|
||||
static bool res = [&]() -> bool
|
||||
{
|
||||
bool useUserNamespace = userNamespacesSupported();
|
||||
|
||||
Pid pid = startProcess([&]()
|
||||
{
|
||||
auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0));
|
||||
_exit(res ? 1 : 0);
|
||||
});
|
||||
|
||||
return pid.wait() == 0;
|
||||
}();
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
9
src/libutil/namespaces.hh
Normal file
9
src/libutil/namespaces.hh
Normal file
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
namespace nix {
|
||||
|
||||
bool userNamespacesSupported();
|
||||
|
||||
bool mountNamespacesSupported();
|
||||
|
||||
}
|
Loading…
Reference in a new issue