forked from lix-project/lix
Run builds in their own cgroup
Also, run builds in a cgroup namespace (ensuring /proc/self/cgroup doesn't leak information about the outside world) and mount /sys. This enables running systemd-nspawn and thus NixOS containers in a Nix build.
This commit is contained in:
parent
c3e0a68c7e
commit
f5fa3de759
1 changed file with 65 additions and 1 deletion
|
@ -2168,7 +2168,8 @@ void DerivationGoal::startBuilder()
|
||||||
if (mkdir(chrootRootDir.c_str(), 0755) == -1)
|
if (mkdir(chrootRootDir.c_str(), 0755) == -1)
|
||||||
throw SysError("cannot create '%1%'", chrootRootDir);
|
throw SysError("cannot create '%1%'", chrootRootDir);
|
||||||
|
|
||||||
if (buildUser && chown(chrootRootDir.c_str(), 0, buildUser->getGID()) == -1)
|
// FIXME: only make root writable for user namespace builds.
|
||||||
|
if (buildUser && chown(chrootRootDir.c_str(), buildUser->getUID(), buildUser->getGID()) == -1)
|
||||||
throw SysError("cannot change ownership of '%1%'", chrootRootDir);
|
throw SysError("cannot change ownership of '%1%'", chrootRootDir);
|
||||||
|
|
||||||
/* Create a writable /tmp in the chroot. Many builders need
|
/* Create a writable /tmp in the chroot. Many builders need
|
||||||
|
@ -2182,6 +2183,7 @@ void DerivationGoal::startBuilder()
|
||||||
nobody account. The latter is kind of a hack to support
|
nobody account. The latter is kind of a hack to support
|
||||||
Samba-in-QEMU. */
|
Samba-in-QEMU. */
|
||||||
createDirs(chrootRootDir + "/etc");
|
createDirs(chrootRootDir + "/etc");
|
||||||
|
chownToBuilder(chrootRootDir + "/etc");
|
||||||
|
|
||||||
writeFile(chrootRootDir + "/etc/passwd", fmt(
|
writeFile(chrootRootDir + "/etc/passwd", fmt(
|
||||||
"root:x:0:0:Nix build user:%3%:/noshell\n"
|
"root:x:0:0:Nix build user:%3%:/noshell\n"
|
||||||
|
@ -2372,6 +2374,52 @@ void DerivationGoal::startBuilder()
|
||||||
|
|
||||||
#if __linux__
|
#if __linux__
|
||||||
if (useChroot) {
|
if (useChroot) {
|
||||||
|
/* Create a cgroup. */
|
||||||
|
// FIXME: do we want to use the parent cgroup? We should
|
||||||
|
// always use the same cgroup regardless of whether we're the
|
||||||
|
// daemon or run from a user session via sudo.
|
||||||
|
std::string msg;
|
||||||
|
std::vector<Path> cgroups;
|
||||||
|
for (auto & line : tokenizeString<std::vector<std::string>>(readFile("/proc/self/cgroup"), "\n")) {
|
||||||
|
static std::regex regex("([0-9]+):([^:]*):(.*)");
|
||||||
|
std::smatch match;
|
||||||
|
if (!std::regex_match(line, match, regex))
|
||||||
|
throw Error("invalid line '%s' in '/proc/self/cgroup'", line);
|
||||||
|
|
||||||
|
/* We only create a systemd cgroup, since that's enough
|
||||||
|
for running systemd-nspawn. */
|
||||||
|
std::string name;
|
||||||
|
if (match[2] == "name=systemd")
|
||||||
|
name = "systemd";
|
||||||
|
//else if (match[2] == "")
|
||||||
|
// name = "unified";
|
||||||
|
else continue;
|
||||||
|
|
||||||
|
std::string cgroup = match[3];
|
||||||
|
|
||||||
|
auto hostCgroup = canonPath("/sys/fs/cgroup/" + name + "/" + cgroup);
|
||||||
|
|
||||||
|
if (!pathExists(hostCgroup))
|
||||||
|
throw Error("expected unified cgroup directory '%s'", hostCgroup);
|
||||||
|
|
||||||
|
auto childCgroup = fmt("%s/nix-%d", hostCgroup, buildUser->getUID());
|
||||||
|
|
||||||
|
// FIXME: if the cgroup already exists, kill all processes
|
||||||
|
// in it and destroy it.
|
||||||
|
|
||||||
|
if (mkdir(childCgroup.c_str(), 0755) == -1 && errno != EEXIST)
|
||||||
|
throw SysError("creating cgroup '%s'", childCgroup);
|
||||||
|
|
||||||
|
chownToBuilder(childCgroup);
|
||||||
|
chownToBuilder(childCgroup + "/cgroup.procs");
|
||||||
|
if (name == "unified") {
|
||||||
|
chownToBuilder(childCgroup + "/cgroup.threads");
|
||||||
|
chownToBuilder(childCgroup + "/cgroup.subtree_control");
|
||||||
|
}
|
||||||
|
|
||||||
|
cgroups.push_back(childCgroup);
|
||||||
|
}
|
||||||
|
|
||||||
/* Set up private namespaces for the build:
|
/* Set up private namespaces for the build:
|
||||||
|
|
||||||
- The PID namespace causes the build to start as PID 1.
|
- The PID namespace causes the build to start as PID 1.
|
||||||
|
@ -2496,6 +2544,10 @@ void DerivationGoal::startBuilder()
|
||||||
if (sandboxMountNamespace.get() == -1)
|
if (sandboxMountNamespace.get() == -1)
|
||||||
throw SysError("getting sandbox mount namespace");
|
throw SysError("getting sandbox mount namespace");
|
||||||
|
|
||||||
|
/* Move the child into its own cgroup. */
|
||||||
|
for (auto & childCgroup : cgroups)
|
||||||
|
writeFile(childCgroup + "/cgroup.procs", fmt("%d", (pid_t) pid));
|
||||||
|
|
||||||
/* Signal the builder that we've updated its user namespace. */
|
/* Signal the builder that we've updated its user namespace. */
|
||||||
writeFull(userNamespaceSync.writeSide.get(), "1");
|
writeFull(userNamespaceSync.writeSide.get(), "1");
|
||||||
userNamespaceSync.writeSide = -1;
|
userNamespaceSync.writeSide = -1;
|
||||||
|
@ -3279,6 +3331,12 @@ void DerivationGoal::runChild()
|
||||||
if (mount("none", (chrootRootDir + "/proc").c_str(), "proc", 0, 0) == -1)
|
if (mount("none", (chrootRootDir + "/proc").c_str(), "proc", 0, 0) == -1)
|
||||||
throw SysError("mounting /proc");
|
throw SysError("mounting /proc");
|
||||||
|
|
||||||
|
/* Mount sysfs on /sys. FIXME: only in user namespace
|
||||||
|
builds. */
|
||||||
|
createDirs(chrootRootDir + "/sys");
|
||||||
|
if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1)
|
||||||
|
throw SysError("mounting /sys");
|
||||||
|
|
||||||
/* Mount a new tmpfs on /dev/shm to ensure that whatever
|
/* Mount a new tmpfs on /dev/shm to ensure that whatever
|
||||||
the builder puts in /dev/shm is cleaned up automatically. */
|
the builder puts in /dev/shm is cleaned up automatically. */
|
||||||
if (pathExists("/dev/shm") && mount("none", (chrootRootDir + "/dev/shm").c_str(), "tmpfs", 0,
|
if (pathExists("/dev/shm") && mount("none", (chrootRootDir + "/dev/shm").c_str(), "tmpfs", 0,
|
||||||
|
@ -3321,6 +3379,12 @@ void DerivationGoal::runChild()
|
||||||
if (unshare(CLONE_NEWNS) == -1)
|
if (unshare(CLONE_NEWNS) == -1)
|
||||||
throw SysError("unsharing mount namespace");
|
throw SysError("unsharing mount namespace");
|
||||||
|
|
||||||
|
/* Unshare the cgroup namespace. This means
|
||||||
|
/proc/self/cgroup will show the child's cgroup as '/'
|
||||||
|
rather than whatever it is in the parent. */
|
||||||
|
if (unshare(CLONE_NEWCGROUP) == -1)
|
||||||
|
throw SysError("unsharing cgroup namespace");
|
||||||
|
|
||||||
/* Do the chroot(). */
|
/* Do the chroot(). */
|
||||||
if (chdir(chrootRootDir.c_str()) == -1)
|
if (chdir(chrootRootDir.c_str()) == -1)
|
||||||
throw SysError("cannot change directory to '%1%'", chrootRootDir);
|
throw SysError("cannot change directory to '%1%'", chrootRootDir);
|
||||||
|
|
Loading…
Reference in a new issue