Separate cgroup support from auto-uid-allocation

The new experimental feature 'cgroups' enables the use of cgroups for
all builds. This allows better containment and enables setting
resource limits and getting some build stats.
This commit is contained in:
Eelco Dolstra 2022-11-18 10:39:28 +01:00
parent f1ab082ac4
commit 128910ba23
6 changed files with 93 additions and 108 deletions

View file

@ -15,6 +15,7 @@
#include "topo-sort.hh"
#include "callback.hh"
#include "json-utils.hh"
#include "cgroup.hh"
#include <regex>
#include <queue>
@ -129,26 +130,36 @@ void LocalDerivationGoal::killChild()
if (pid != -1) {
worker.childTerminated(this);
if (buildUser) {
/* If we're using a build user, then there is a tricky
race condition: if we kill the build user before the
child has done its setuid() to the build user uid, then
it won't be killed, and we'll potentially lock up in
pid.wait(). So also send a conventional kill to the
child. */
::kill(-pid, SIGKILL); /* ignore the result */
buildUser->kill();
pid.wait();
} else
pid.kill();
/* If we're using a build user, then there is a tricky race
condition: if we kill the build user before the child has
done its setuid() to the build user uid, then it won't be
killed, and we'll potentially lock up in pid.wait(). So
also send a conventional kill to the child. */
::kill(-pid, SIGKILL); /* ignore the result */
assert(pid == -1);
killSandbox();
pid.wait();
}
DerivationGoal::killChild();
}
void LocalDerivationGoal::killSandbox()
{
if (cgroup) {
destroyCgroup(*cgroup);
}
else if (buildUser) {
auto uid = buildUser->getUID();
assert(uid != 0);
killUser(uid);
}
}
void LocalDerivationGoal::tryLocalBuild() {
unsigned int curBuilds = worker.getNrLocalBuilds();
if (curBuilds >= settings.maxBuildJobs) {
@ -169,10 +180,6 @@ void LocalDerivationGoal::tryLocalBuild() {
worker.waitForAWhile(shared_from_this());
return;
}
/* Make sure that no other processes are executing under this
uid. */
buildUser->kill();
}
actLock.reset();
@ -263,7 +270,7 @@ void LocalDerivationGoal::cleanupPostChildKill()
malicious user from leaving behind a process that keeps files
open and modifies them after they have been chown'ed to
root. */
if (buildUser) buildUser->kill();
killSandbox();
/* Terminate the recursive Nix daemon. */
stopDaemon();
@ -356,6 +363,55 @@ static void linkOrCopy(const Path & from, const Path & to)
void LocalDerivationGoal::startBuilder()
{
if ((buildUser && buildUser->getUIDCount() != 1)
|| settings.isExperimentalFeatureEnabled(Xp::Cgroups))
{
#if __linux__
auto ourCgroups = getCgroups("/proc/self/cgroup");
auto ourCgroup = ourCgroups[""];
if (ourCgroup == "")
throw Error("cannot determine cgroup name from /proc/self/cgroup");
auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup);
if (!pathExists(ourCgroupPath))
throw Error("expected cgroup directory '%s'", ourCgroupPath);
static std::atomic<unsigned int> counter{0};
cgroup = buildUser
? fmt("%s/nix-build-uid-%d", ourCgroupPath, buildUser->getUID())
: fmt("%s/nix-build-pid-%d-%d", ourCgroupPath, getpid(), counter++);
debug("using cgroup '%s'", *cgroup);
/* When using a build user, record the cgroup we used for that
user so that if we got interrupted previously, we can kill
any left-over cgroup first. */
if (buildUser) {
auto cgroupsDir = settings.nixStateDir + "/cgroups";
createDirs(cgroupsDir);
auto cgroupFile = fmt("%s/%d", cgroupsDir, buildUser->getUID());
if (pathExists(cgroupFile)) {
auto prevCgroup = readFile(cgroupFile);
destroyCgroup(prevCgroup);
}
writeFile(cgroupFile, *cgroup);
}
#else
throw Error("cgroups are not supported on this platform");
#endif
}
/* Make sure that no other processes are executing under the
sandbox uids. This must be done before any chownToBuilder()
calls. */
killSandbox();
/* Right platform? */
if (!parsedDrv->canBuildLocally(worker.store))
throw Error("a '%s' with features {%s} is required to build '%s', but I am a '%s' with features {%s}",
@ -646,13 +702,13 @@ void LocalDerivationGoal::startBuilder()
dirsInChroot.erase(worker.store.printStorePath(*i.second.second));
}
if (buildUser) {
if (auto cgroup = buildUser->getCgroup()) {
chownToBuilder(*cgroup);
chownToBuilder(*cgroup + "/cgroup.procs");
chownToBuilder(*cgroup + "/cgroup.threads");
//chownToBuilder(*cgroup + "/cgroup.subtree_control");
}
if (cgroup) {
if (mkdir(cgroup->c_str(), 0755) != 0)
throw SysError("creating cgroup '%s'", *cgroup);
chownToBuilder(*cgroup);
chownToBuilder(*cgroup + "/cgroup.procs");
chownToBuilder(*cgroup + "/cgroup.threads");
//chownToBuilder(*cgroup + "/cgroup.subtree_control");
}
#else
@ -965,10 +1021,8 @@ void LocalDerivationGoal::startBuilder()
}
/* Move the child into its own cgroup. */
if (buildUser) {
if (auto cgroup = buildUser->getCgroup())
writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid));
}
if (cgroup)
writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid));
/* Signal the builder that we've updated its user namespace. */
writeFull(userNamespaceSync.writeSide.get(), "1");
@ -1838,7 +1892,7 @@ void LocalDerivationGoal::runChild()
/* Unshare the cgroup namespace. This means
/proc/self/cgroup will show the child's cgroup as '/'
rather than whatever it is in the parent. */
if (buildUser && buildUser->getUIDCount() != 1 && unshare(CLONE_NEWCGROUP) == -1)
if (cgroup && unshare(CLONE_NEWCGROUP) == -1)
throw SysError("unsharing cgroup namespace");
/* Do the chroot(). */

View file

@ -15,6 +15,9 @@ struct LocalDerivationGoal : public DerivationGoal
/* The process ID of the builder. */
Pid pid;
/* The cgroup of the builder, if any. */
std::optional<Path> cgroup;
/* The temporary directory. */
Path tmpDir;
@ -197,6 +200,10 @@ struct LocalDerivationGoal : public DerivationGoal
/* Forcibly kill the child process, if any. */
void killChild() override;
/* Kill any processes running under the build user UID or in the
cgroup of the build. */
void killSandbox();
/* Create alternative path calculated from but distinct from the
input, so we can avoid overwriting outputs (or other store paths)
that already exist. */

View file

@ -1,7 +1,6 @@
#include "lock.hh"
#include "globals.hh"
#include "pathlocks.hh"
#include "cgroup.hh"
#include <pwd.h>
#include <grp.h>
@ -15,11 +14,6 @@ struct SimpleUserLock : UserLock
gid_t gid;
std::vector<gid_t> supplementaryGIDs;
void kill() override
{
killUser(uid);
}
uid_t getUID() override { assert(uid); return uid; }
uid_t getUIDCount() override { return 1; }
gid_t getGID() override { assert(gid); return gid; }
@ -116,32 +110,6 @@ struct AutoUserLock : UserLock
AutoCloseFD fdUserLock;
uid_t firstUid = 0;
uid_t nrIds = 1;
#if __linux__
std::optional<Path> cgroup;
#endif
~AutoUserLock()
{
#if __linux__
// Get rid of our cgroup, ignoring errors.
if (cgroup) rmdir(cgroup->c_str());
#endif
}
void kill() override
{
#if __linux__
if (cgroup) {
destroyCgroup(*cgroup);
if (mkdir(cgroup->c_str(), 0755) == -1)
throw SysError("creating cgroup '%s'", *cgroup);
} else
#endif
{
assert(firstUid);
killUser(firstUid);
}
}
uid_t getUID() override { assert(firstUid); return firstUid; }
@ -183,55 +151,16 @@ struct AutoUserLock : UserLock
throw SysError("opening user lock '%s'", fnUserLock);
if (lockFile(fd.get(), ltWrite, false)) {
auto s = drainFD(fd.get());
#if __linux__
if (s != "") {
/* Kill the old cgroup, to ensure there are no
processes left over from an interrupted build. */
destroyCgroup(s);
}
#endif
if (ftruncate(fd.get(), 0) == -1)
throw Error("truncating user lock");
auto lock = std::make_unique<AutoUserLock>();
lock->fdUserLock = std::move(fd);
lock->firstUid = settings.startId + i * maxIdsPerBuild;
lock->nrIds = nrIds;
#if __linux__
if (nrIds > 1) {
auto ourCgroups = getCgroups("/proc/self/cgroup");
auto ourCgroup = ourCgroups[""];
if (ourCgroup == "")
throw Error("cannot determine cgroup name from /proc/self/cgroup");
auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup);
if (!pathExists(ourCgroupPath))
throw Error("expected cgroup directory '%s'", ourCgroupPath);
lock->cgroup = fmt("%s/nix-build-%d", ourCgroupPath, lock->firstUid);
/* Record the cgroup in the lock file. This ensures that
if we subsequently get executed under a different parent
cgroup, we kill the previous cgroup first. */
writeFull(lock->fdUserLock.get(), *lock->cgroup);
}
#endif
return lock;
}
}
return nullptr;
}
#if __linux__
std::optional<Path> getCgroup() override { return cgroup; }
#endif
};
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds)

View file

@ -27,13 +27,6 @@ struct UserLock
virtual gid_t getGID() = 0;
virtual std::vector<gid_t> getSupplementaryGIDs() = 0;
/* Kill any processes currently executing as this user. */
virtual void kill() = 0;
#if __linux__
virtual std::optional<Path> getCgroup() { return {}; };
#endif
};
/* Acquire a user lock for a UID range of size `nrIds`. Note that this

View file

@ -15,6 +15,7 @@ std::map<ExperimentalFeature, std::string> stringifiedXpFeatures = {
{ Xp::FetchClosure, "fetch-closure" },
{ Xp::ReplFlake, "repl-flake" },
{ Xp::AutoAllocateUids, "auto-allocate-uids" },
{ Xp::Cgroups, "cgroups" },
};
const std::optional<ExperimentalFeature> parseExperimentalFeature(const std::string_view & name)

View file

@ -24,6 +24,7 @@ enum struct ExperimentalFeature
FetchClosure,
ReplFlake,
AutoAllocateUids,
Cgroups,
};
/**