Split auto UID allocation from cgroups

Cgroups are now only used for derivations that require the uid-range
feature. This allows auto UID allocation even on systems that
don't have cgroups (like macOS).

Also, make things work on modern systems that use cgroups v2 (where
there is a single hierarchy and no "systemd" controller).
This commit is contained in:
Eelco Dolstra 2022-11-08 16:03:42 +01:00
parent 40911d7dec
commit 2fde7e0108
9 changed files with 122 additions and 96 deletions

View file

@ -160,7 +160,7 @@ void LocalDerivationGoal::tryLocalBuild() {
if (useBuildUsers()) { if (useBuildUsers()) {
if (!buildUser) if (!buildUser)
buildUser = acquireUserLock(); buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1);
if (!buildUser) { if (!buildUser) {
if (!actLock) if (!actLock)
@ -495,8 +495,8 @@ void LocalDerivationGoal::startBuilder()
} }
} }
useUidRange = parsedDrv->getRequiredSystemFeatures().count("uid-range");
useSystemdCgroup = parsedDrv->getRequiredSystemFeatures().count("Systemd-cgroup"); useSystemdCgroup = parsedDrv->getRequiredSystemFeatures().count("Systemd-cgroup");
assert(!useSystemdCgroup);
if (useChroot) { if (useChroot) {
@ -576,7 +576,8 @@ void LocalDerivationGoal::startBuilder()
printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir); printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir);
if (mkdir(chrootRootDir.c_str(), useUidRange ? 0755 : 0750) == -1) // FIXME: make this 0700
if (mkdir(chrootRootDir.c_str(), buildUser && buildUser->getUIDCount() != 1 ? 0755 : 0750) == -1)
throw SysError("cannot create '%1%'", chrootRootDir); throw SysError("cannot create '%1%'", chrootRootDir);
// FIXME: only make root writable for user namespace builds. // FIXME: only make root writable for user namespace builds.
@ -596,8 +597,8 @@ void LocalDerivationGoal::startBuilder()
createDirs(chrootRootDir + "/etc"); createDirs(chrootRootDir + "/etc");
chownToBuilder(chrootRootDir + "/etc"); chownToBuilder(chrootRootDir + "/etc");
if (useUidRange && (!buildUser || buildUser->getUIDCount() < 65536)) if (parsedDrv->useUidRange() && (!buildUser || buildUser->getUIDCount() < 65536))
throw Error("feature 'uid-range' requires '%s' to be enabled", settings.autoAllocateUids.name); throw Error("feature 'uid-range' requires the setting '%s' to be enabled", settings.autoAllocateUids.name);
/* Declare the build user's group so that programs get a consistent /* Declare the build user's group so that programs get a consistent
view of the system (e.g., "id -gn"). */ view of the system (e.g., "id -gn"). */
@ -670,7 +671,7 @@ void LocalDerivationGoal::startBuilder()
#endif #endif
#endif #endif
} else { } else {
if (useUidRange) if (parsedDrv->useUidRange())
throw Error("feature 'uid-range' is only supported in sandboxed builds"); throw Error("feature 'uid-range' is only supported in sandboxed builds");
if (useSystemdCgroup) if (useSystemdCgroup)
throw Error("feature 'systemd-cgroup' is only supported in sandboxed builds"); throw Error("feature 'systemd-cgroup' is only supported in sandboxed builds");
@ -934,12 +935,12 @@ void LocalDerivationGoal::startBuilder()
the calling user (if build users are disabled). */ the calling user (if build users are disabled). */
uid_t hostUid = buildUser ? buildUser->getUID() : getuid(); uid_t hostUid = buildUser ? buildUser->getUID() : getuid();
uid_t hostGid = buildUser ? buildUser->getGID() : getgid(); uid_t hostGid = buildUser ? buildUser->getGID() : getgid();
uint32_t nrIds = buildUser && useUidRange ? buildUser->getUIDCount() : 1; uid_t nrIds = buildUser ? buildUser->getUIDCount() : 1;
writeFile("/proc/" + std::to_string(pid) + "/uid_map", writeFile("/proc/" + std::to_string(pid) + "/uid_map",
fmt("%d %d %d", sandboxUid(), hostUid, nrIds)); fmt("%d %d %d", sandboxUid(), hostUid, nrIds));
if (!useUidRange) if (!buildUser || buildUser->getUIDCount() == 1)
writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny");
writeFile("/proc/" + std::to_string(pid) + "/gid_map", writeFile("/proc/" + std::to_string(pid) + "/gid_map",
@ -1793,7 +1794,7 @@ void LocalDerivationGoal::runChild()
throw SysError("mounting /proc"); throw SysError("mounting /proc");
/* Mount sysfs on /sys. */ /* Mount sysfs on /sys. */
if (useUidRange) { if (buildUser && buildUser->getUIDCount() != 1) {
createDirs(chrootRootDir + "/sys"); createDirs(chrootRootDir + "/sys");
if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1) if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1)
throw SysError("mounting /sys"); throw SysError("mounting /sys");

View file

@ -41,9 +41,6 @@ struct LocalDerivationGoal : public DerivationGoal
Path chrootRootDir; Path chrootRootDir;
/* Whether to give the build more than 1 UID. */
bool useUidRange = false;
/* Whether to make the 'systemd' cgroup controller available to /* Whether to make the 'systemd' cgroup controller available to
the build. */ the build. */
bool useSystemdCgroup = false; bool useSystemdCgroup = false;
@ -99,8 +96,8 @@ struct LocalDerivationGoal : public DerivationGoal
result. */ result. */
std::map<Path, ValidPathInfo> prevInfos; std::map<Path, ValidPathInfo> prevInfos;
uid_t sandboxUid() { return usingUserNamespace ? (useUidRange ? 0 : 1000) : buildUser->getUID(); } uid_t sandboxUid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 1000 : 0) : buildUser->getUID(); }
gid_t sandboxGid() { return usingUserNamespace ? (useUidRange ? 0 : 100) : buildUser->getGID(); } gid_t sandboxGid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 100 : 0) : buildUser->getGID(); }
const static Path homeDir; const static Path homeDir;

View file

@ -13,6 +13,7 @@
namespace nix { namespace nix {
// FIXME: obsolete, check for cgroup2
std::map<std::string, std::string> getCgroups(const Path & cgroupFile) std::map<std::string, std::string> getCgroups(const Path & cgroupFile)
{ {
std::map<std::string, std::string> cgroups; std::map<std::string, std::string> cgroups;

View file

@ -130,6 +130,10 @@ StringSet Settings::getDefaultSystemFeatures()
actually require anything special on the machines. */ actually require anything special on the machines. */
StringSet features{"nixos-test", "benchmark", "big-parallel"}; StringSet features{"nixos-test", "benchmark", "big-parallel"};
#if __linux__
features.insert("uid-range");
#endif
#if __linux__ #if __linux__
if (access("/dev/kvm", R_OK | W_OK) == 0) if (access("/dev/kvm", R_OK | W_OK) == 0)
features.insert("kvm"); features.insert("kvm");

View file

@ -46,6 +46,8 @@ struct PluginFilesSetting : public BaseSetting<Paths>
void set(const std::string & str, bool append = false) override; void set(const std::string & str, bool append = false) override;
}; };
const uint32_t maxIdsPerBuild = 1 << 16;
class Settings : public Config { class Settings : public Config {
unsigned int getDefaultCores(); unsigned int getDefaultCores();
@ -279,12 +281,10 @@ public:
Setting<bool> autoAllocateUids{this, false, "auto-allocate-uids", Setting<bool> autoAllocateUids{this, false, "auto-allocate-uids",
"Whether to allocate UIDs for builders automatically."}; "Whether to allocate UIDs for builders automatically."};
const uint32_t idsPerBuild = 1 << 16;
Setting<uint32_t> startId{this, 872415232, "start-id", Setting<uint32_t> startId{this, 872415232, "start-id",
"The first UID and GID to use for dynamic ID allocation."}; "The first UID and GID to use for dynamic ID allocation."};
Setting<uint32_t> uidCount{this, idsPerBuild * 128, "id-count", Setting<uint32_t> uidCount{this, maxIdsPerBuild * 128, "id-count",
"The number of UIDs/GIDs to use for dynamic ID allocation."}; "The number of UIDs/GIDs to use for dynamic ID allocation."};
#endif #endif

View file

@ -20,12 +20,8 @@ struct SimpleUserLock : UserLock
killUser(uid); killUser(uid);
} }
std::pair<uid_t, uid_t> getUIDRange() override uid_t getUID() override { assert(uid); return uid; }
{ uid_t getUIDCount() override { return 1; }
assert(uid);
return {uid, uid};
}
gid_t getGID() override { assert(gid); return gid; } gid_t getGID() override { assert(gid); return gid; }
std::vector<gid_t> getSupplementaryGIDs() override { return supplementaryGIDs; } std::vector<gid_t> getSupplementaryGIDs() override { return supplementaryGIDs; }
@ -115,48 +111,65 @@ struct SimpleUserLock : UserLock
} }
}; };
#if __linux__ struct AutoUserLock : UserLock
struct CgroupUserLock : UserLock
{ {
AutoCloseFD fdUserLock; AutoCloseFD fdUserLock;
uid_t uid; uid_t firstUid = 0;
uid_t nrIds = 1;
#if __linux__
std::optional<Path> cgroup;
#endif
/* Remove the cgroup directory recorded for this lock, if there is one.
   The result of rmdir() is deliberately not checked: this is
   best-effort cleanup. The `cgroup` member is only declared when
   building for Linux, so the cleanup is guarded by the same condition
   to keep other platforms compiling. */
~AutoUserLock()
{
#if __linux__
    if (cgroup) rmdir(cgroup->c_str());
#endif
}
void kill() override void kill() override
{ {
#if __linux__
if (cgroup) { if (cgroup) {
printError("KILL CGROUP %s", *cgroup);
destroyCgroup(*cgroup); destroyCgroup(*cgroup);
cgroup.reset(); if (mkdir(cgroup->c_str(), 0755) == -1)
throw SysError("creating cgroup '%s'", *cgroup);
} else
#endif
{
assert(firstUid);
printError("KILL USER %d", firstUid);
killUser(firstUid);
} }
} }
std::pair<uid_t, uid_t> getUIDRange() override uid_t getUID() override { assert(firstUid); return firstUid; }
{
assert(uid); gid_t getUIDCount() override { return nrIds; }
return {uid, uid + settings.idsPerBuild - 1};
}
gid_t getGID() override gid_t getGID() override
{ {
// We use the same GID ranges as for the UIDs. // We use the same GID ranges as for the UIDs.
assert(uid); assert(firstUid);
return uid; return firstUid;
} }
std::vector<gid_t> getSupplementaryGIDs() override { return {}; } std::vector<gid_t> getSupplementaryGIDs() override { return {}; }
static std::unique_ptr<UserLock> acquire() static std::unique_ptr<UserLock> acquire(uid_t nrIds)
{ {
settings.requireExperimentalFeature(Xp::AutoAllocateUids); settings.requireExperimentalFeature(Xp::AutoAllocateUids);
assert(settings.startId > 0); assert(settings.startId > 0);
assert(settings.startId % settings.idsPerBuild == 0); assert(settings.startId % maxIdsPerBuild == 0);
assert(settings.uidCount % settings.idsPerBuild == 0); assert(settings.uidCount % maxIdsPerBuild == 0);
assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits<uid_t>::max()); assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits<uid_t>::max());
assert(nrIds <= maxIdsPerBuild);
// FIXME: check whether the id range overlaps any known users // FIXME: check whether the id range overlaps any known users
createDirs(settings.nixStateDir + "/userpool2"); createDirs(settings.nixStateDir + "/userpool2");
size_t nrSlots = settings.uidCount / settings.idsPerBuild; size_t nrSlots = settings.uidCount / maxIdsPerBuild;
for (size_t i = 0; i < nrSlots; i++) { for (size_t i = 0; i < nrSlots; i++) {
debug("trying user slot '%d'", i); debug("trying user slot '%d'", i);
@ -170,11 +183,47 @@ struct CgroupUserLock : UserLock
throw SysError("opening user lock '%s'", fnUserLock); throw SysError("opening user lock '%s'", fnUserLock);
if (lockFile(fd.get(), ltWrite, false)) { if (lockFile(fd.get(), ltWrite, false)) {
auto lock = std::make_unique<CgroupUserLock>(); auto s = drainFD(fd.get());
#if __linux__
if (s != "") {
/* Kill the old cgroup, to ensure there are no
processes left over from an interrupted build. */
destroyCgroup(s);
}
#endif
if (ftruncate(fd.get(), 0) == -1)
throw Error("truncating user lock");
auto lock = std::make_unique<AutoUserLock>();
lock->fdUserLock = std::move(fd); lock->fdUserLock = std::move(fd);
lock->uid = settings.startId + i * settings.idsPerBuild; lock->firstUid = settings.startId + i * maxIdsPerBuild;
auto s = drainFD(lock->fdUserLock.get()); lock->nrIds = nrIds;
if (s != "") lock->cgroup = s;
if (nrIds > 1) {
auto ourCgroups = getCgroups("/proc/self/cgroup");
auto ourCgroup = ourCgroups[""];
if (ourCgroup == "")
throw Error("cannot determine cgroup name from /proc/self/cgroup");
auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup);
printError("PARENT CGROUP = %s", ourCgroupPath);
if (!pathExists(ourCgroupPath))
throw Error("expected cgroup directory '%s'", ourCgroupPath);
lock->cgroup = fmt("%s/nix-build-%d", ourCgroupPath, lock->firstUid);
printError("CHILD CGROUP = %s", *lock->cgroup);
/* Record the cgroup in the lock file. This ensures that
if we subsequently get executed under a different parent
cgroup, we kill the previous cgroup first. */
writeFull(lock->fdUserLock.get(), *lock->cgroup);
}
return lock; return lock;
} }
} }
@ -182,50 +231,16 @@ struct CgroupUserLock : UserLock
return nullptr; return nullptr;
} }
std::optional<Path> cgroup;
std::optional<Path> getCgroup() override
{
if (!cgroup) {
/* Create a systemd cgroup since that's the minimum
required by systemd-nspawn. */
auto ourCgroups = getCgroups("/proc/self/cgroup");
auto systemdCgroup = ourCgroups["systemd"];
if (systemdCgroup == "")
throw Error("'systemd' cgroup does not exist");
auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup);
if (!pathExists(hostCgroup))
throw Error("expected cgroup directory '%s'", hostCgroup);
cgroup = fmt("%s/nix-%d", hostCgroup, uid);
destroyCgroup(*cgroup);
if (mkdir(cgroup->c_str(), 0755) == -1)
throw SysError("creating cgroup '%s'", *cgroup);
/* Record the cgroup in the lock file. This ensures that
if we subsequently get executed under a different parent
cgroup, we kill the previous cgroup first. */
if (ftruncate(fdUserLock.get(), 0) == -1)
throw Error("truncating user lock");
writeFull(fdUserLock.get(), *cgroup);
}
return cgroup;
};
};
#endif
std::unique_ptr<UserLock> acquireUserLock()
{
#if __linux__ #if __linux__
if (settings.autoAllocateUids) std::optional<Path> getCgroup() override { return cgroup; }
return CgroupUserLock::acquire();
else
#endif #endif
};
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds)
{
if (settings.autoAllocateUids)
return AutoUserLock::acquire(nrIds);
else
return SimpleUserLock::acquire(); return SimpleUserLock::acquire();
} }

View file

@ -11,18 +11,16 @@ struct UserLock
virtual ~UserLock() { } virtual ~UserLock() { }
/* Get the first and last UID. */ /* Get the first and last UID. */
virtual std::pair<uid_t, uid_t> getUIDRange() = 0; std::pair<uid_t, uid_t> getUIDRange()
{
auto first = getUID();
return {first, first + getUIDCount() - 1};
}
/* Get the first UID. */ /* Get the first UID. */
uid_t getUID() virtual uid_t getUID() = 0;
{
return getUIDRange().first;
}
uid_t getUIDCount() virtual uid_t getUIDCount() = 0;
{
return getUIDRange().second - getUIDRange().first + 1;
}
virtual gid_t getGID() = 0; virtual gid_t getGID() = 0;
@ -31,12 +29,14 @@ struct UserLock
/* Kill any processes currently executing as this user. */ /* Kill any processes currently executing as this user. */
virtual void kill() = 0; virtual void kill() = 0;
#if __linux__
virtual std::optional<Path> getCgroup() { return {}; }; virtual std::optional<Path> getCgroup() { return {}; };
#endif
}; };
/* Acquire a user lock. Note that this may return nullptr if no user /* Acquire a user lock for a UID range of size `nrIds`. Note that this
is available. */ may return nullptr if no user is available. */
std::unique_ptr<UserLock> acquireUserLock(); std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds);
bool useBuildUsers(); bool useBuildUsers();

View file

@ -90,6 +90,7 @@ std::optional<Strings> ParsedDerivation::getStringsAttr(const std::string & name
StringSet ParsedDerivation::getRequiredSystemFeatures() const StringSet ParsedDerivation::getRequiredSystemFeatures() const
{ {
// FIXME: cache this?
StringSet res; StringSet res;
for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings())) for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings()))
res.insert(i); res.insert(i);
@ -125,6 +126,11 @@ bool ParsedDerivation::substitutesAllowed() const
return getBoolAttr("allowSubstitutes", true); return getBoolAttr("allowSubstitutes", true);
} }
/* Report whether "uid-range" appears among this derivation's required
   system features. */
bool ParsedDerivation::useUidRange() const
{
    const auto requiredFeatures = getRequiredSystemFeatures();
    return requiredFeatures.count("uid-range") != 0;
}
static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*"); static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*");
std::optional<nlohmann::json> ParsedDerivation::prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths) std::optional<nlohmann::json> ParsedDerivation::prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths)

View file

@ -38,6 +38,8 @@ public:
bool substitutesAllowed() const; bool substitutesAllowed() const;
bool useUidRange() const;
std::optional<nlohmann::json> prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths); std::optional<nlohmann::json> prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths);
}; };