Split auto UID allocation from cgroups

Cgroups are now only used for derivations that require the uid-range
range feature. This allows auto UID allocation even on systems that
don't have cgroups (like macOS).

Also, make things work on modern systems that use cgroups v2 (where
there is a single hierarchy and no "systemd" controller).
This commit is contained in:
Eelco Dolstra 2022-11-08 16:03:42 +01:00
parent 40911d7dec
commit 2fde7e0108
9 changed files with 122 additions and 96 deletions

View file

@ -160,7 +160,7 @@ void LocalDerivationGoal::tryLocalBuild() {
if (useBuildUsers()) {
if (!buildUser)
buildUser = acquireUserLock();
buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1);
if (!buildUser) {
if (!actLock)
@ -495,8 +495,8 @@ void LocalDerivationGoal::startBuilder()
}
}
useUidRange = parsedDrv->getRequiredSystemFeatures().count("uid-range");
useSystemdCgroup = parsedDrv->getRequiredSystemFeatures().count("Systemd-cgroup");
assert(!useSystemdCgroup);
if (useChroot) {
@ -576,7 +576,8 @@ void LocalDerivationGoal::startBuilder()
printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir);
if (mkdir(chrootRootDir.c_str(), useUidRange ? 0755 : 0750) == -1)
// FIXME: make this 0700
if (mkdir(chrootRootDir.c_str(), buildUser && buildUser->getUIDCount() != 1 ? 0755 : 0750) == -1)
throw SysError("cannot create '%1%'", chrootRootDir);
// FIXME: only make root writable for user namespace builds.
@ -596,8 +597,8 @@ void LocalDerivationGoal::startBuilder()
createDirs(chrootRootDir + "/etc");
chownToBuilder(chrootRootDir + "/etc");
if (useUidRange && (!buildUser || buildUser->getUIDCount() < 65536))
throw Error("feature 'uid-range' requires '%s' to be enabled", settings.autoAllocateUids.name);
if (parsedDrv->useUidRange() && (!buildUser || buildUser->getUIDCount() < 65536))
throw Error("feature 'uid-range' requires the setting '%s' to be enabled", settings.autoAllocateUids.name);
/* Declare the build user's group so that programs get a consistent
view of the system (e.g., "id -gn"). */
@ -670,7 +671,7 @@ void LocalDerivationGoal::startBuilder()
#endif
#endif
} else {
if (useUidRange)
if (parsedDrv->useUidRange())
throw Error("feature 'uid-range' is only supported in sandboxed builds");
if (useSystemdCgroup)
throw Error("feature 'systemd-cgroup' is only supported in sandboxed builds");
@ -934,12 +935,12 @@ void LocalDerivationGoal::startBuilder()
the calling user (if build users are disabled). */
uid_t hostUid = buildUser ? buildUser->getUID() : getuid();
uid_t hostGid = buildUser ? buildUser->getGID() : getgid();
uint32_t nrIds = buildUser && useUidRange ? buildUser->getUIDCount() : 1;
uid_t nrIds = buildUser ? buildUser->getUIDCount() : 1;
writeFile("/proc/" + std::to_string(pid) + "/uid_map",
fmt("%d %d %d", sandboxUid(), hostUid, nrIds));
if (!useUidRange)
if (!buildUser || buildUser->getUIDCount() == 1)
writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny");
writeFile("/proc/" + std::to_string(pid) + "/gid_map",
@ -1793,7 +1794,7 @@ void LocalDerivationGoal::runChild()
throw SysError("mounting /proc");
/* Mount sysfs on /sys. */
if (useUidRange) {
if (buildUser && buildUser->getUIDCount() != 1) {
createDirs(chrootRootDir + "/sys");
if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1)
throw SysError("mounting /sys");

View file

@ -41,9 +41,6 @@ struct LocalDerivationGoal : public DerivationGoal
Path chrootRootDir;
/* Whether to give the build more than 1 UID. */
bool useUidRange = false;
/* Whether to make the 'systemd' cgroup controller available to
the build. */
bool useSystemdCgroup = false;
@ -99,8 +96,8 @@ struct LocalDerivationGoal : public DerivationGoal
result. */
std::map<Path, ValidPathInfo> prevInfos;
uid_t sandboxUid() { return usingUserNamespace ? (useUidRange ? 0 : 1000) : buildUser->getUID(); }
gid_t sandboxGid() { return usingUserNamespace ? (useUidRange ? 0 : 100) : buildUser->getGID(); }
uid_t sandboxUid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 1000 : 0) : buildUser->getUID(); }
gid_t sandboxGid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 100 : 0) : buildUser->getGID(); }
const static Path homeDir;

View file

@ -13,6 +13,7 @@
namespace nix {
// FIXME: obsolete, check for cgroup2
std::map<std::string, std::string> getCgroups(const Path & cgroupFile)
{
std::map<std::string, std::string> cgroups;

View file

@ -130,6 +130,10 @@ StringSet Settings::getDefaultSystemFeatures()
actually require anything special on the machines. */
StringSet features{"nixos-test", "benchmark", "big-parallel"};
#if __linux__
features.insert("uid-range");
#endif
#if __linux__
if (access("/dev/kvm", R_OK | W_OK) == 0)
features.insert("kvm");

View file

@ -46,6 +46,8 @@ struct PluginFilesSetting : public BaseSetting<Paths>
void set(const std::string & str, bool append = false) override;
};
const uint32_t maxIdsPerBuild = 1 << 16;
class Settings : public Config {
unsigned int getDefaultCores();
@ -279,12 +281,10 @@ public:
Setting<bool> autoAllocateUids{this, false, "auto-allocate-uids",
"Whether to allocate UIDs for builders automatically."};
const uint32_t idsPerBuild = 1 << 16;
Setting<uint32_t> startId{this, 872415232, "start-id",
"The first UID and GID to use for dynamic ID allocation."};
Setting<uint32_t> uidCount{this, idsPerBuild * 128, "id-count",
Setting<uint32_t> uidCount{this, maxIdsPerBuild * 128, "id-count",
"The number of UIDs/GIDs to use for dynamic ID allocation."};
#endif

View file

@ -20,12 +20,8 @@ struct SimpleUserLock : UserLock
killUser(uid);
}
std::pair<uid_t, uid_t> getUIDRange() override
{
assert(uid);
return {uid, uid};
}
uid_t getUID() override { assert(uid); return uid; }
uid_t getUIDCount() override { return 1; }
gid_t getGID() override { assert(gid); return gid; }
std::vector<gid_t> getSupplementaryGIDs() override { return supplementaryGIDs; }
@ -115,48 +111,65 @@ struct SimpleUserLock : UserLock
}
};
#if __linux__
struct CgroupUserLock : UserLock
struct AutoUserLock : UserLock
{
AutoCloseFD fdUserLock;
uid_t uid;
uid_t firstUid = 0;
uid_t nrIds = 1;
#if __linux__
std::optional<Path> cgroup;
#endif
~AutoUserLock()
{
// Get rid of our cgroup, ignoring errors.
if (cgroup) rmdir(cgroup->c_str());
}
void kill() override
{
#if __linux__
if (cgroup) {
printError("KILL CGROUP %s", *cgroup);
destroyCgroup(*cgroup);
cgroup.reset();
if (mkdir(cgroup->c_str(), 0755) == -1)
throw SysError("creating cgroup '%s'", *cgroup);
} else
#endif
{
assert(firstUid);
printError("KILL USER %d", firstUid);
killUser(firstUid);
}
}
std::pair<uid_t, uid_t> getUIDRange() override
{
assert(uid);
return {uid, uid + settings.idsPerBuild - 1};
}
uid_t getUID() override { assert(firstUid); return firstUid; }
gid_t getUIDCount() override { return nrIds; }
gid_t getGID() override
{
// We use the same GID ranges as for the UIDs.
assert(uid);
return uid;
assert(firstUid);
return firstUid;
}
std::vector<gid_t> getSupplementaryGIDs() override { return {}; }
static std::unique_ptr<UserLock> acquire()
static std::unique_ptr<UserLock> acquire(uid_t nrIds)
{
settings.requireExperimentalFeature(Xp::AutoAllocateUids);
assert(settings.startId > 0);
assert(settings.startId % settings.idsPerBuild == 0);
assert(settings.uidCount % settings.idsPerBuild == 0);
assert(settings.startId % maxIdsPerBuild == 0);
assert(settings.uidCount % maxIdsPerBuild == 0);
assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits<uid_t>::max());
assert(nrIds <= maxIdsPerBuild);
// FIXME: check whether the id range overlaps any known users
createDirs(settings.nixStateDir + "/userpool2");
size_t nrSlots = settings.uidCount / settings.idsPerBuild;
size_t nrSlots = settings.uidCount / maxIdsPerBuild;
for (size_t i = 0; i < nrSlots; i++) {
debug("trying user slot '%d'", i);
@ -170,11 +183,47 @@ struct CgroupUserLock : UserLock
throw SysError("opening user lock '%s'", fnUserLock);
if (lockFile(fd.get(), ltWrite, false)) {
auto lock = std::make_unique<CgroupUserLock>();
auto s = drainFD(fd.get());
#if __linux__
if (s != "") {
/* Kill the old cgroup, to ensure there are no
processes left over from an interrupted build. */
destroyCgroup(s);
}
#endif
if (ftruncate(fd.get(), 0) == -1)
throw Error("truncating user lock");
auto lock = std::make_unique<AutoUserLock>();
lock->fdUserLock = std::move(fd);
lock->uid = settings.startId + i * settings.idsPerBuild;
auto s = drainFD(lock->fdUserLock.get());
if (s != "") lock->cgroup = s;
lock->firstUid = settings.startId + i * maxIdsPerBuild;
lock->nrIds = nrIds;
if (nrIds > 1) {
auto ourCgroups = getCgroups("/proc/self/cgroup");
auto ourCgroup = ourCgroups[""];
if (ourCgroup == "")
throw Error("cannot determine cgroup name from /proc/self/cgroup");
auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup);
printError("PARENT CGROUP = %s", ourCgroupPath);
if (!pathExists(ourCgroupPath))
throw Error("expected cgroup directory '%s'", ourCgroupPath);
lock->cgroup = fmt("%s/nix-build-%d", ourCgroupPath, lock->firstUid);
printError("CHILD CGROUP = %s", *lock->cgroup);
/* Record the cgroup in the lock file. This ensures that
if we subsequently get executed under a different parent
cgroup, we kill the previous cgroup first. */
writeFull(lock->fdUserLock.get(), *lock->cgroup);
}
return lock;
}
}
@ -182,50 +231,16 @@ struct CgroupUserLock : UserLock
return nullptr;
}
std::optional<Path> cgroup;
std::optional<Path> getCgroup() override
{
if (!cgroup) {
/* Create a systemd cgroup since that's the minimum
required by systemd-nspawn. */
auto ourCgroups = getCgroups("/proc/self/cgroup");
auto systemdCgroup = ourCgroups["systemd"];
if (systemdCgroup == "")
throw Error("'systemd' cgroup does not exist");
auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup);
if (!pathExists(hostCgroup))
throw Error("expected cgroup directory '%s'", hostCgroup);
cgroup = fmt("%s/nix-%d", hostCgroup, uid);
destroyCgroup(*cgroup);
if (mkdir(cgroup->c_str(), 0755) == -1)
throw SysError("creating cgroup '%s'", *cgroup);
/* Record the cgroup in the lock file. This ensures that
if we subsequently get executed under a different parent
cgroup, we kill the previous cgroup first. */
if (ftruncate(fdUserLock.get(), 0) == -1)
throw Error("truncating user lock");
writeFull(fdUserLock.get(), *cgroup);
}
return cgroup;
};
};
#endif
std::unique_ptr<UserLock> acquireUserLock()
{
#if __linux__
if (settings.autoAllocateUids)
return CgroupUserLock::acquire();
else
std::optional<Path> getCgroup() override { return cgroup; }
#endif
};
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds)
{
if (settings.autoAllocateUids)
return AutoUserLock::acquire(nrIds);
else
return SimpleUserLock::acquire();
}

View file

@ -11,18 +11,16 @@ struct UserLock
virtual ~UserLock() { }
/* Get the first and last UID. */
virtual std::pair<uid_t, uid_t> getUIDRange() = 0;
std::pair<uid_t, uid_t> getUIDRange()
{
auto first = getUID();
return {first, first + getUIDCount() - 1};
}
/* Get the first UID. */
uid_t getUID()
{
return getUIDRange().first;
}
virtual uid_t getUID() = 0;
uid_t getUIDCount()
{
return getUIDRange().second - getUIDRange().first + 1;
}
virtual uid_t getUIDCount() = 0;
virtual gid_t getGID() = 0;
@ -31,12 +29,14 @@ struct UserLock
/* Kill any processes currently executing as this user. */
virtual void kill() = 0;
#if __linux__
virtual std::optional<Path> getCgroup() { return {}; };
#endif
};
/* Acquire a user lock. Note that this may return nullptr if no user
is available. */
std::unique_ptr<UserLock> acquireUserLock();
/* Acquire a user lock for a UID range of size `nrIds`. Note that this
may return nullptr if no user is available. */
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds);
bool useBuildUsers();

View file

@ -90,6 +90,7 @@ std::optional<Strings> ParsedDerivation::getStringsAttr(const std::string & name
StringSet ParsedDerivation::getRequiredSystemFeatures() const
{
// FIXME: cache this?
StringSet res;
for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings()))
res.insert(i);
@ -125,6 +126,11 @@ bool ParsedDerivation::substitutesAllowed() const
return getBoolAttr("allowSubstitutes", true);
}
bool ParsedDerivation::useUidRange() const
{
return getRequiredSystemFeatures().count("uid-range");
}
static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*");
std::optional<nlohmann::json> ParsedDerivation::prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths)

View file

@ -38,6 +38,8 @@ public:
bool substitutesAllowed() const;
bool useUidRange() const;
std::optional<nlohmann::json> prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths);
};