From 836573a9a2d38935e254702826d250ea39824a1b Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 31 Oct 2017 12:22:29 +0100 Subject: [PATCH 01/36] Dynamically allocate UIDs Rather than rely on a nixbld group, we now allocate UIDs/GIDs dynamically starting at a configurable ID (872415232 by default). Also, we allocate 2^18 UIDs and GIDs per build, and run the build as root in its UID namespace. (This should not be the default since it breaks some builds. We probably should enable this conditional on a requiredSystemFeature.) The goal is to be able to run (NixOS) containers in a build. However, this will also require some cgroup initialisation. The 2^18 UIDs/GIDs is intended to provide enough ID space to run multiple containers per build, e.g. for distributed NixOS tests. --- src/libstore/build.cc | 59 +++++++++++++++++++++++++++++++++++------ src/libstore/globals.hh | 10 +++++++ 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 347fe1b99..c853f609d 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -513,7 +513,6 @@ private: AutoCloseFD fdUserLock; bool isEnabled = false; - string user; uid_t uid = 0; gid_t gid = 0; std::vector supplementaryGIDs; @@ -523,9 +522,9 @@ public: void kill(); - string getUser() { return user; } uid_t getUID() { assert(uid); return uid; } - uid_t getGID() { assert(gid); return gid; } + gid_t getGID() { assert(gid); return gid; } + uint32_t getIDCount() { return 1; } std::vector getSupplementaryGIDs() { return supplementaryGIDs; } bool findFreeUser(); @@ -537,13 +536,16 @@ public: UserLock::UserLock() { +#if 0 assert(settings.buildUsersGroup != ""); createDirs(settings.nixStateDir + "/userpool"); +#endif } bool UserLock::findFreeUser() { if (enabled()) return true; +#if 0 /* Get the members of the build-users-group. */ struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); if (!gr) @@ -607,12 +609,46 @@ bool UserLock::findFreeUser() { } } + return false; +#endif + + assert(settings.startId > 0); + assert(settings.startId % settings.idsPerBuild == 0); + assert(settings.uidCount % settings.idsPerBuild == 0); + assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits::max()); + + // FIXME: check whether the id range overlaps any known users + + size_t nrSlots = settings.uidCount / settings.idsPerBuild; + + for (size_t i = 0; i < nrSlots; i++) { + debug("trying user slot '%d'", i); + + createDirs(settings.nixStateDir + "/userpool"); + + fnUserLock = fmt("%s/userpool/slot-%d", settings.nixStateDir, i); + + AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fd) + throw SysError("opening user lock '%1%'", fnUserLock); + + if (lockFile(fd.get(), ltWrite, false)) { + fdUserLock = std::move(fd); + uid = settings.startId + i * settings.idsPerBuild; + gid = settings.startId + i * settings.idsPerBuild; + return true; + } + } + return false; } void UserLock::kill() { + // FIXME: use a cgroup to kill all processes in the build? +#if 0 killUser(uid); +#endif } @@ -1523,7 +1559,7 @@ void DerivationGoal::tryLocalBuild() { /* If `build-users-group' is not empty, then we have to build as one of the members of that group. */ - if (settings.buildUsersGroup != "" && getuid() == 0) { + if ((settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0) { #if defined(__linux__) || defined(__APPLE__) if (!buildUser) buildUser = std::make_unique(); @@ -2129,7 +2165,7 @@ void DerivationGoal::startBuilder() printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir); - if (mkdir(chrootRootDir.c_str(), 0750) == -1) + if (mkdir(chrootRootDir.c_str(), 0755) == -1) throw SysError("cannot create '%1%'", chrootRootDir); if (buildUser && chown(chrootRootDir.c_str(), 0, buildUser->getGID()) == -1) @@ -2444,14 +2480,15 @@ void DerivationGoal::startBuilder() the calling user (if build users are disabled). */ uid_t hostUid = buildUser ? buildUser->getUID() : getuid(); uid_t hostGid = buildUser ? buildUser->getGID() : getgid(); + uint32_t nrIds = settings.idsPerBuild; // FIXME writeFile("/proc/" + std::to_string(pid) + "/uid_map", - (format("%d %d 1") % sandboxUid % hostUid).str()); + fmt("%d %d %d", /* sandboxUid */ 0, hostUid, nrIds)); - writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); + //writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); writeFile("/proc/" + std::to_string(pid) + "/gid_map", - (format("%d %d 1") % sandboxGid % hostGid).str()); + fmt("%d %d %d", /* sandboxGid */ 0, hostGid, nrIds)); /* Save the mount namespace of the child. We have to do this *before* the child does a chroot. */ @@ -3306,10 +3343,16 @@ void DerivationGoal::runChild() /* Switch to the sandbox uid/gid in the user namespace, which corresponds to the build user or calling user in the parent namespace. */ +#if 0 if (setgid(sandboxGid) == -1) throw SysError("setgid failed"); if (setuid(sandboxUid) == -1) throw SysError("setuid failed"); +#endif + if (setgid(0) == -1) + throw SysError("setgid failed"); + if (setuid(0) == -1) + throw SysError("setuid failed"); setUser = false; } diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 58cf08763..7dc842bca 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -148,6 +148,16 @@ public: Setting buildUsersGroup{this, "", "build-users-group", "The Unix group that contains the build users."}; + #if __linux__ + const uint32_t idsPerBuild = 1 << 18; + + Setting startId{this, 872415232, "start-id", + "The first UID and GID to use for dynamic ID allocation. (0 means disable.)"}; + + Setting uidCount{this, idsPerBuild * 128, "id-count", + "The number of UIDs/GIDs to use for dynamic ID allocation."}; + #endif + Setting impersonateLinux26{this, false, "impersonate-linux-26", "Whether to impersonate a Linux 2.6 machine on newer kernels.", {"build-impersonate-linux-26"}}; From c3e0a68c7eeeb4f491c0464392b2146ddec4305a Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 14 May 2020 13:52:41 +0200 Subject: [PATCH 02/36] canonicalisePathMetaData(): Support a UID range --- src/libstore/build.cc | 12 +++++++++--- src/libstore/local-store.cc | 27 +++++++++++++++++---------- src/libstore/local-store.hh | 15 ++++++++++++--- src/nix-store/nix-store.cc | 2 +- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index c853f609d..4e654e8ad 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -524,7 +524,7 @@ public: uid_t getUID() { assert(uid); return uid; } gid_t getGID() { assert(gid); return gid; } - uint32_t getIDCount() { return 1; } + uint32_t getIDCount() { return settings.idsPerBuild; } std::vector getSupplementaryGIDs() { return supplementaryGIDs; } bool findFreeUser(); @@ -3744,7 +3744,10 @@ void DerivationGoal::registerOutputs() /* Canonicalise first. This ensures that the path we're rewriting doesn't contain a hard link to /etc/shadow or something like that. */ - canonicalisePathMetaData(actualPath, buildUser ? buildUser->getUID() : -1, inodesSeen); + canonicalisePathMetaData( + actualPath, + buildUser ? std::optional(std::make_pair(buildUser->getUID(), buildUser->getUID() + buildUser->getIDCount() - 1)) : std::nullopt, + inodesSeen); /* FIXME: this is in-memory. */ StringSink sink; @@ -3819,7 +3822,10 @@ void DerivationGoal::registerOutputs() /* Get rid of all weird permissions. This also checks that all files are owned by the build user, if applicable. */ canonicalisePathMetaData(actualPath, - buildUser && !rewritten ? buildUser->getUID() : -1, inodesSeen); + buildUser && !rewritten + ? std::optional(std::make_pair(buildUser->getUID(), buildUser->getUID() + buildUser->getIDCount() - 1)) + : std::nullopt, + inodesSeen); /* For this output path, find the references to other paths contained in it. Compute the SHA-256 NAR hash at the same diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index eed225349..80ebe903f 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -424,7 +424,10 @@ void canonicaliseTimestampAndPermissions(const Path & path) } -static void canonicalisePathMetaData_(const Path & path, uid_t fromUid, InodesSeen & inodesSeen) +static void canonicalisePathMetaData_( + const Path & path, + std::optional> uidRange, + InodesSeen & inodesSeen) { checkInterrupt(); @@ -475,7 +478,7 @@ static void canonicalisePathMetaData_(const Path & path, uid_t fromUid, InodesSe However, ignore files that we chown'ed ourselves previously to ensure that we don't fail on hard links within the same build (i.e. "touch $out/foo; ln $out/foo $out/bar"). */ - if (fromUid != (uid_t) -1 && st.st_uid != fromUid) { + if (uidRange && (st.st_uid < uidRange->first || st.st_uid > uidRange->second)) { assert(!S_ISDIR(st.st_mode)); if (inodesSeen.find(Inode(st.st_dev, st.st_ino)) == inodesSeen.end()) throw BuildError("invalid ownership on file '%1%'", path); @@ -509,14 +512,17 @@ static void canonicalisePathMetaData_(const Path & path, uid_t fromUid, InodesSe if (S_ISDIR(st.st_mode)) { DirEntries entries = readDirectory(path); for (auto & i : entries) - canonicalisePathMetaData_(path + "/" + i.name, fromUid, inodesSeen); + canonicalisePathMetaData_(path + "/" + i.name, uidRange, inodesSeen); } } -void canonicalisePathMetaData(const Path & path, uid_t fromUid, InodesSeen & inodesSeen) +void canonicalisePathMetaData( + const Path & path, + std::optional> uidRange, + InodesSeen & inodesSeen) { - canonicalisePathMetaData_(path, fromUid, inodesSeen); + canonicalisePathMetaData_(path, uidRange, inodesSeen); /* On platforms that don't have lchown(), the top-level path can't be a symlink, since we can't change its ownership. */ @@ -531,10 +537,11 @@ void canonicalisePathMetaData(const Path & path, uid_t fromUid, InodesSeen & ino } -void canonicalisePathMetaData(const Path & path, uid_t fromUid) +void canonicalisePathMetaData(const Path & path, + std::optional> uidRange) { InodesSeen inodesSeen; - canonicalisePathMetaData(path, fromUid, inodesSeen); + canonicalisePathMetaData(path, uidRange, inodesSeen); } @@ -1021,7 +1028,7 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, autoGC(); - canonicalisePathMetaData(realPath, -1); + canonicalisePathMetaData(realPath, {}); optimisePath(realPath); // FIXME: combine with hashPath() @@ -1064,7 +1071,7 @@ StorePath LocalStore::addToStoreFromDump(const string & dump, const string & nam } else writeFile(realPath, dump); - canonicalisePathMetaData(realPath, -1); + canonicalisePathMetaData(realPath, {}); /* Register the SHA-256 hash of the NAR serialisation of the path in the database. We may just have computed it @@ -1134,7 +1141,7 @@ StorePath LocalStore::addTextToStore(const string & name, const string & s, writeFile(realPath, s); - canonicalisePathMetaData(realPath, -1); + canonicalisePathMetaData(realPath, {}); StringSink sink; dumpString(s, sink); diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index ff36cb00e..79b415875 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -311,9 +311,18 @@ typedef set InodesSeen; - the permissions are set of 444 or 555 (i.e., read-only with or without execute permission; setuid bits etc. are cleared) - the owner and group are set to the Nix user and group, if we're - running as root. */ -void canonicalisePathMetaData(const Path & path, uid_t fromUid, InodesSeen & inodesSeen); -void canonicalisePathMetaData(const Path & path, uid_t fromUid); + running as root. + If uidRange is not empty, this function will throw an error if it + encounters files owned by a user outside of the closed interval + [uidRange->first, uidRange->second]. +*/ +void canonicalisePathMetaData( + const Path & path, + std::optional> uidRange, + InodesSeen & inodesSeen); +void canonicalisePathMetaData( + const Path & path, + std::optional> uidRange); void canonicaliseTimestampAndPermissions(const Path & path); diff --git a/src/nix-store/nix-store.cc b/src/nix-store/nix-store.cc index 7d81bf54f..b948380bb 100644 --- a/src/nix-store/nix-store.cc +++ b/src/nix-store/nix-store.cc @@ -500,7 +500,7 @@ static void registerValidity(bool reregister, bool hashGiven, bool canonicalise) if (!store->isValidPath(info->path) || reregister) { /* !!! races */ if (canonicalise) - canonicalisePathMetaData(store->printStorePath(info->path), -1); + canonicalisePathMetaData(store->printStorePath(info->path), {}); if (!hashGiven) { HashResult hash = hashPath(htSHA256, store->printStorePath(info->path)); info->narHash = hash.first; From f5fa3de759a2b4c1d0107a4304a0b3f9571c87b6 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 15 May 2020 00:11:59 +0200 Subject: [PATCH 03/36] Run builds in their own cgroup Also, run builds in a cgroup namespace (ensuring /proc/self/cgroup doesn't leak information about the outside world) and mount /sys. This enables running systemd-nspawn and thus NixOS containers in a Nix build. --- src/libstore/build.cc | 66 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 4e654e8ad..816d695a5 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -2168,7 +2168,8 @@ void DerivationGoal::startBuilder() if (mkdir(chrootRootDir.c_str(), 0755) == -1) throw SysError("cannot create '%1%'", chrootRootDir); - if (buildUser && chown(chrootRootDir.c_str(), 0, buildUser->getGID()) == -1) + // FIXME: only make root writable for user namespace builds. + if (buildUser && chown(chrootRootDir.c_str(), buildUser->getUID(), buildUser->getGID()) == -1) throw SysError("cannot change ownership of '%1%'", chrootRootDir); /* Create a writable /tmp in the chroot. Many builders need @@ -2182,6 +2183,7 @@ void DerivationGoal::startBuilder() nobody account. The latter is kind of a hack to support Samba-in-QEMU. */ createDirs(chrootRootDir + "/etc"); + chownToBuilder(chrootRootDir + "/etc"); writeFile(chrootRootDir + "/etc/passwd", fmt( "root:x:0:0:Nix build user:%3%:/noshell\n" @@ -2372,6 +2374,52 @@ void DerivationGoal::startBuilder() #if __linux__ if (useChroot) { + /* Create a cgroup. */ + // FIXME: do we want to use the parent cgroup? We should + // always use the same cgroup regardless of whether we're the + // daemon or run from a user session via sudo. + std::string msg; + std::vector cgroups; + for (auto & line : tokenizeString>(readFile("/proc/self/cgroup"), "\n")) { + static std::regex regex("([0-9]+):([^:]*):(.*)"); + std::smatch match; + if (!std::regex_match(line, match, regex)) + throw Error("invalid line '%s' in '/proc/self/cgroup'", line); + + /* We only create a systemd cgroup, since that's enough + for running systemd-nspawn. */ + std::string name; + if (match[2] == "name=systemd") + name = "systemd"; + //else if (match[2] == "") + // name = "unified"; + else continue; + + std::string cgroup = match[3]; + + auto hostCgroup = canonPath("/sys/fs/cgroup/" + name + "/" + cgroup); + + if (!pathExists(hostCgroup)) + throw Error("expected unified cgroup directory '%s'", hostCgroup); + + auto childCgroup = fmt("%s/nix-%d", hostCgroup, buildUser->getUID()); + + // FIXME: if the cgroup already exists, kill all processes + // in it and destroy it. + + if (mkdir(childCgroup.c_str(), 0755) == -1 && errno != EEXIST) + throw SysError("creating cgroup '%s'", childCgroup); + + chownToBuilder(childCgroup); + chownToBuilder(childCgroup + "/cgroup.procs"); + if (name == "unified") { + chownToBuilder(childCgroup + "/cgroup.threads"); + chownToBuilder(childCgroup + "/cgroup.subtree_control"); + } + + cgroups.push_back(childCgroup); + } + /* Set up private namespaces for the build: - The PID namespace causes the build to start as PID 1. @@ -2496,6 +2544,10 @@ void DerivationGoal::startBuilder() if (sandboxMountNamespace.get() == -1) throw SysError("getting sandbox mount namespace"); + /* Move the child into its own cgroup. */ + for (auto & childCgroup : cgroups) + writeFile(childCgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); + /* Signal the builder that we've updated its user namespace. */ writeFull(userNamespaceSync.writeSide.get(), "1"); userNamespaceSync.writeSide = -1; @@ -3279,6 +3331,12 @@ void DerivationGoal::runChild() if (mount("none", (chrootRootDir + "/proc").c_str(), "proc", 0, 0) == -1) throw SysError("mounting /proc"); + /* Mount sysfs on /sys. FIXME: only in user namespace + builds. */ + createDirs(chrootRootDir + "/sys"); + if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1) + throw SysError("mounting /sys"); + /* Mount a new tmpfs on /dev/shm to ensure that whatever the builder puts in /dev/shm is cleaned up automatically. */ if (pathExists("/dev/shm") && mount("none", (chrootRootDir + "/dev/shm").c_str(), "tmpfs", 0, @@ -3321,6 +3379,12 @@ void DerivationGoal::runChild() if (unshare(CLONE_NEWNS) == -1) throw SysError("unsharing mount namespace"); + /* Unshare the cgroup namespace. This means + /proc/self/cgroup will show the child's cgroup as '/' + rather than whatever it is in the parent. */ + if (unshare(CLONE_NEWCGROUP) == -1) + throw SysError("unsharing cgroup namespace"); + /* Do the chroot(). */ if (chdir(chrootRootDir.c_str()) == -1) throw SysError("cannot change directory to '%1%'", chrootRootDir); From ca2f64bcdaef5915f5147eac935ecb770511e438 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 15 May 2020 00:32:44 +0200 Subject: [PATCH 04/36] Reduce # of UIDs per build to 65536 2^18 was overkill. The idea was to enable multiple containers to run inside a build. However, those containers can use the same UID range - we don't really care about perfect isolation between containers inside a build. --- src/libstore/globals.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 7dc842bca..89db072b0 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -149,7 +149,7 @@ public: "The Unix group that contains the build users."}; #if __linux__ - const uint32_t idsPerBuild = 1 << 18; + const uint32_t idsPerBuild = 1 << 16; Setting startId{this, 872415232, "start-id", "The first UID and GID to use for dynamic ID allocation. (0 means disable.)"}; From 7bdcf43b401eba6aee29a359c5bce1f9cc01ce52 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sat, 16 May 2020 21:09:48 +0200 Subject: [PATCH 05/36] Destroy the cgroup prior to building --- src/libstore/build.cc | 8 +++---- src/libstore/cgroup.cc | 49 ++++++++++++++++++++++++++++++++++++++++++ src/libstore/cgroup.hh | 13 +++++++++++ 3 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 src/libstore/cgroup.cc create mode 100644 src/libstore/cgroup.hh diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 816d695a5..1f1468d97 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -16,6 +16,7 @@ #include "machines.hh" #include "daemon.hh" #include "worker-protocol.hh" +#include "cgroup.hh" #include #include @@ -2400,14 +2401,13 @@ void DerivationGoal::startBuilder() auto hostCgroup = canonPath("/sys/fs/cgroup/" + name + "/" + cgroup); if (!pathExists(hostCgroup)) - throw Error("expected unified cgroup directory '%s'", hostCgroup); + throw Error("expected cgroup directory '%s'", hostCgroup); auto childCgroup = fmt("%s/nix-%d", hostCgroup, buildUser->getUID()); - // FIXME: if the cgroup already exists, kill all processes - // in it and destroy it. + destroyCgroup(childCgroup); - if (mkdir(childCgroup.c_str(), 0755) == -1 && errno != EEXIST) + if (mkdir(childCgroup.c_str(), 0755) == -1) throw SysError("creating cgroup '%s'", childCgroup); chownToBuilder(childCgroup); diff --git a/src/libstore/cgroup.cc b/src/libstore/cgroup.cc new file mode 100644 index 000000000..8cd682e68 --- /dev/null +++ b/src/libstore/cgroup.cc @@ -0,0 +1,49 @@ +#if __linux__ + +#include "cgroup.hh" +#include "util.hh" + +#include + +#include + +namespace nix { + +void destroyCgroup(const Path & cgroup) +{ + for (auto & entry : readDirectory(cgroup)) { + if (entry.type != DT_DIR) continue; + destroyCgroup(cgroup + "/" + entry.name); + } + + int round = 1; + + while (true) { + auto pids = tokenizeString>(readFile(cgroup + "/cgroup.procs")); + + if (pids.empty()) break; + + if (round > 20) + throw Error("cannot kill cgroup '%s'", cgroup); + + for (auto & pid_s : pids) { + pid_t pid; + if (!string2Int(pid_s, pid)) throw Error("invalid pid '%s'", pid); + // FIXME: pid wraparound + if (kill(pid, SIGKILL) == -1 && errno != ESRCH) + throw SysError("killing member %d of cgroup '%s'", pid, cgroup); + } + + auto sleep = std::chrono::milliseconds((int) std::pow(2.0, std::min(round, 10))); + printError("waiting for %d ms for cgroup '%s' to become empty", sleep.count(), cgroup); + std::this_thread::sleep_for(sleep); + round++; + } + + if (rmdir(cgroup.c_str()) == -1) + throw SysError("deleting cgroup '%s'", cgroup); +} + +} + +#endif diff --git a/src/libstore/cgroup.hh b/src/libstore/cgroup.hh new file mode 100644 index 000000000..c7b09398e --- /dev/null +++ b/src/libstore/cgroup.hh @@ -0,0 +1,13 @@ +#pragma once + +#if __linux__ + +#include "types.hh" + +namespace nix { + +void destroyCgroup(const Path & cgroup); + +} + +#endif From 570c443f560e015cf02e4f96102eaaa0e6853562 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sat, 16 May 2020 21:21:41 +0200 Subject: [PATCH 06/36] Simplify cgroup creation --- src/libstore/build.cc | 53 +++++++++++++----------------------------- src/libstore/cgroup.cc | 17 ++++++++++++++ src/libstore/cgroup.hh | 2 ++ 3 files changed, 35 insertions(+), 37 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 1f1468d97..97554e9cf 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -2375,50 +2375,30 @@ void DerivationGoal::startBuilder() #if __linux__ if (useChroot) { - /* Create a cgroup. */ + /* Create a systemd cgroup since that's the minimum required + by systemd-nspawn. */ // FIXME: do we want to use the parent cgroup? We should // always use the same cgroup regardless of whether we're the // daemon or run from a user session via sudo. - std::string msg; - std::vector cgroups; - for (auto & line : tokenizeString>(readFile("/proc/self/cgroup"), "\n")) { - static std::regex regex("([0-9]+):([^:]*):(.*)"); - std::smatch match; - if (!std::regex_match(line, match, regex)) - throw Error("invalid line '%s' in '/proc/self/cgroup'", line); + auto ourCgroups = getCgroups("/proc/self/cgroup"); + auto systemdCgroup = ourCgroups["systemd"]; + if (systemdCgroup == "") + throw Error("'systemd' cgroup does not exist"); - /* We only create a systemd cgroup, since that's enough - for running systemd-nspawn. */ - std::string name; - if (match[2] == "name=systemd") - name = "systemd"; - //else if (match[2] == "") - // name = "unified"; - else continue; + auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup); - std::string cgroup = match[3]; + if (!pathExists(hostCgroup)) + throw Error("expected cgroup directory '%s'", hostCgroup); - auto hostCgroup = canonPath("/sys/fs/cgroup/" + name + "/" + cgroup); + auto childCgroup = fmt("%s/nix-%d", hostCgroup, buildUser->getUID()); - if (!pathExists(hostCgroup)) - throw Error("expected cgroup directory '%s'", hostCgroup); + destroyCgroup(childCgroup); - auto childCgroup = fmt("%s/nix-%d", hostCgroup, buildUser->getUID()); + if (mkdir(childCgroup.c_str(), 0755) == -1) + throw SysError("creating cgroup '%s'", childCgroup); - destroyCgroup(childCgroup); - - if (mkdir(childCgroup.c_str(), 0755) == -1) - throw SysError("creating cgroup '%s'", childCgroup); - - chownToBuilder(childCgroup); - chownToBuilder(childCgroup + "/cgroup.procs"); - if (name == "unified") { - chownToBuilder(childCgroup + "/cgroup.threads"); - chownToBuilder(childCgroup + "/cgroup.subtree_control"); - } - - cgroups.push_back(childCgroup); - } + chownToBuilder(childCgroup); + chownToBuilder(childCgroup + "/cgroup.procs"); /* Set up private namespaces for the build: @@ -2545,8 +2525,7 @@ void DerivationGoal::startBuilder() throw SysError("getting sandbox mount namespace"); /* Move the child into its own cgroup. */ - for (auto & childCgroup : cgroups) - writeFile(childCgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); + writeFile(childCgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); /* Signal the builder that we've updated its user namespace. */ writeFull(userNamespaceSync.writeSide.get(), "1"); diff --git a/src/libstore/cgroup.cc b/src/libstore/cgroup.cc index 8cd682e68..887facdca 100644 --- a/src/libstore/cgroup.cc +++ b/src/libstore/cgroup.cc @@ -9,6 +9,23 @@ namespace nix { +std::map getCgroups(const Path & cgroupFile) +{ + std::map cgroups; + + for (auto & line : tokenizeString>(readFile(cgroupFile), "\n")) { + static std::regex regex("([0-9]+):([^:]*):(.*)"); + std::smatch match; + if (!std::regex_match(line, match, regex)) + throw Error("invalid line '%s' in '%s'", line, cgroupFile); + + std::string name = hasPrefix(match[2], "name=") ? std::string(match[2], 5) : match[2]; + cgroups.insert_or_assign(name, match[3]); + } + + return cgroups; +} + void destroyCgroup(const Path & cgroup) { for (auto & entry : readDirectory(cgroup)) { diff --git a/src/libstore/cgroup.hh b/src/libstore/cgroup.hh index c7b09398e..dc6758957 100644 --- a/src/libstore/cgroup.hh +++ b/src/libstore/cgroup.hh @@ -6,6 +6,8 @@ namespace nix { +std::map getCgroups(const Path & cgroupFile); + void destroyCgroup(const Path & cgroup); } From ba50c3efa3b2394f5a8372939bc600008cd25e7e Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 19 May 2020 23:25:44 +0200 Subject: [PATCH 07/36] Add "uid-range" and "systemd-cgroup" system features "uid-range" provides 65536 UIDs to a build and runs the build as root in its user namespace. "systemd-cgroup" allows the build to mount the systemd cgroup controller (needed for running systemd-nspawn and NixOS containers). Also, add a configuration option "auto-allocate-uids" which is needed to enable these features, and some experimental feature gates. So to enable support for containers you need the following in nix.conf: experimental-features = auto-allocate-uids systemd-cgroup auto-allocate-uids = true system-features = uid-range systemd-cgroup --- src/libstore/build.cc | 270 ++++++++++---------------------------- src/libstore/cgroup.cc | 19 ++- src/libstore/globals.hh | 5 +- src/libstore/user-lock.cc | 212 ++++++++++++++++++++++++++++++ src/libstore/user-lock.hh | 39 ++++++ 5 files changed, 340 insertions(+), 205 deletions(-) create mode 100644 src/libstore/user-lock.cc create mode 100644 src/libstore/user-lock.hh diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 97554e9cf..1f79a8d2d 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -16,7 +16,7 @@ #include "machines.hh" #include "daemon.hh" #include "worker-protocol.hh" -#include "cgroup.hh" +#include "user-lock.hh" #include #include @@ -504,154 +504,6 @@ void handleDiffHook( } } -////////////////////////////////////////////////////////////////////// - - -class UserLock -{ -private: - Path fnUserLock; - AutoCloseFD fdUserLock; - - bool isEnabled = false; - uid_t uid = 0; - gid_t gid = 0; - std::vector supplementaryGIDs; - -public: - UserLock(); - - void kill(); - - uid_t getUID() { assert(uid); return uid; } - gid_t getGID() { assert(gid); return gid; } - uint32_t getIDCount() { return settings.idsPerBuild; } - std::vector getSupplementaryGIDs() { return supplementaryGIDs; } - - bool findFreeUser(); - - bool enabled() { return isEnabled; } - -}; - - -UserLock::UserLock() -{ -#if 0 - assert(settings.buildUsersGroup != ""); - createDirs(settings.nixStateDir + "/userpool"); -#endif -} - -bool UserLock::findFreeUser() { - if (enabled()) return true; - -#if 0 - /* Get the members of the build-users-group. */ - struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); - if (!gr) - throw Error("the group '%1%' specified in 'build-users-group' does not exist", - settings.buildUsersGroup); - gid = gr->gr_gid; - - /* Copy the result of getgrnam. */ - Strings users; - for (char * * p = gr->gr_mem; *p; ++p) { - debug("found build user '%1%'", *p); - users.push_back(*p); - } - - if (users.empty()) - throw Error("the build users group '%1%' has no members", - settings.buildUsersGroup); - - /* Find a user account that isn't currently in use for another - build. */ - for (auto & i : users) { - debug("trying user '%1%'", i); - - struct passwd * pw = getpwnam(i.c_str()); - if (!pw) - throw Error("the user '%1%' in the group '%2%' does not exist", - i, settings.buildUsersGroup); - - - fnUserLock = (format("%1%/userpool/%2%") % settings.nixStateDir % pw->pw_uid).str(); - - AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); - if (!fd) - throw SysError("opening user lock '%1%'", fnUserLock); - - if (lockFile(fd.get(), ltWrite, false)) { - fdUserLock = std::move(fd); - user = i; - uid = pw->pw_uid; - - /* Sanity check... */ - if (uid == getuid() || uid == geteuid()) - throw Error("the Nix user should not be a member of '%1%'", - settings.buildUsersGroup); - -#if __linux__ - /* Get the list of supplementary groups of this build user. This - is usually either empty or contains a group such as "kvm". */ - supplementaryGIDs.resize(10); - int ngroups = supplementaryGIDs.size(); - int err = getgrouplist(pw->pw_name, pw->pw_gid, - supplementaryGIDs.data(), &ngroups); - if (err == -1) - throw Error("failed to get list of supplementary groups for '%1%'", pw->pw_name); - - supplementaryGIDs.resize(ngroups); -#endif - - isEnabled = true; - return true; - } - } - - return false; -#endif - - assert(settings.startId > 0); - assert(settings.startId % settings.idsPerBuild == 0); - assert(settings.uidCount % settings.idsPerBuild == 0); - assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits::max()); - - // FIXME: check whether the id range overlaps any known users - - size_t nrSlots = settings.uidCount / settings.idsPerBuild; - - for (size_t i = 0; i < nrSlots; i++) { - debug("trying user slot '%d'", i); - - createDirs(settings.nixStateDir + "/userpool"); - - fnUserLock = fmt("%s/userpool/slot-%d", settings.nixStateDir, i); - - AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); - if (!fd) - throw SysError("opening user lock '%1%'", fnUserLock); - - if (lockFile(fd.get(), ltWrite, false)) { - fdUserLock = std::move(fd); - uid = settings.startId + i * settings.idsPerBuild; - gid = settings.startId + i * settings.idsPerBuild; - return true; - } - } - - return false; -} - -void UserLock::kill() -{ - // FIXME: use a cgroup to kill all processes in the build? -#if 0 - killUser(uid); -#endif -} - ////////////////////////////////////////////////////////////////////// @@ -840,6 +692,13 @@ private: Path chrootRootDir; + /* Whether to give the build more than 1 UID. */ + bool useUidRange = false; + + /* Whether to make the 'systemd' cgroup controller available to + the build. */ + bool useSystemdCgroup = false; + /* RAII object to delete the chroot directory. */ std::shared_ptr autoDelChroot; @@ -896,8 +755,8 @@ private: result. */ std::map prevInfos; - const uid_t sandboxUid = 1000; - const gid_t sandboxGid = 100; + uid_t sandboxUid = -1; + gid_t sandboxGid = -1; const static Path homeDir; @@ -1445,6 +1304,7 @@ void DerivationGoal::inputsRealised() result = BuildResult(); } + void DerivationGoal::started() { auto msg = fmt( buildMode == bmRepair ? "repairing outputs of '%s'" : @@ -1459,6 +1319,7 @@ void DerivationGoal::started() { worker.updateProgress(); } + void DerivationGoal::tryToBuild() { trace("trying to build"); @@ -1556,25 +1417,28 @@ void DerivationGoal::tryToBuild() worker.wakeUp(shared_from_this()); } + void DerivationGoal::tryLocalBuild() { /* If `build-users-group' is not empty, then we have to build as one of the members of that group. */ - if ((settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0) { + static bool useBuildUsers = (settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0; + if (useBuildUsers) { #if defined(__linux__) || defined(__APPLE__) - if (!buildUser) buildUser = std::make_unique(); + if (!buildUser) + buildUser = acquireUserLock(); - if (buildUser->findFreeUser()) { - /* Make sure that no other processes are executing under this - uid. */ - buildUser->kill(); - } else { + if (!buildUser) { if (!actLock) actLock = std::make_unique(*logger, lvlWarn, actBuildWaiting, fmt("waiting for UID to build '%s'", yellowtxt(worker.store.printStorePath(drvPath)))); worker.waitForAWhile(shared_from_this()); return; } + + /* Make sure that no other processes are executing under this + uid. */ + buildUser->kill(); #else /* Don't know how to block the creation of setuid/setgid binaries on this platform. */ @@ -2087,6 +1951,9 @@ void DerivationGoal::startBuilder() } } + useUidRange = parsedDrv->getRequiredSystemFeatures().count("uid-range"); + useSystemdCgroup = parsedDrv->getRequiredSystemFeatures().count("systemd-cgroup"); + if (useChroot) { /* Allow a user-configurable set of directories from the @@ -2166,7 +2033,7 @@ void DerivationGoal::startBuilder() printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir); - if (mkdir(chrootRootDir.c_str(), 0755) == -1) + if (mkdir(chrootRootDir.c_str(), useUidRange ? 0755 : 0750) == -1) throw SysError("cannot create '%1%'", chrootRootDir); // FIXME: only make root writable for user namespace builds. @@ -2186,6 +2053,12 @@ void DerivationGoal::startBuilder() createDirs(chrootRootDir + "/etc"); chownToBuilder(chrootRootDir + "/etc"); + if (useUidRange && (!buildUser || buildUser->getUIDCount() < 65536)) + throw Error("feature 'uid-range' requires '%s' to be enabled", settings.autoAllocateUids.name); + + sandboxUid = useUidRange ? 0 : 1000; + sandboxGid = useUidRange ? 0 : 100; + writeFile(chrootRootDir + "/etc/passwd", fmt( "root:x:0:0:Nix build user:%3%:/noshell\n" "nixbld:x:%1%:%2%:Nix build user:%3%:/noshell\n" @@ -2238,12 +2111,32 @@ void DerivationGoal::startBuilder() for (auto & i : drv->outputs) dirsInChroot.erase(worker.store.printStorePath(i.second.path)); -#elif __APPLE__ - /* We don't really have any parent prep work to do (yet?) - All work happens in the child, instead. */ + if (useSystemdCgroup) { + settings.requireExperimentalFeature("systemd-cgroup"); + std::optional cgroup; + if (!buildUser || !(cgroup = buildUser->getCgroup())) + throw Error("feature 'systemd-cgroup' requires 'auto-allocate-uids = true' in nix.conf"); + chownToBuilder(*cgroup); + chownToBuilder(*cgroup + "/cgroup.procs"); + } + #else - throw Error("sandboxing builds is not supported on this platform"); + if (useUidRange) + throw Error("feature 'uid-range' is not supported on this platform"); + if (useSystemdCgroup) + throw Error("feature 'systemd-cgroup' is not supported on this platform"); + #if __APPLE__ + /* We don't really have any parent prep work to do (yet?) + All work happens in the child, instead. */ + #else + throw Error("sandboxing builds is not supported on this platform"); + #endif #endif + } else { + if (useUidRange) + throw Error("feature 'uid-range' is only supported in sandboxed builds"); + if (useSystemdCgroup) + throw Error("feature 'systemd-cgroup' is only supported in sandboxed builds"); } if (needsHashRewrite()) { @@ -2375,31 +2268,6 @@ void DerivationGoal::startBuilder() #if __linux__ if (useChroot) { - /* Create a systemd cgroup since that's the minimum required - by systemd-nspawn. */ - // FIXME: do we want to use the parent cgroup? We should - // always use the same cgroup regardless of whether we're the - // daemon or run from a user session via sudo. - auto ourCgroups = getCgroups("/proc/self/cgroup"); - auto systemdCgroup = ourCgroups["systemd"]; - if (systemdCgroup == "") - throw Error("'systemd' cgroup does not exist"); - - auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup); - - if (!pathExists(hostCgroup)) - throw Error("expected cgroup directory '%s'", hostCgroup); - - auto childCgroup = fmt("%s/nix-%d", hostCgroup, buildUser->getUID()); - - destroyCgroup(childCgroup); - - if (mkdir(childCgroup.c_str(), 0755) == -1) - throw SysError("creating cgroup '%s'", childCgroup); - - chownToBuilder(childCgroup); - chownToBuilder(childCgroup + "/cgroup.procs"); - /* Set up private namespaces for the build: - The PID namespace causes the build to start as PID 1. @@ -2508,15 +2376,16 @@ void DerivationGoal::startBuilder() the calling user (if build users are disabled). */ uid_t hostUid = buildUser ? buildUser->getUID() : getuid(); uid_t hostGid = buildUser ? buildUser->getGID() : getgid(); - uint32_t nrIds = settings.idsPerBuild; // FIXME + uint32_t nrIds = buildUser && useUidRange ? buildUser->getUIDCount() : 1; writeFile("/proc/" + std::to_string(pid) + "/uid_map", - fmt("%d %d %d", /* sandboxUid */ 0, hostUid, nrIds)); + fmt("%d %d %d", sandboxUid, hostUid, nrIds)); - //writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); + if (!useUidRange) + writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); writeFile("/proc/" + std::to_string(pid) + "/gid_map", - fmt("%d %d %d", /* sandboxGid */ 0, hostGid, nrIds)); + fmt("%d %d %d", sandboxGid, hostGid, nrIds)); /* Save the mount namespace of the child. We have to do this *before* the child does a chroot. */ @@ -2525,7 +2394,10 @@ void DerivationGoal::startBuilder() throw SysError("getting sandbox mount namespace"); /* Move the child into its own cgroup. */ - writeFile(childCgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); + if (buildUser) { + if (auto cgroup = buildUser->getCgroup()) + writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); + } /* Signal the builder that we've updated its user namespace. */ writeFull(userNamespaceSync.writeSide.get(), "1"); @@ -3361,7 +3233,7 @@ void DerivationGoal::runChild() /* Unshare the cgroup namespace. This means /proc/self/cgroup will show the child's cgroup as '/' rather than whatever it is in the parent. */ - if (unshare(CLONE_NEWCGROUP) == -1) + if (useSystemdCgroup && unshare(CLONE_NEWCGROUP) == -1) throw SysError("unsharing cgroup namespace"); /* Do the chroot(). */ @@ -3386,16 +3258,10 @@ void DerivationGoal::runChild() /* Switch to the sandbox uid/gid in the user namespace, which corresponds to the build user or calling user in the parent namespace. */ -#if 0 if (setgid(sandboxGid) == -1) throw SysError("setgid failed"); if (setuid(sandboxUid) == -1) throw SysError("setuid failed"); -#endif - if (setgid(0) == -1) - throw SysError("setgid failed"); - if (setuid(0) == -1) - throw SysError("setuid failed"); setUser = false; } @@ -3789,7 +3655,7 @@ void DerivationGoal::registerOutputs() something like that. */ canonicalisePathMetaData( actualPath, - buildUser ? std::optional(std::make_pair(buildUser->getUID(), buildUser->getUID() + buildUser->getIDCount() - 1)) : std::nullopt, + buildUser ? std::optional(buildUser->getUIDRange()) : std::nullopt, inodesSeen); /* FIXME: this is in-memory. */ @@ -3866,7 +3732,7 @@ void DerivationGoal::registerOutputs() all files are owned by the build user, if applicable. */ canonicalisePathMetaData(actualPath, buildUser && !rewritten - ? std::optional(std::make_pair(buildUser->getUID(), buildUser->getUID() + buildUser->getIDCount() - 1)) + ? std::optional(buildUser->getUIDRange()) : std::nullopt, inodesSeen); diff --git a/src/libstore/cgroup.cc b/src/libstore/cgroup.cc index 887facdca..9e5e937df 100644 --- a/src/libstore/cgroup.cc +++ b/src/libstore/cgroup.cc @@ -4,6 +4,7 @@ #include "util.hh" #include +#include #include @@ -19,7 +20,7 @@ std::map getCgroups(const Path & cgroupFile) if (!std::regex_match(line, match, regex)) throw Error("invalid line '%s' in '%s'", line, cgroupFile); - std::string name = hasPrefix(match[2], "name=") ? std::string(match[2], 5) : match[2]; + std::string name = hasPrefix(std::string(match[2]), "name=") ? std::string(match[2], 5) : match[2]; cgroups.insert_or_assign(name, match[3]); } @@ -28,6 +29,8 @@ std::map getCgroups(const Path & cgroupFile) void destroyCgroup(const Path & cgroup) { + if (!pathExists(cgroup)) return; + for (auto & entry : readDirectory(cgroup)) { if (entry.type != DT_DIR) continue; destroyCgroup(cgroup + "/" + entry.name); @@ -35,6 +38,8 @@ void destroyCgroup(const Path & cgroup) int round = 1; + std::unordered_set pidsShown; + while (true) { auto pids = tokenizeString>(readFile(cgroup + "/cgroup.procs")); @@ -46,13 +51,23 @@ void destroyCgroup(const Path & cgroup) for (auto & pid_s : pids) { pid_t pid; if (!string2Int(pid_s, pid)) throw Error("invalid pid '%s'", pid); + if (pidsShown.insert(pid).second) { + try { + auto cmdline = readFile(fmt("/proc/%d/cmdline", pid)); + using namespace std::string_literals; + warn("killing stray builder process %d (%s)...", + pid, trim(replaceStrings(cmdline, "\0"s, " "))); + } catch (SysError &) { + } + } // FIXME: pid wraparound if (kill(pid, SIGKILL) == -1 && errno != ESRCH) throw SysError("killing member %d of cgroup '%s'", pid, cgroup); } auto sleep = std::chrono::milliseconds((int) std::pow(2.0, std::min(round, 10))); - printError("waiting for %d ms for cgroup '%s' to become empty", sleep.count(), cgroup); + if (sleep.count() > 100) + printError("waiting for %d ms for cgroup '%s' to become empty", sleep.count(), cgroup); std::this_thread::sleep_for(sleep); round++; } diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 89db072b0..5cf73c7b4 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -149,10 +149,13 @@ public: "The Unix group that contains the build users."}; #if __linux__ + Setting autoAllocateUids{this, false, "auto-allocate-uids", + "Whether to allocate UIDs for builders automatically."}; + const uint32_t idsPerBuild = 1 << 16; Setting startId{this, 872415232, "start-id", - "The first UID and GID to use for dynamic ID allocation. (0 means disable.)"}; + "The first UID and GID to use for dynamic ID allocation."}; Setting uidCount{this, idsPerBuild * 128, "id-count", "The number of UIDs/GIDs to use for dynamic ID allocation."}; diff --git a/src/libstore/user-lock.cc b/src/libstore/user-lock.cc new file mode 100644 index 000000000..8a09df4d1 --- /dev/null +++ b/src/libstore/user-lock.cc @@ -0,0 +1,212 @@ +#include "user-lock.hh" +#include "globals.hh" +#include "pathlocks.hh" +#include "cgroup.hh" + +namespace nix { + +struct SimpleUserLock : UserLock +{ + AutoCloseFD fdUserLock; + uid_t uid; + gid_t gid; + std::vector supplementaryGIDs; + + void kill() override + { + killUser(uid); + } + + std::pair getUIDRange() override + { + assert(uid); + return {uid, uid}; + } + + gid_t getGID() override { assert(gid); return gid; } + + std::vector getSupplementaryGIDs() override { return supplementaryGIDs; } + + static std::unique_ptr acquire() + { + assert(settings.buildUsersGroup != ""); + createDirs(settings.nixStateDir + "/userpool"); + + /* Get the members of the build-users-group. */ + struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); + if (!gr) + throw Error("the group '%s' specified in 'build-users-group' does not exist", settings.buildUsersGroup); + + /* Copy the result of getgrnam. */ + Strings users; + for (char * * p = gr->gr_mem; *p; ++p) { + debug("found build user '%s'", *p); + users.push_back(*p); + } + + if (users.empty()) + throw Error("the build users group '%s' has no members", settings.buildUsersGroup); + + /* Find a user account that isn't currently in use for another + build. */ + for (auto & i : users) { + debug("trying user '%s'", i); + + struct passwd * pw = getpwnam(i.c_str()); + if (!pw) + throw Error("the user '%s' in the group '%s' does not exist", i, settings.buildUsersGroup); + + auto fnUserLock = fmt("%s/userpool/%s", settings.nixStateDir,pw->pw_uid); + + AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fd) + throw SysError("opening user lock '%s'", fnUserLock); + + if (lockFile(fd.get(), ltWrite, false)) { + auto lock = std::make_unique(); + + lock->fdUserLock = std::move(fd); + lock->uid = pw->pw_uid; + lock->gid = gr->gr_gid; + + /* Sanity check... */ + if (lock->uid == getuid() || lock->uid == geteuid()) + throw Error("the Nix user should not be a member of '%s'", settings.buildUsersGroup); + + #if __linux__ + /* Get the list of supplementary groups of this build + user. This is usually either empty or contains a + group such as "kvm". */ + lock->supplementaryGIDs.resize(10); + int ngroups = lock->supplementaryGIDs.size(); + int err = getgrouplist(pw->pw_name, pw->pw_gid, + lock->supplementaryGIDs.data(), &ngroups); + if (err == -1) + throw Error("failed to get list of supplementary groups for '%s'", pw->pw_name); + + lock->supplementaryGIDs.resize(ngroups); + #endif + + return lock; + } + } + + return nullptr; + } +}; + +#if __linux__ +struct CgroupUserLock : UserLock +{ + AutoCloseFD fdUserLock; + uid_t uid; + + void kill() override + { + if (cgroup) { + destroyCgroup(*cgroup); + cgroup.reset(); + } + } + + std::pair getUIDRange() override + { + assert(uid); + return {uid, uid + settings.idsPerBuild - 1}; + } + + gid_t getGID() override + { + // We use the same GID ranges as for the UIDs. + assert(uid); + return uid; + } + + std::vector getSupplementaryGIDs() override { return {}; } // FIXME + + static std::unique_ptr acquire() + { + settings.requireExperimentalFeature("auto-allocate-uids"); + assert(settings.startId > 0); + assert(settings.startId % settings.idsPerBuild == 0); + assert(settings.uidCount % settings.idsPerBuild == 0); + assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits::max()); + + // FIXME: check whether the id range overlaps any known users + + createDirs(settings.nixStateDir + "/userpool2"); + + size_t nrSlots = settings.uidCount / settings.idsPerBuild; + + for (size_t i = 0; i < nrSlots; i++) { + debug("trying user slot '%d'", i); + + createDirs(settings.nixStateDir + "/userpool2"); + + auto fnUserLock = fmt("%s/userpool2/slot-%d", settings.nixStateDir, i); + + AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fd) + throw SysError("opening user lock '%s'", fnUserLock); + + if (lockFile(fd.get(), ltWrite, false)) { + auto lock = std::make_unique(); + lock->fdUserLock = std::move(fd); + lock->uid = settings.startId + i * settings.idsPerBuild; + auto s = drainFD(lock->fdUserLock.get()); + if (s != "") lock->cgroup = s; + return lock; + } + } + + return nullptr; + } + + std::optional cgroup; + + std::optional getCgroup() override + { + if (!cgroup) { + /* Create a systemd cgroup since that's the minimum + required by systemd-nspawn. */ + auto ourCgroups = getCgroups("/proc/self/cgroup"); + auto systemdCgroup = ourCgroups["systemd"]; + if (systemdCgroup == "") + throw Error("'systemd' cgroup does not exist"); + + auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup); + + if (!pathExists(hostCgroup)) + throw Error("expected cgroup directory '%s'", hostCgroup); + + cgroup = fmt("%s/nix-%d", hostCgroup, uid); + + destroyCgroup(*cgroup); + + if (mkdir(cgroup->c_str(), 0755) == -1) + throw SysError("creating cgroup '%s'", *cgroup); + + /* Record the cgroup in the lock file. This ensures that + if we subsequently get executed under a different parent + cgroup, we kill the previous cgroup first. */ + if (ftruncate(fdUserLock.get(), 0) == -1) + throw Error("truncating user lock"); + writeFull(fdUserLock.get(), *cgroup); + } + + return cgroup; + }; +}; +#endif + +std::unique_ptr acquireUserLock() +{ + #if __linux__ + if (settings.autoAllocateUids) + return CgroupUserLock::acquire(); + else + #endif + return SimpleUserLock::acquire(); +} + +} diff --git a/src/libstore/user-lock.hh b/src/libstore/user-lock.hh new file mode 100644 index 000000000..88d068689 --- /dev/null +++ b/src/libstore/user-lock.hh @@ -0,0 +1,39 @@ +#pragma once + +#include "types.hh" + +namespace nix { + +struct UserLock +{ + virtual ~UserLock() { } + + /* Get the first and last UID. */ + virtual std::pair getUIDRange() = 0; + + /* Get the first UID. */ + uid_t getUID() + { + return getUIDRange().first; + } + + uid_t getUIDCount() + { + return getUIDRange().second - getUIDRange().first + 1; + } + + virtual gid_t getGID() = 0; + + virtual std::vector getSupplementaryGIDs() = 0; + + /* Kill any processes currently executing as this user. */ + virtual void kill() = 0; + + virtual std::optional getCgroup() { return {}; }; +}; + +/* Acquire a user lock. Note that this may return nullptr if no user + is available. */ +std::unique_ptr acquireUserLock(); + +} From 8c4cce553c16438f0ccbbaea6d77f2bd64306dfe Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 20 May 2020 11:24:21 +0200 Subject: [PATCH 08/36] Fix macOS build --- src/libstore/build.cc | 11 +---------- src/libstore/user-lock.cc | 13 +++++++++++++ src/libstore/user-lock.hh | 2 ++ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 1f79a8d2d..6c3f94a76 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -1420,11 +1420,7 @@ void DerivationGoal::tryToBuild() void DerivationGoal::tryLocalBuild() { - /* If `build-users-group' is not empty, then we have to build as - one of the members of that group. */ - static bool useBuildUsers = (settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0; - if (useBuildUsers) { -#if defined(__linux__) || defined(__APPLE__) + if (useBuildUsers()) { if (!buildUser) buildUser = acquireUserLock(); @@ -1439,11 +1435,6 @@ void DerivationGoal::tryLocalBuild() { /* Make sure that no other processes are executing under this uid. */ buildUser->kill(); -#else - /* Don't know how to block the creation of setuid/setgid - binaries on this platform. */ - throw Error("build users are not supported on this platform for security reasons"); -#endif } actLock.reset(); diff --git a/src/libstore/user-lock.cc b/src/libstore/user-lock.cc index 8a09df4d1..2254386da 100644 --- a/src/libstore/user-lock.cc +++ b/src/libstore/user-lock.cc @@ -209,4 +209,17 @@ std::unique_ptr acquireUserLock() return SimpleUserLock::acquire(); } +bool useBuildUsers() +{ + #if __linux__ + static bool b = (settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0; + return b; + #elif __APPLE__ + static bool b = settings.buildUsersGroup != "" && getuid() == 0; + return b; + #else + return false; + #endif +} + } diff --git a/src/libstore/user-lock.hh b/src/libstore/user-lock.hh index 88d068689..bfb55b0d9 100644 --- a/src/libstore/user-lock.hh +++ b/src/libstore/user-lock.hh @@ -36,4 +36,6 @@ struct UserLock is available. */ std::unique_ptr acquireUserLock(); +bool useBuildUsers(); + } From 7349f257da8278af9aae35544b15c9a204e2a57b Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 20 May 2020 11:57:33 +0200 Subject: [PATCH 09/36] Only mount /sys in uid-range builds Maybe this should be a separate system feature... /sys exposes a lot of impure info about the host system. --- src/libstore/build.cc | 11 ++++++----- src/libstore/user-lock.cc | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 6c3f94a76..e927a65f0 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -3173,11 +3173,12 @@ void DerivationGoal::runChild() if (mount("none", (chrootRootDir + "/proc").c_str(), "proc", 0, 0) == -1) throw SysError("mounting /proc"); - /* Mount sysfs on /sys. FIXME: only in user namespace - builds. */ - createDirs(chrootRootDir + "/sys"); - if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1) - throw SysError("mounting /sys"); + /* Mount sysfs on /sys. */ + if (useUidRange) { + createDirs(chrootRootDir + "/sys"); + if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1) + throw SysError("mounting /sys"); + } /* Mount a new tmpfs on /dev/shm to ensure that whatever the builder puts in /dev/shm is cleaned up automatically. */ diff --git a/src/libstore/user-lock.cc b/src/libstore/user-lock.cc index 2254386da..fb2a45f48 100644 --- a/src/libstore/user-lock.cc +++ b/src/libstore/user-lock.cc @@ -122,7 +122,7 @@ struct CgroupUserLock : UserLock return uid; } - std::vector getSupplementaryGIDs() override { return {}; } // FIXME + std::vector getSupplementaryGIDs() override { return {}; } static std::unique_ptr acquire() { From 6259fd7ea6ac331d1037a150b07c01125e80ed8e Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 4 Nov 2022 13:29:39 +0100 Subject: [PATCH 10/36] Fix indentation --- src/nix-store/nix-store.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nix-store/nix-store.cc b/src/nix-store/nix-store.cc index eb65e7dde..b59a6d026 100644 --- a/src/nix-store/nix-store.cc +++ b/src/nix-store/nix-store.cc @@ -745,8 +745,8 @@ static void opVerifyPath(Strings opFlags, Strings opArgs) auto current = sink.finish(); if (current.first != info->narHash) { printError("path '%s' was modified! expected hash '%s', got '%s'", - store->printStorePath(path), - info->narHash.to_string(Base32, true), + store->printStorePath(path), + info->narHash.to_string(Base32, true), current.first.to_string(Base32, true)); status = 1; } From 40911d7dec75541a400fe8f556d4c70a7f845fac Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 4 Nov 2022 13:30:35 +0100 Subject: [PATCH 11/36] Remove stray tab --- src/libutil/experimental-features.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libutil/experimental-features.cc b/src/libutil/experimental-features.cc index 30d071408..670079019 100644 --- a/src/libutil/experimental-features.cc +++ b/src/libutil/experimental-features.cc @@ -14,8 +14,8 @@ std::map stringifiedXpFeatures = { { Xp::NoUrlLiterals, "no-url-literals" }, { Xp::FetchClosure, "fetch-closure" }, { Xp::ReplFlake, "repl-flake" }, - { Xp::AutoAllocateUids, "auto-allocate-uids" }, - { Xp::SystemdCgroup, "systemd-cgroup" }, + { Xp::AutoAllocateUids, "auto-allocate-uids" }, + { Xp::SystemdCgroup, "systemd-cgroup" }, }; const std::optional parseExperimentalFeature(const std::string_view & name) From 2fde7e0108d70bcba64ebecc5e5c7ee2863e3446 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 8 Nov 2022 16:03:42 +0100 Subject: [PATCH 12/36] Split auto UID allocation from cgroups Cgroups are now only used for derivations that require the uid-range range feature. This allows auto UID allocation even on systems that don't have cgroups (like macOS). Also, make things work on modern systems that use cgroups v2 (where there is a single hierarchy and no "systemd" controller). --- src/libstore/build/local-derivation-goal.cc | 19 +-- src/libstore/build/local-derivation-goal.hh | 7 +- src/libstore/cgroup.cc | 1 + src/libstore/globals.cc | 4 + src/libstore/globals.hh | 6 +- src/libstore/lock.cc | 149 +++++++++++--------- src/libstore/lock.hh | 24 ++-- src/libstore/parsed-derivations.cc | 6 + src/libstore/parsed-derivations.hh | 2 + 9 files changed, 122 insertions(+), 96 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 64540d262..09da87476 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -160,7 +160,7 @@ void LocalDerivationGoal::tryLocalBuild() { if (useBuildUsers()) { if (!buildUser) - buildUser = acquireUserLock(); + buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1); if (!buildUser) { if (!actLock) @@ -495,8 +495,8 @@ void LocalDerivationGoal::startBuilder() } } - useUidRange = parsedDrv->getRequiredSystemFeatures().count("uid-range"); useSystemdCgroup = parsedDrv->getRequiredSystemFeatures().count("Systemd-cgroup"); + assert(!useSystemdCgroup); if (useChroot) { @@ -576,7 +576,8 @@ void LocalDerivationGoal::startBuilder() printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir); - if (mkdir(chrootRootDir.c_str(), useUidRange ? 0755 : 0750) == -1) + // FIXME: make this 0700 + if (mkdir(chrootRootDir.c_str(), buildUser && buildUser->getUIDCount() != 1 ? 0755 : 0750) == -1) throw SysError("cannot create '%1%'", chrootRootDir); // FIXME: only make root writable for user namespace builds. @@ -596,8 +597,8 @@ void LocalDerivationGoal::startBuilder() createDirs(chrootRootDir + "/etc"); chownToBuilder(chrootRootDir + "/etc"); - if (useUidRange && (!buildUser || buildUser->getUIDCount() < 65536)) - throw Error("feature 'uid-range' requires '%s' to be enabled", settings.autoAllocateUids.name); + if (parsedDrv->useUidRange() && (!buildUser || buildUser->getUIDCount() < 65536)) + throw Error("feature 'uid-range' requires the setting '%s' to be enabled", settings.autoAllocateUids.name); /* Declare the build user's group so that programs get a consistent view of the system (e.g., "id -gn"). */ @@ -670,7 +671,7 @@ void LocalDerivationGoal::startBuilder() #endif #endif } else { - if (useUidRange) + if (parsedDrv->useUidRange()) throw Error("feature 'uid-range' is only supported in sandboxed builds"); if (useSystemdCgroup) throw Error("feature 'systemd-cgroup' is only supported in sandboxed builds"); @@ -934,12 +935,12 @@ void LocalDerivationGoal::startBuilder() the calling user (if build users are disabled). */ uid_t hostUid = buildUser ? buildUser->getUID() : getuid(); uid_t hostGid = buildUser ? buildUser->getGID() : getgid(); - uint32_t nrIds = buildUser && useUidRange ? buildUser->getUIDCount() : 1; + uid_t nrIds = buildUser ? buildUser->getUIDCount() : 1; writeFile("/proc/" + std::to_string(pid) + "/uid_map", fmt("%d %d %d", sandboxUid(), hostUid, nrIds)); - if (!useUidRange) + if (!buildUser || buildUser->getUIDCount() == 1) writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); writeFile("/proc/" + std::to_string(pid) + "/gid_map", @@ -1793,7 +1794,7 @@ void LocalDerivationGoal::runChild() throw SysError("mounting /proc"); /* Mount sysfs on /sys. */ - if (useUidRange) { + if (buildUser && buildUser->getUIDCount() != 1) { createDirs(chrootRootDir + "/sys"); if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1) throw SysError("mounting /sys"); diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index e6700a383..61b0f9145 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -41,9 +41,6 @@ struct LocalDerivationGoal : public DerivationGoal Path chrootRootDir; - /* Whether to give the build more than 1 UID. */ - bool useUidRange = false; - /* Whether to make the 'systemd' cgroup controller available to the build. */ bool useSystemdCgroup = false; @@ -99,8 +96,8 @@ struct LocalDerivationGoal : public DerivationGoal result. */ std::map prevInfos; - uid_t sandboxUid() { return usingUserNamespace ? (useUidRange ? 0 : 1000) : buildUser->getUID(); } - gid_t sandboxGid() { return usingUserNamespace ? (useUidRange ? 0 : 100) : buildUser->getGID(); } + uid_t sandboxUid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 1000 : 0) : buildUser->getUID(); } + gid_t sandboxGid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 100 : 0) : buildUser->getGID(); } const static Path homeDir; diff --git a/src/libstore/cgroup.cc b/src/libstore/cgroup.cc index 5d31609da..56e980be3 100644 --- a/src/libstore/cgroup.cc +++ b/src/libstore/cgroup.cc @@ -13,6 +13,7 @@ namespace nix { +// FIXME: obsolete, check for cgroup2 std::map getCgroups(const Path & cgroupFile) { std::map cgroups; diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc index ff658c428..b7f55cae7 100644 --- a/src/libstore/globals.cc +++ b/src/libstore/globals.cc @@ -130,6 +130,10 @@ StringSet Settings::getDefaultSystemFeatures() actually require anything special on the machines. */ StringSet features{"nixos-test", "benchmark", "big-parallel"}; + #if __linux__ + features.insert("uid-range"); + #endif + #if __linux__ if (access("/dev/kvm", R_OK | W_OK) == 0) features.insert("kvm"); diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index d3e86cc55..be741a830 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -46,6 +46,8 @@ struct PluginFilesSetting : public BaseSetting void set(const std::string & str, bool append = false) override; }; +const uint32_t maxIdsPerBuild = 1 << 16; + class Settings : public Config { unsigned int getDefaultCores(); @@ -279,12 +281,10 @@ public: Setting autoAllocateUids{this, false, "auto-allocate-uids", "Whether to allocate UIDs for builders automatically."}; - const uint32_t idsPerBuild = 1 << 16; - Setting startId{this, 872415232, "start-id", "The first UID and GID to use for dynamic ID allocation."}; - Setting uidCount{this, idsPerBuild * 128, "id-count", + Setting uidCount{this, maxIdsPerBuild * 128, "id-count", "The number of UIDs/GIDs to use for dynamic ID allocation."}; #endif diff --git a/src/libstore/lock.cc b/src/libstore/lock.cc index cc3977496..ecc51cebe 100644 --- a/src/libstore/lock.cc +++ b/src/libstore/lock.cc @@ -20,12 +20,8 @@ struct SimpleUserLock : UserLock killUser(uid); } - std::pair getUIDRange() override - { - assert(uid); - return {uid, uid}; - } - + uid_t getUID() override { assert(uid); return uid; } + uid_t getUIDCount() override { return 1; } gid_t getGID() override { assert(gid); return gid; } std::vector getSupplementaryGIDs() override { return supplementaryGIDs; } @@ -115,48 +111,65 @@ struct SimpleUserLock : UserLock } }; -#if __linux__ -struct CgroupUserLock : UserLock +struct AutoUserLock : UserLock { AutoCloseFD fdUserLock; - uid_t uid; + uid_t firstUid = 0; + uid_t nrIds = 1; + #if __linux__ + std::optional cgroup; + #endif + + ~AutoUserLock() + { + // Get rid of our cgroup, ignoring errors. + if (cgroup) rmdir(cgroup->c_str()); + } void kill() override { + #if __linux__ if (cgroup) { + printError("KILL CGROUP %s", *cgroup); destroyCgroup(*cgroup); - cgroup.reset(); + if (mkdir(cgroup->c_str(), 0755) == -1) + throw SysError("creating cgroup '%s'", *cgroup); + } else + #endif + { + assert(firstUid); + printError("KILL USER %d", firstUid); + killUser(firstUid); } } - std::pair getUIDRange() override - { - assert(uid); - return {uid, uid + settings.idsPerBuild - 1}; - } + uid_t getUID() override { assert(firstUid); return firstUid; } + + gid_t getUIDCount() override { return nrIds; } gid_t getGID() override { // We use the same GID ranges as for the UIDs. - assert(uid); - return uid; + assert(firstUid); + return firstUid; } std::vector getSupplementaryGIDs() override { return {}; } - static std::unique_ptr acquire() + static std::unique_ptr acquire(uid_t nrIds) { settings.requireExperimentalFeature(Xp::AutoAllocateUids); assert(settings.startId > 0); - assert(settings.startId % settings.idsPerBuild == 0); - assert(settings.uidCount % settings.idsPerBuild == 0); + assert(settings.startId % maxIdsPerBuild == 0); + assert(settings.uidCount % maxIdsPerBuild == 0); assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits::max()); + assert(nrIds <= maxIdsPerBuild); // FIXME: check whether the id range overlaps any known users createDirs(settings.nixStateDir + "/userpool2"); - size_t nrSlots = settings.uidCount / settings.idsPerBuild; + size_t nrSlots = settings.uidCount / maxIdsPerBuild; for (size_t i = 0; i < nrSlots; i++) { debug("trying user slot '%d'", i); @@ -170,11 +183,47 @@ struct CgroupUserLock : UserLock throw SysError("opening user lock '%s'", fnUserLock); if (lockFile(fd.get(), ltWrite, false)) { - auto lock = std::make_unique(); + auto s = drainFD(fd.get()); + + #if __linux__ + if (s != "") { + /* Kill the old cgroup, to ensure there are no + processes left over from an interrupted build. */ + destroyCgroup(s); + } + #endif + + if (ftruncate(fd.get(), 0) == -1) + throw Error("truncating user lock"); + + auto lock = std::make_unique(); lock->fdUserLock = std::move(fd); - lock->uid = settings.startId + i * settings.idsPerBuild; - auto s = drainFD(lock->fdUserLock.get()); - if (s != "") lock->cgroup = s; + lock->firstUid = settings.startId + i * maxIdsPerBuild; + lock->nrIds = nrIds; + + if (nrIds > 1) { + auto ourCgroups = getCgroups("/proc/self/cgroup"); + auto ourCgroup = ourCgroups[""]; + if (ourCgroup == "") + throw Error("cannot determine cgroup name from /proc/self/cgroup"); + + auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup); + + printError("PARENT CGROUP = %s", ourCgroupPath); + + if (!pathExists(ourCgroupPath)) + throw Error("expected cgroup directory '%s'", ourCgroupPath); + + lock->cgroup = fmt("%s/nix-build-%d", ourCgroupPath, lock->firstUid); + + printError("CHILD CGROUP = %s", *lock->cgroup); + + /* Record the cgroup in the lock file. This ensures that + if we subsequently get executed under a different parent + cgroup, we kill the previous cgroup first. */ + writeFull(lock->fdUserLock.get(), *lock->cgroup); + } + return lock; } } @@ -182,50 +231,16 @@ struct CgroupUserLock : UserLock return nullptr; } - std::optional cgroup; - - std::optional getCgroup() override - { - if (!cgroup) { - /* Create a systemd cgroup since that's the minimum - required by systemd-nspawn. */ - auto ourCgroups = getCgroups("/proc/self/cgroup"); - auto systemdCgroup = ourCgroups["systemd"]; - if (systemdCgroup == "") - throw Error("'systemd' cgroup does not exist"); - - auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup); - - if (!pathExists(hostCgroup)) - throw Error("expected cgroup directory '%s'", hostCgroup); - - cgroup = fmt("%s/nix-%d", hostCgroup, uid); - - destroyCgroup(*cgroup); - - if (mkdir(cgroup->c_str(), 0755) == -1) - throw SysError("creating cgroup '%s'", *cgroup); - - /* Record the cgroup in the lock file. This ensures that - if we subsequently get executed under a different parent - cgroup, we kill the previous cgroup first. */ - if (ftruncate(fdUserLock.get(), 0) == -1) - throw Error("truncating user lock"); - writeFull(fdUserLock.get(), *cgroup); - } - - return cgroup; - }; -}; -#endif - -std::unique_ptr acquireUserLock() -{ #if __linux__ - if (settings.autoAllocateUids) - return CgroupUserLock::acquire(); - else + std::optional getCgroup() override { return cgroup; } #endif +}; + +std::unique_ptr acquireUserLock(uid_t nrIds) +{ + if (settings.autoAllocateUids) + return AutoUserLock::acquire(nrIds); + else return SimpleUserLock::acquire(); } diff --git a/src/libstore/lock.hh b/src/libstore/lock.hh index 4b6d34069..62676a523 100644 --- a/src/libstore/lock.hh +++ b/src/libstore/lock.hh @@ -11,18 +11,16 @@ struct UserLock virtual ~UserLock() { } /* Get the first and last UID. */ - virtual std::pair getUIDRange() = 0; + std::pair getUIDRange() + { + auto first = getUID(); + return {first, first + getUIDCount() - 1}; + } /* Get the first UID. */ - uid_t getUID() - { - return getUIDRange().first; - } + virtual uid_t getUID() = 0; - uid_t getUIDCount() - { - return getUIDRange().second - getUIDRange().first + 1; - } + virtual uid_t getUIDCount() = 0; virtual gid_t getGID() = 0; @@ -31,12 +29,14 @@ struct UserLock /* Kill any processes currently executing as this user. */ virtual void kill() = 0; + #if __linux__ virtual std::optional getCgroup() { return {}; }; + #endif }; -/* Acquire a user lock. Note that this may return nullptr if no user - is available. */ -std::unique_ptr acquireUserLock(); +/* Acquire a user lock for a UID range of size `nrIds`. Note that this + may return nullptr if no user is available. */ +std::unique_ptr acquireUserLock(uid_t nrIds); bool useBuildUsers(); diff --git a/src/libstore/parsed-derivations.cc b/src/libstore/parsed-derivations.cc index f2288a04e..487dbcfbb 100644 --- a/src/libstore/parsed-derivations.cc +++ b/src/libstore/parsed-derivations.cc @@ -90,6 +90,7 @@ std::optional ParsedDerivation::getStringsAttr(const std::string & name StringSet ParsedDerivation::getRequiredSystemFeatures() const { + // FIXME: cache this? StringSet res; for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings())) res.insert(i); @@ -125,6 +126,11 @@ bool ParsedDerivation::substitutesAllowed() const return getBoolAttr("allowSubstitutes", true); } +bool ParsedDerivation::useUidRange() const +{ + return getRequiredSystemFeatures().count("uid-range"); +} + static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*"); std::optional ParsedDerivation::prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths) diff --git a/src/libstore/parsed-derivations.hh b/src/libstore/parsed-derivations.hh index 95bec21e8..bfb3857c0 100644 --- a/src/libstore/parsed-derivations.hh +++ b/src/libstore/parsed-derivations.hh @@ -38,6 +38,8 @@ public: bool substitutesAllowed() const; + bool useUidRange() const; + std::optional prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths); }; From 05d258667d12b2decda87024a59250c43343b509 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 8 Nov 2022 08:00:29 -0800 Subject: [PATCH 13/36] Fix build on macOS --- src/libstore/build/local-derivation-goal.cc | 2 +- src/libstore/globals.hh | 26 +++++++++++++++++---- src/libstore/lock.cc | 10 ++++---- src/libstore/lock.hh | 2 ++ 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 09da87476..45ea9968f 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -659,7 +659,7 @@ void LocalDerivationGoal::startBuilder() } #else - if (useUidRange) + if (parsedDrv->useUidRange()) throw Error("feature 'uid-range' is not supported on this platform"); if (useSystemdCgroup) throw Error("feature 'systemd-cgroup' is not supported on this platform"); diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index be741a830..88fe72202 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -46,7 +46,13 @@ struct PluginFilesSetting : public BaseSetting void set(const std::string & str, bool append = false) override; }; -const uint32_t maxIdsPerBuild = 1 << 16; +const uint32_t maxIdsPerBuild = + #if __linux__ + 1 << 16 + #else + 1 + #endif + ; class Settings : public Config { @@ -277,16 +283,26 @@ public: multi-user settings with untrusted users. )"}; - #if __linux__ Setting autoAllocateUids{this, false, "auto-allocate-uids", "Whether to allocate UIDs for builders automatically."}; - Setting startId{this, 872415232, "start-id", + Setting startId{this, + #if __linux__ + 872415232, + #else + 56930, + #endif + "start-id", "The first UID and GID to use for dynamic ID allocation."}; - Setting uidCount{this, maxIdsPerBuild * 128, "id-count", + Setting uidCount{this, + #if __linux__ + maxIdsPerBuild * 128, + #else + 128, + #endif + "id-count", "The number of UIDs/GIDs to use for dynamic ID allocation."}; - #endif Setting impersonateLinux26{this, false, "impersonate-linux-26", "Whether to impersonate a Linux 2.6 machine on newer kernels.", diff --git a/src/libstore/lock.cc b/src/libstore/lock.cc index ecc51cebe..f9892bb91 100644 --- a/src/libstore/lock.cc +++ b/src/libstore/lock.cc @@ -122,15 +122,16 @@ struct AutoUserLock : UserLock ~AutoUserLock() { + #if __linux__ // Get rid of our cgroup, ignoring errors. if (cgroup) rmdir(cgroup->c_str()); + #endif } void kill() override { #if __linux__ if (cgroup) { - printError("KILL CGROUP %s", *cgroup); destroyCgroup(*cgroup); if (mkdir(cgroup->c_str(), 0755) == -1) throw SysError("creating cgroup '%s'", *cgroup); @@ -138,7 +139,6 @@ struct AutoUserLock : UserLock #endif { assert(firstUid); - printError("KILL USER %d", firstUid); killUser(firstUid); } } @@ -201,6 +201,7 @@ struct AutoUserLock : UserLock lock->firstUid = settings.startId + i * maxIdsPerBuild; lock->nrIds = nrIds; + #if __linux__ if (nrIds > 1) { auto ourCgroups = getCgroups("/proc/self/cgroup"); auto ourCgroup = ourCgroups[""]; @@ -209,20 +210,17 @@ struct AutoUserLock : UserLock auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup); - printError("PARENT CGROUP = %s", ourCgroupPath); - if (!pathExists(ourCgroupPath)) throw Error("expected cgroup directory '%s'", ourCgroupPath); lock->cgroup = fmt("%s/nix-build-%d", ourCgroupPath, lock->firstUid); - printError("CHILD CGROUP = %s", *lock->cgroup); - /* Record the cgroup in the lock file. This ensures that if we subsequently get executed under a different parent cgroup, we kill the previous cgroup first. */ writeFull(lock->fdUserLock.get(), *lock->cgroup); } + #endif return lock; } diff --git a/src/libstore/lock.hh b/src/libstore/lock.hh index 62676a523..b5536408c 100644 --- a/src/libstore/lock.hh +++ b/src/libstore/lock.hh @@ -4,6 +4,8 @@ #include +#include + namespace nix { struct UserLock From 6c6eff8ac40e2f5d7b6ff8e772feebb1aa484039 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 10 Nov 2022 17:24:12 +0100 Subject: [PATCH 14/36] Remove the SystemdCgroup feature --- src/libstore/build/local-derivation-goal.cc | 23 +++++++-------------- src/libstore/build/local-derivation-goal.hh | 4 ---- src/libutil/experimental-features.cc | 1 - src/libutil/experimental-features.hh | 1 - 4 files changed, 8 insertions(+), 21 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 45ea9968f..e652c425c 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -495,9 +495,6 @@ void LocalDerivationGoal::startBuilder() } } - useSystemdCgroup = parsedDrv->getRequiredSystemFeatures().count("Systemd-cgroup"); - assert(!useSystemdCgroup); - if (useChroot) { /* Allow a user-configurable set of directories from the @@ -649,20 +646,18 @@ void LocalDerivationGoal::startBuilder() dirsInChroot.erase(worker.store.printStorePath(*i.second.second)); } - if (useSystemdCgroup) { - settings.requireExperimentalFeature(Xp::SystemdCgroup); - std::optional cgroup; - if (!buildUser || !(cgroup = buildUser->getCgroup())) - throw Error("feature 'systemd-cgroup' requires 'auto-allocate-uids = true' in nix.conf"); - chownToBuilder(*cgroup); - chownToBuilder(*cgroup + "/cgroup.procs"); + if (buildUser) { + if (auto cgroup = buildUser->getCgroup()) { + chownToBuilder(*cgroup); + chownToBuilder(*cgroup + "/cgroup.procs"); + chownToBuilder(*cgroup + "/cgroup.threads"); + //chownToBuilder(*cgroup + "/cgroup.subtree_control"); + } } #else if (parsedDrv->useUidRange()) throw Error("feature 'uid-range' is not supported on this platform"); - if (useSystemdCgroup) - throw Error("feature 'systemd-cgroup' is not supported on this platform"); #if __APPLE__ /* We don't really have any parent prep work to do (yet?) All work happens in the child, instead. */ @@ -673,8 +668,6 @@ void LocalDerivationGoal::startBuilder() } else { if (parsedDrv->useUidRange()) throw Error("feature 'uid-range' is only supported in sandboxed builds"); - if (useSystemdCgroup) - throw Error("feature 'systemd-cgroup' is only supported in sandboxed builds"); } if (needsHashRewrite() && pathExists(homeDir)) @@ -1845,7 +1838,7 @@ void LocalDerivationGoal::runChild() /* Unshare the cgroup namespace. This means /proc/self/cgroup will show the child's cgroup as '/' rather than whatever it is in the parent. */ - if (useSystemdCgroup && unshare(CLONE_NEWCGROUP) == -1) + if (buildUser && buildUser->getUIDCount() != 1 && unshare(CLONE_NEWCGROUP) == -1) throw SysError("unsharing cgroup namespace"); /* Do the chroot(). */ diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index 61b0f9145..070ae53f3 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -41,10 +41,6 @@ struct LocalDerivationGoal : public DerivationGoal Path chrootRootDir; - /* Whether to make the 'systemd' cgroup controller available to - the build. */ - bool useSystemdCgroup = false; - /* RAII object to delete the chroot directory. */ std::shared_ptr autoDelChroot; diff --git a/src/libutil/experimental-features.cc b/src/libutil/experimental-features.cc index 670079019..0f05f3752 100644 --- a/src/libutil/experimental-features.cc +++ b/src/libutil/experimental-features.cc @@ -15,7 +15,6 @@ std::map stringifiedXpFeatures = { { Xp::FetchClosure, "fetch-closure" }, { Xp::ReplFlake, "repl-flake" }, { Xp::AutoAllocateUids, "auto-allocate-uids" }, - { Xp::SystemdCgroup, "systemd-cgroup" }, }; const std::optional parseExperimentalFeature(const std::string_view & name) diff --git a/src/libutil/experimental-features.hh b/src/libutil/experimental-features.hh index c749d4767..cf0c06eac 100644 --- a/src/libutil/experimental-features.hh +++ b/src/libutil/experimental-features.hh @@ -24,7 +24,6 @@ enum struct ExperimentalFeature FetchClosure, ReplFlake, AutoAllocateUids, - SystemdCgroup, }; /** From f423d4425f6573206045e9626812002906d9493d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 17 Nov 2022 11:56:45 +0100 Subject: [PATCH 15/36] Fix segfault in unprivileged mode --- src/libstore/build/local-derivation-goal.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index 070ae53f3..f92280aa1 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -92,8 +92,8 @@ struct LocalDerivationGoal : public DerivationGoal result. */ std::map prevInfos; - uid_t sandboxUid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 1000 : 0) : buildUser->getUID(); } - gid_t sandboxGid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 100 : 0) : buildUser->getGID(); } + uid_t sandboxUid() { return usingUserNamespace ? (!buildUser || buildUser->getUIDCount() == 1 ? 1000 : 0) : buildUser->getUID(); } + gid_t sandboxGid() { return usingUserNamespace ? (!buildUser || buildUser->getUIDCount() == 1 ? 100 : 0) : buildUser->getGID(); } const static Path homeDir; From f1ab082ac4f589a36a9eb0cd98d1cc235eedc419 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 18 Nov 2022 09:37:11 +0100 Subject: [PATCH 16/36] createTempDir(): Use std::atomic --- src/libutil/filesystem.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/libutil/filesystem.cc b/src/libutil/filesystem.cc index 403389e60..3a732cff8 100644 --- a/src/libutil/filesystem.cc +++ b/src/libutil/filesystem.cc @@ -1,5 +1,6 @@ #include #include +#include #include "finally.hh" #include "util.hh" @@ -10,7 +11,7 @@ namespace fs = std::filesystem; namespace nix { static Path tempName(Path tmpRoot, const Path & prefix, bool includePid, - int & counter) + std::atomic & counter) { tmpRoot = canonPath(tmpRoot.empty() ? getEnv("TMPDIR").value_or("/tmp") : tmpRoot, true); if (includePid) @@ -22,9 +23,9 @@ static Path tempName(Path tmpRoot, const Path & prefix, bool includePid, Path createTempDir(const Path & tmpRoot, const Path & prefix, bool includePid, bool useGlobalCounter, mode_t mode) { - static int globalCounter = 0; - int localCounter = 0; - int & counter(useGlobalCounter ? globalCounter : localCounter); + static std::atomic globalCounter = 0; + std::atomic localCounter = 0; + auto & counter(useGlobalCounter ? globalCounter : localCounter); while (1) { checkInterrupt(); From 128910ba23f586ba1765a137ecff23cfd22cff89 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 18 Nov 2022 10:39:28 +0100 Subject: [PATCH 17/36] Separate cgroup support from auto-uid-allocation The new experimental feature 'cgroups' enables the use of cgroups for all builds. This allows better containment and enables setting resource limits and getting some build stats. --- src/libstore/build/local-derivation-goal.cc | 114 ++++++++++++++------ src/libstore/build/local-derivation-goal.hh | 7 ++ src/libstore/lock.cc | 71 ------------ src/libstore/lock.hh | 7 -- src/libutil/experimental-features.cc | 1 + src/libutil/experimental-features.hh | 1 + 6 files changed, 93 insertions(+), 108 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index e652c425c..2d1e093ca 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -15,6 +15,7 @@ #include "topo-sort.hh" #include "callback.hh" #include "json-utils.hh" +#include "cgroup.hh" #include #include @@ -129,26 +130,36 @@ void LocalDerivationGoal::killChild() if (pid != -1) { worker.childTerminated(this); - if (buildUser) { - /* If we're using a build user, then there is a tricky - race condition: if we kill the build user before the - child has done its setuid() to the build user uid, then - it won't be killed, and we'll potentially lock up in - pid.wait(). So also send a conventional kill to the - child. */ - ::kill(-pid, SIGKILL); /* ignore the result */ - buildUser->kill(); - pid.wait(); - } else - pid.kill(); + /* If we're using a build user, then there is a tricky race + condition: if we kill the build user before the child has + done its setuid() to the build user uid, then it won't be + killed, and we'll potentially lock up in pid.wait(). So + also send a conventional kill to the child. */ + ::kill(-pid, SIGKILL); /* ignore the result */ - assert(pid == -1); + killSandbox(); + + pid.wait(); } DerivationGoal::killChild(); } +void LocalDerivationGoal::killSandbox() +{ + if (cgroup) { + destroyCgroup(*cgroup); + } + + else if (buildUser) { + auto uid = buildUser->getUID(); + assert(uid != 0); + killUser(uid); + } +} + + void LocalDerivationGoal::tryLocalBuild() { unsigned int curBuilds = worker.getNrLocalBuilds(); if (curBuilds >= settings.maxBuildJobs) { @@ -169,10 +180,6 @@ void LocalDerivationGoal::tryLocalBuild() { worker.waitForAWhile(shared_from_this()); return; } - - /* Make sure that no other processes are executing under this - uid. */ - buildUser->kill(); } actLock.reset(); @@ -263,7 +270,7 @@ void LocalDerivationGoal::cleanupPostChildKill() malicious user from leaving behind a process that keeps files open and modifies them after they have been chown'ed to root. */ - if (buildUser) buildUser->kill(); + killSandbox(); /* Terminate the recursive Nix daemon. */ stopDaemon(); @@ -356,6 +363,55 @@ static void linkOrCopy(const Path & from, const Path & to) void LocalDerivationGoal::startBuilder() { + if ((buildUser && buildUser->getUIDCount() != 1) + || settings.isExperimentalFeatureEnabled(Xp::Cgroups)) + { + #if __linux__ + auto ourCgroups = getCgroups("/proc/self/cgroup"); + auto ourCgroup = ourCgroups[""]; + if (ourCgroup == "") + throw Error("cannot determine cgroup name from /proc/self/cgroup"); + + auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup); + + if (!pathExists(ourCgroupPath)) + throw Error("expected cgroup directory '%s'", ourCgroupPath); + + static std::atomic counter{0}; + + cgroup = buildUser + ? fmt("%s/nix-build-uid-%d", ourCgroupPath, buildUser->getUID()) + : fmt("%s/nix-build-pid-%d-%d", ourCgroupPath, getpid(), counter++); + + debug("using cgroup '%s'", *cgroup); + + /* When using a build user, record the cgroup we used for that + user so that if we got interrupted previously, we can kill + any left-over cgroup first. */ + if (buildUser) { + auto cgroupsDir = settings.nixStateDir + "/cgroups"; + createDirs(cgroupsDir); + + auto cgroupFile = fmt("%s/%d", cgroupsDir, buildUser->getUID()); + + if (pathExists(cgroupFile)) { + auto prevCgroup = readFile(cgroupFile); + destroyCgroup(prevCgroup); + } + + writeFile(cgroupFile, *cgroup); + } + + #else + throw Error("cgroups are not supported on this platform"); + #endif + } + + /* Make sure that no other processes are executing under the + sandbox uids. This must be done before any chownToBuilder() + calls. */ + killSandbox(); + /* Right platform? */ if (!parsedDrv->canBuildLocally(worker.store)) throw Error("a '%s' with features {%s} is required to build '%s', but I am a '%s' with features {%s}", @@ -646,13 +702,13 @@ void LocalDerivationGoal::startBuilder() dirsInChroot.erase(worker.store.printStorePath(*i.second.second)); } - if (buildUser) { - if (auto cgroup = buildUser->getCgroup()) { - chownToBuilder(*cgroup); - chownToBuilder(*cgroup + "/cgroup.procs"); - chownToBuilder(*cgroup + "/cgroup.threads"); - //chownToBuilder(*cgroup + "/cgroup.subtree_control"); - } + if (cgroup) { + if (mkdir(cgroup->c_str(), 0755) != 0) + throw SysError("creating cgroup '%s'", *cgroup); + chownToBuilder(*cgroup); + chownToBuilder(*cgroup + "/cgroup.procs"); + chownToBuilder(*cgroup + "/cgroup.threads"); + //chownToBuilder(*cgroup + "/cgroup.subtree_control"); } #else @@ -965,10 +1021,8 @@ void LocalDerivationGoal::startBuilder() } /* Move the child into its own cgroup. */ - if (buildUser) { - if (auto cgroup = buildUser->getCgroup()) - writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); - } + if (cgroup) + writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); /* Signal the builder that we've updated its user namespace. */ writeFull(userNamespaceSync.writeSide.get(), "1"); @@ -1838,7 +1892,7 @@ void LocalDerivationGoal::runChild() /* Unshare the cgroup namespace. This means /proc/self/cgroup will show the child's cgroup as '/' rather than whatever it is in the parent. */ - if (buildUser && buildUser->getUIDCount() != 1 && unshare(CLONE_NEWCGROUP) == -1) + if (cgroup && unshare(CLONE_NEWCGROUP) == -1) throw SysError("unsharing cgroup namespace"); /* Do the chroot(). */ diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index f92280aa1..1ec6b3649 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -15,6 +15,9 @@ struct LocalDerivationGoal : public DerivationGoal /* The process ID of the builder. */ Pid pid; + /* The cgroup of the builder, if any. */ + std::optional cgroup; + /* The temporary directory. */ Path tmpDir; @@ -197,6 +200,10 @@ struct LocalDerivationGoal : public DerivationGoal /* Forcibly kill the child process, if any. */ void killChild() override; + /* Kill any processes running under the build user UID or in the + cgroup of the build. */ + void killSandbox(); + /* Create alternative path calculated from but distinct from the input, so we can avoid overwriting outputs (or other store paths) that already exist. */ diff --git a/src/libstore/lock.cc b/src/libstore/lock.cc index f9892bb91..4fad3bfd2 100644 --- a/src/libstore/lock.cc +++ b/src/libstore/lock.cc @@ -1,7 +1,6 @@ #include "lock.hh" #include "globals.hh" #include "pathlocks.hh" -#include "cgroup.hh" #include #include @@ -15,11 +14,6 @@ struct SimpleUserLock : UserLock gid_t gid; std::vector supplementaryGIDs; - void kill() override - { - killUser(uid); - } - uid_t getUID() override { assert(uid); return uid; } uid_t getUIDCount() override { return 1; } gid_t getGID() override { assert(gid); return gid; } @@ -116,32 +110,6 @@ struct AutoUserLock : UserLock AutoCloseFD fdUserLock; uid_t firstUid = 0; uid_t nrIds = 1; - #if __linux__ - std::optional cgroup; - #endif - - ~AutoUserLock() - { - #if __linux__ - // Get rid of our cgroup, ignoring errors. - if (cgroup) rmdir(cgroup->c_str()); - #endif - } - - void kill() override - { - #if __linux__ - if (cgroup) { - destroyCgroup(*cgroup); - if (mkdir(cgroup->c_str(), 0755) == -1) - throw SysError("creating cgroup '%s'", *cgroup); - } else - #endif - { - assert(firstUid); - killUser(firstUid); - } - } uid_t getUID() override { assert(firstUid); return firstUid; } @@ -183,55 +151,16 @@ struct AutoUserLock : UserLock throw SysError("opening user lock '%s'", fnUserLock); if (lockFile(fd.get(), ltWrite, false)) { - auto s = drainFD(fd.get()); - - #if __linux__ - if (s != "") { - /* Kill the old cgroup, to ensure there are no - processes left over from an interrupted build. */ - destroyCgroup(s); - } - #endif - - if (ftruncate(fd.get(), 0) == -1) - throw Error("truncating user lock"); - auto lock = std::make_unique(); lock->fdUserLock = std::move(fd); lock->firstUid = settings.startId + i * maxIdsPerBuild; lock->nrIds = nrIds; - - #if __linux__ - if (nrIds > 1) { - auto ourCgroups = getCgroups("/proc/self/cgroup"); - auto ourCgroup = ourCgroups[""]; - if (ourCgroup == "") - throw Error("cannot determine cgroup name from /proc/self/cgroup"); - - auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup); - - if (!pathExists(ourCgroupPath)) - throw Error("expected cgroup directory '%s'", ourCgroupPath); - - lock->cgroup = fmt("%s/nix-build-%d", ourCgroupPath, lock->firstUid); - - /* Record the cgroup in the lock file. This ensures that - if we subsequently get executed under a different parent - cgroup, we kill the previous cgroup first. */ - writeFull(lock->fdUserLock.get(), *lock->cgroup); - } - #endif - return lock; } } return nullptr; } - - #if __linux__ - std::optional getCgroup() override { return cgroup; } - #endif }; std::unique_ptr acquireUserLock(uid_t nrIds) diff --git a/src/libstore/lock.hh b/src/libstore/lock.hh index b5536408c..e7ceefab8 100644 --- a/src/libstore/lock.hh +++ b/src/libstore/lock.hh @@ -27,13 +27,6 @@ struct UserLock virtual gid_t getGID() = 0; virtual std::vector getSupplementaryGIDs() = 0; - - /* Kill any processes currently executing as this user. */ - virtual void kill() = 0; - - #if __linux__ - virtual std::optional getCgroup() { return {}; }; - #endif }; /* Acquire a user lock for a UID range of size `nrIds`. Note that this diff --git a/src/libutil/experimental-features.cc b/src/libutil/experimental-features.cc index 0f05f3752..e0902971e 100644 --- a/src/libutil/experimental-features.cc +++ b/src/libutil/experimental-features.cc @@ -15,6 +15,7 @@ std::map stringifiedXpFeatures = { { Xp::FetchClosure, "fetch-closure" }, { Xp::ReplFlake, "repl-flake" }, { Xp::AutoAllocateUids, "auto-allocate-uids" }, + { Xp::Cgroups, "cgroups" }, }; const std::optional parseExperimentalFeature(const std::string_view & name) diff --git a/src/libutil/experimental-features.hh b/src/libutil/experimental-features.hh index cf0c06eac..af775feb0 100644 --- a/src/libutil/experimental-features.hh +++ b/src/libutil/experimental-features.hh @@ -24,6 +24,7 @@ enum struct ExperimentalFeature FetchClosure, ReplFlake, AutoAllocateUids, + Cgroups, }; /** From 20f66c6889aa9d907feee4946702d655b6bd796f Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 18 Nov 2022 13:40:48 +0100 Subject: [PATCH 18/36] Indentation --- src/libcmd/installables.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libcmd/installables.cc b/src/libcmd/installables.cc index e097f23b3..5945a2578 100644 --- a/src/libcmd/installables.cc +++ b/src/libcmd/installables.cc @@ -927,7 +927,7 @@ std::vector, BuiltPath>> Installable::bui case Realise::Outputs: { if (settings.printMissing) - printMissing(store, pathsToBuild, lvlInfo); + printMissing(store, pathsToBuild, lvlInfo); for (auto & buildResult : store->buildPathsWithResults(pathsToBuild, bMode, evalStore)) { if (!buildResult.success()) From fa68eb367e79297bb1c0451cd92ad18a06edce96 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 18 Nov 2022 13:40:59 +0100 Subject: [PATCH 19/36] Get CPU stats from the cgroup --- src/libstore/build-result.hh | 5 ++- src/libstore/build/derivation-goal.cc | 8 +++++ src/libstore/build/local-derivation-goal.cc | 14 +++++--- src/libstore/build/local-derivation-goal.hh | 2 +- src/libstore/cgroup.cc | 39 +++++++++++++++++++-- src/libstore/cgroup.hh | 14 +++++++- 6 files changed, 71 insertions(+), 11 deletions(-) diff --git a/src/libstore/build-result.hh b/src/libstore/build-result.hh index 24fb1f763..a5749cf33 100644 --- a/src/libstore/build-result.hh +++ b/src/libstore/build-result.hh @@ -5,7 +5,7 @@ #include #include - +#include namespace nix { @@ -78,6 +78,9 @@ struct BuildResult was repeated). */ time_t startTime = 0, stopTime = 0; + /* User and system CPU time the build took. */ + std::optional cpuUser, cpuSystem; + bool success() { return status == Built || status == Substituted || status == AlreadyValid || status == ResolvesToAlreadyValid; diff --git a/src/libstore/build/derivation-goal.cc b/src/libstore/build/derivation-goal.cc index 41d2e2a1c..9bc3dc742 100644 --- a/src/libstore/build/derivation-goal.cc +++ b/src/libstore/build/derivation-goal.cc @@ -869,6 +869,14 @@ void DerivationGoal::buildDone() cleanupPostChildKill(); + if (buildResult.cpuUser && buildResult.cpuSystem) { + debug("builder for '%s' terminated with status %d, user CPU %.3fs, system CPU %.3fs", + worker.store.printStorePath(drvPath), + status, + ((double) buildResult.cpuUser->count()) / 1000000, + ((double) buildResult.cpuSystem->count()) / 1000000); + } + bool diskFull = false; try { diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 2d1e093ca..f273ebe8a 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -137,7 +137,7 @@ void LocalDerivationGoal::killChild() also send a conventional kill to the child. */ ::kill(-pid, SIGKILL); /* ignore the result */ - killSandbox(); + killSandbox(true); pid.wait(); } @@ -146,10 +146,14 @@ void LocalDerivationGoal::killChild() } -void LocalDerivationGoal::killSandbox() +void LocalDerivationGoal::killSandbox(bool getStats) { if (cgroup) { - destroyCgroup(*cgroup); + auto stats = destroyCgroup(*cgroup); + if (getStats) { + buildResult.cpuUser = stats.cpuUser; + buildResult.cpuSystem = stats.cpuSystem; + } } else if (buildUser) { @@ -270,7 +274,7 @@ void LocalDerivationGoal::cleanupPostChildKill() malicious user from leaving behind a process that keeps files open and modifies them after they have been chown'ed to root. */ - killSandbox(); + killSandbox(true); /* Terminate the recursive Nix daemon. */ stopDaemon(); @@ -410,7 +414,7 @@ void LocalDerivationGoal::startBuilder() /* Make sure that no other processes are executing under the sandbox uids. This must be done before any chownToBuilder() calls. */ - killSandbox(); + killSandbox(false); /* Right platform? */ if (!parsedDrv->canBuildLocally(worker.store)) diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index 1ec6b3649..34c4e9187 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -202,7 +202,7 @@ struct LocalDerivationGoal : public DerivationGoal /* Kill any processes running under the build user UID or in the cgroup of the build. */ - void killSandbox(); + void killSandbox(bool getStats); /* Create alternative path calculated from but distinct from the input, so we can avoid overwriting outputs (or other store paths) diff --git a/src/libstore/cgroup.cc b/src/libstore/cgroup.cc index 56e980be3..2a485f0f9 100644 --- a/src/libstore/cgroup.cc +++ b/src/libstore/cgroup.cc @@ -31,13 +31,16 @@ std::map getCgroups(const Path & cgroupFile) return cgroups; } -void destroyCgroup(const Path & cgroup) +static CgroupStats destroyCgroup(const Path & cgroup, bool returnStats) { - if (!pathExists(cgroup)) return; + if (!pathExists(cgroup)) return {}; + + if (!pathExists(cgroup + "/cgroup.procs")) + throw Error("'%s' is not a cgroup", cgroup); for (auto & entry : readDirectory(cgroup)) { if (entry.type != DT_DIR) continue; - destroyCgroup(cgroup + "/" + entry.name); + destroyCgroup(cgroup + "/" + entry.name, false); } int round = 1; @@ -79,8 +82,38 @@ void destroyCgroup(const Path & cgroup) round++; } + CgroupStats stats; + + if (returnStats) { + auto cpustatPath = cgroup + "/cpu.stat"; + + if (pathExists(cpustatPath)) { + for (auto & line : tokenizeString>(readFile(cpustatPath), "\n")) { + std::string_view userPrefix = "user_usec "; + if (hasPrefix(line, userPrefix)) { + auto n = string2Int(line.substr(userPrefix.size())); + if (n) stats.cpuUser = std::chrono::microseconds(*n); + } + + std::string_view systemPrefix = "system_usec "; + if (hasPrefix(line, systemPrefix)) { + auto n = string2Int(line.substr(systemPrefix.size())); + if (n) stats.cpuSystem = std::chrono::microseconds(*n); + } + } + } + + } + if (rmdir(cgroup.c_str()) == -1) throw SysError("deleting cgroup '%s'", cgroup); + + return stats; +} + +CgroupStats destroyCgroup(const Path & cgroup) +{ + return destroyCgroup(cgroup, true); } } diff --git a/src/libstore/cgroup.hh b/src/libstore/cgroup.hh index dc6758957..3ead4735f 100644 --- a/src/libstore/cgroup.hh +++ b/src/libstore/cgroup.hh @@ -2,13 +2,25 @@ #if __linux__ +#include +#include + #include "types.hh" namespace nix { std::map getCgroups(const Path & cgroupFile); -void destroyCgroup(const Path & cgroup); +struct CgroupStats +{ + std::optional cpuUser, cpuSystem; +}; + +/* Destroy the cgroup denoted by 'path'. The postcondition is that + 'path' does not exist, and thus any processes in the cgroup have + been killed. Also return statistics from the cgroup just before + destruction. */ +CgroupStats destroyCgroup(const Path & cgroup); } From e6b71f84a0a766429fdceaf188ea0167e36a20d9 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 18 Nov 2022 16:59:36 +0100 Subject: [PATCH 20/36] Use cgroup.kill to quickly kill cgroups --- src/libstore/cgroup.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/libstore/cgroup.cc b/src/libstore/cgroup.cc index 2a485f0f9..f693d77be 100644 --- a/src/libstore/cgroup.cc +++ b/src/libstore/cgroup.cc @@ -35,9 +35,19 @@ static CgroupStats destroyCgroup(const Path & cgroup, bool returnStats) { if (!pathExists(cgroup)) return {}; - if (!pathExists(cgroup + "/cgroup.procs")) + auto procsFile = cgroup + "/cgroup.procs"; + + if (!pathExists(procsFile)) throw Error("'%s' is not a cgroup", cgroup); + /* Use the fast way to kill every process in a cgroup, if + available. */ + auto killFile = cgroup + "/cgroup.kill"; + if (pathExists(killFile)) + writeFile(killFile, "1"); + + /* Otherwise, manually kill every process in the subcgroups and + this cgroup. */ for (auto & entry : readDirectory(cgroup)) { if (entry.type != DT_DIR) continue; destroyCgroup(cgroup + "/" + entry.name, false); @@ -48,7 +58,7 @@ static CgroupStats destroyCgroup(const Path & cgroup, bool returnStats) std::unordered_set pidsShown; while (true) { - auto pids = tokenizeString>(readFile(cgroup + "/cgroup.procs")); + auto pids = tokenizeString>(readFile(procsFile)); if (pids.empty()) break; From f538ee434285304cb61cf10bf13127f13bfced1b Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 21 Nov 2022 09:38:08 +0100 Subject: [PATCH 21/36] Rename derivedPathsWithHintsToJSON -> builtPathsToJSON --- src/libstore/derived-path.cc | 2 +- src/libstore/derived-path.hh | 2 +- src/nix/build.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libstore/derived-path.cc b/src/libstore/derived-path.cc index 44587ae78..7fe9b9648 100644 --- a/src/libstore/derived-path.cc +++ b/src/libstore/derived-path.cc @@ -64,7 +64,7 @@ nlohmann::json stuffToJSON(const std::vector & ts, ref store) { return res; } -nlohmann::json derivedPathsWithHintsToJSON(const BuiltPaths & buildables, ref store) +nlohmann::json builtPathsToJSON(const BuiltPaths & buildables, ref store) { return stuffToJSON(buildables, store); } nlohmann::json derivedPathsToJSON(const DerivedPaths & paths, ref store) { return stuffToJSON(paths, store); } diff --git a/src/libstore/derived-path.hh b/src/libstore/derived-path.hh index 24a0ae773..b2d0956b8 100644 --- a/src/libstore/derived-path.hh +++ b/src/libstore/derived-path.hh @@ -125,7 +125,7 @@ struct BuiltPath : _BuiltPathRaw { typedef std::vector DerivedPaths; typedef std::vector BuiltPaths; -nlohmann::json derivedPathsWithHintsToJSON(const BuiltPaths & buildables, ref store); +nlohmann::json builtPathsToJSON(const BuiltPaths & buildables, ref store); nlohmann::json derivedPathsToJSON(const DerivedPaths & , ref store); } diff --git a/src/nix/build.cc b/src/nix/build.cc index 9c648d28e..2b91f8e0a 100644 --- a/src/nix/build.cc +++ b/src/nix/build.cc @@ -78,7 +78,7 @@ struct CmdBuild : InstallablesCommand, MixDryRun, MixJSON, MixProfile Realise::Outputs, installables, buildMode); - if (json) logger->cout("%s", derivedPathsWithHintsToJSON(buildables, store).dump()); + if (json) logger->cout("%s", builtPathsToJSON(buildables, store).dump()); if (outLink != "") if (auto store2 = store.dynamic_pointer_cast()) From 300753d594fd7cd818d08f9c7a18a9ebc305bd95 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 21 Nov 2022 10:49:01 +0100 Subject: [PATCH 22/36] nix build --json: Include build statistics Example: # nix build -L --extra-experimental-features cgroups --impure --expr 'with import {}; runCommand "foo" {} "dd if=/dev/urandom bs=1M count=1024 | md5sum; mkdir $out"' --json [ { "cpuSystem": 1.911431, "cpuUser": 1.214249, "drvPath": "/nix/store/xzdqz67xba18hljhycp0hwfigzrs2z69-foo.drv", "outputs": { "out": "/nix/store/rh9mc9l2gkpq8kn2sgzndr6ll7ffjh6l-foo" }, "startTime": 1669024076, "stopTime": 1669024079 } ] --- src/libcmd/installables.cc | 29 ++++++++++++++----------- src/libcmd/installables.hh | 11 ++++++++-- src/libstore/derived-path.cc | 23 ++++---------------- src/libstore/derived-path.hh | 3 --- src/nix/app.cc | 11 +++++++--- src/nix/build.cc | 42 ++++++++++++++++++++++++++++++++---- src/nix/profile.cc | 4 ++-- 7 files changed, 77 insertions(+), 46 deletions(-) diff --git a/src/libcmd/installables.cc b/src/libcmd/installables.cc index 5945a2578..e036b8836 100644 --- a/src/libcmd/installables.cc +++ b/src/libcmd/installables.cc @@ -840,20 +840,20 @@ std::shared_ptr SourceExprCommand::parseInstallable( return installables.front(); } -BuiltPaths Installable::build( +std::vector Installable::build( ref evalStore, ref store, Realise mode, const std::vector> & installables, BuildMode bMode) { - BuiltPaths res; - for (auto & [_, builtPath] : build2(evalStore, store, mode, installables, bMode)) - res.push_back(builtPath); + std::vector res; + for (auto & [_, builtPathWithResult] : build2(evalStore, store, mode, installables, bMode)) + res.push_back(builtPathWithResult); return res; } -std::vector, BuiltPath>> Installable::build2( +std::vector, BuiltPathWithResult>> Installable::build2( ref evalStore, ref store, Realise mode, @@ -873,7 +873,7 @@ std::vector, BuiltPath>> Installable::bui } } - std::vector, BuiltPath>> res; + std::vector, BuiltPathWithResult>> res; switch (mode) { @@ -914,10 +914,10 @@ std::vector, BuiltPath>> Installable::bui output, *drvOutput->second); } } - res.push_back({installable, BuiltPath::Built { bfd.drvPath, outputs }}); + res.push_back({installable, {.path = BuiltPath::Built { bfd.drvPath, outputs }}}); }, [&](const DerivedPath::Opaque & bo) { - res.push_back({installable, BuiltPath::Opaque { bo.path }}); + res.push_back({installable, {.path = BuiltPath::Opaque { bo.path }}}); }, }, path.raw()); } @@ -939,10 +939,10 @@ std::vector, BuiltPath>> Installable::bui std::map outputs; for (auto & path : buildResult.builtOutputs) outputs.emplace(path.first.outputName, path.second.outPath); - res.push_back({installable, BuiltPath::Built { bfd.drvPath, outputs }}); + res.push_back({installable, {.path = BuiltPath::Built { bfd.drvPath, outputs }, .result = buildResult}}); }, [&](const DerivedPath::Opaque & bo) { - res.push_back({installable, BuiltPath::Opaque { bo.path }}); + res.push_back({installable, {.path = BuiltPath::Opaque { bo.path }, .result = buildResult}}); }, }, buildResult.path.raw()); } @@ -965,9 +965,12 @@ BuiltPaths Installable::toBuiltPaths( OperateOn operateOn, const std::vector> & installables) { - if (operateOn == OperateOn::Output) - return Installable::build(evalStore, store, mode, installables); - else { + if (operateOn == OperateOn::Output) { + BuiltPaths res; + for (auto & p : Installable::build(evalStore, store, mode, installables)) + res.push_back(p.path); + return res; + } else { if (mode == Realise::Nothing) settings.readOnlyMode = true; diff --git a/src/libcmd/installables.hh b/src/libcmd/installables.hh index 948f78919..02ea351d3 100644 --- a/src/libcmd/installables.hh +++ b/src/libcmd/installables.hh @@ -7,6 +7,7 @@ #include "eval.hh" #include "store-api.hh" #include "flake/flake.hh" +#include "build-result.hh" #include @@ -51,6 +52,12 @@ enum class OperateOn { Derivation }; +struct BuiltPathWithResult +{ + BuiltPath path; + std::optional result; +}; + struct Installable { virtual ~Installable() { } @@ -91,14 +98,14 @@ struct Installable return FlakeRef::fromAttrs({{"type","indirect"}, {"id", "nixpkgs"}}); } - static BuiltPaths build( + static std::vector build( ref evalStore, ref store, Realise mode, const std::vector> & installables, BuildMode bMode = bmNormal); - static std::vector, BuiltPath>> build2( + static std::vector, BuiltPathWithResult>> build2( ref evalStore, ref store, Realise mode, diff --git a/src/libstore/derived-path.cc b/src/libstore/derived-path.cc index 7fe9b9648..88b59f615 100644 --- a/src/libstore/derived-path.cc +++ b/src/libstore/derived-path.cc @@ -53,28 +53,13 @@ StorePathSet BuiltPath::outPaths() const ); } -template -nlohmann::json stuffToJSON(const std::vector & ts, ref store) { - auto res = nlohmann::json::array(); - for (const T & t : ts) { - std::visit([&res, store](const auto & t) { - res.push_back(t.toJSON(store)); - }, t.raw()); - } - return res; -} - -nlohmann::json builtPathsToJSON(const BuiltPaths & buildables, ref store) -{ return stuffToJSON(buildables, store); } -nlohmann::json derivedPathsToJSON(const DerivedPaths & paths, ref store) -{ return stuffToJSON(paths, store); } - - -std::string DerivedPath::Opaque::to_string(const Store & store) const { +std::string DerivedPath::Opaque::to_string(const Store & store) const +{ return store.printStorePath(path); } -std::string DerivedPath::Built::to_string(const Store & store) const { +std::string DerivedPath::Built::to_string(const Store & store) const +{ return store.printStorePath(drvPath) + "!" + (outputs.empty() ? std::string { "*" } : concatStringsSep(",", outputs)); diff --git a/src/libstore/derived-path.hh b/src/libstore/derived-path.hh index b2d0956b8..878696136 100644 --- a/src/libstore/derived-path.hh +++ b/src/libstore/derived-path.hh @@ -125,7 +125,4 @@ struct BuiltPath : _BuiltPathRaw { typedef std::vector DerivedPaths; typedef std::vector BuiltPaths; -nlohmann::json builtPathsToJSON(const BuiltPaths & buildables, ref store); -nlohmann::json derivedPathsToJSON(const DerivedPaths & , ref store); - } diff --git a/src/nix/app.cc b/src/nix/app.cc index 48de8fb82..5658f2a52 100644 --- a/src/nix/app.cc +++ b/src/nix/app.cc @@ -37,11 +37,13 @@ struct InstallableDerivedPath : Installable * Return the rewrites that are needed to resolve a string whose context is * included in `dependencies`. */ -StringPairs resolveRewrites(Store & store, const BuiltPaths dependencies) +StringPairs resolveRewrites( + Store & store, + const std::vector & dependencies) { StringPairs res; for (auto & dep : dependencies) - if (auto drvDep = std::get_if(&dep)) + if (auto drvDep = std::get_if(&dep.path)) for (auto & [ outputName, outputPath ] : drvDep->outputs) res.emplace( downstreamPlaceholder(store, drvDep->drvPath, outputName), @@ -53,7 +55,10 @@ StringPairs resolveRewrites(Store & store, const BuiltPaths dependencies) /** * Resolve the given string assuming the given context. */ -std::string resolveString(Store & store, const std::string & toResolve, const BuiltPaths dependencies) +std::string resolveString( + Store & store, + const std::string & toResolve, + const std::vector & dependencies) { auto rewrites = resolveRewrites(store, dependencies); return rewriteStrings(toResolve, rewrites); diff --git a/src/nix/build.cc b/src/nix/build.cc index 2b91f8e0a..94b169167 100644 --- a/src/nix/build.cc +++ b/src/nix/build.cc @@ -10,6 +10,37 @@ using namespace nix; +nlohmann::json derivedPathsToJSON(const DerivedPaths & paths, ref store) +{ + auto res = nlohmann::json::array(); + for (auto & t : paths) { + std::visit([&res, store](const auto & t) { + res.push_back(t.toJSON(store)); + }, t.raw()); + } + return res; +} + +nlohmann::json builtPathsWithResultToJSON(const std::vector & buildables, ref store) +{ + auto res = nlohmann::json::array(); + for (auto & b : buildables) { + std::visit([&](const auto & t) { + auto j = t.toJSON(store); + if (b.result) { + j["startTime"] = b.result->startTime; + j["stopTime"] = b.result->stopTime; + if (b.result->cpuUser) + j["cpuUser"] = ((double) b.result->cpuUser->count()) / 1000000; + if (b.result->cpuSystem) + j["cpuSystem"] = ((double) b.result->cpuSystem->count()) / 1000000; + } + res.push_back(j); + }, b.path.raw()); + } + return res; +} + struct CmdBuild : InstallablesCommand, MixDryRun, MixJSON, MixProfile { Path outLink = "result"; @@ -78,7 +109,7 @@ struct CmdBuild : InstallablesCommand, MixDryRun, MixJSON, MixProfile Realise::Outputs, installables, buildMode); - if (json) logger->cout("%s", builtPathsToJSON(buildables, store).dump()); + if (json) logger->cout("%s", builtPathsWithResultToJSON(buildables, store).dump()); if (outLink != "") if (auto store2 = store.dynamic_pointer_cast()) @@ -98,7 +129,7 @@ struct CmdBuild : InstallablesCommand, MixDryRun, MixJSON, MixProfile store2->addPermRoot(output.second, absPath(symlink)); } }, - }, buildable.raw()); + }, buildable.path.raw()); } if (printOutputPaths) { @@ -113,11 +144,14 @@ struct CmdBuild : InstallablesCommand, MixDryRun, MixJSON, MixProfile std::cout << store->printStorePath(output.second) << std::endl; } }, - }, buildable.raw()); + }, buildable.path.raw()); } } - updateProfile(buildables); + BuiltPaths buildables2; + for (auto & b : buildables) + buildables2.push_back(b.path); + updateProfile(buildables2); } }; diff --git a/src/nix/profile.cc b/src/nix/profile.cc index 3814e7d5a..11910523d 100644 --- a/src/nix/profile.cc +++ b/src/nix/profile.cc @@ -253,11 +253,11 @@ struct ProfileManifest static std::map builtPathsPerInstallable( - const std::vector, BuiltPath>> & builtPaths) + const std::vector, BuiltPathWithResult>> & builtPaths) { std::map res; for (auto & [installable, builtPath] : builtPaths) - res[installable.get()].push_back(builtPath); + res[installable.get()].push_back(builtPath.path); return res; } From ec45f4b82eaef8da04f4b828b2b06a77aa3f986f Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 21 Nov 2022 11:12:45 +0100 Subject: [PATCH 23/36] Fix indentation --- src/libstore/local-store.cc | 50 ++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 04223d860..b67668e52 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -897,48 +897,48 @@ void LocalStore::queryPathInfoUncached(const StorePath & path, std::shared_ptr LocalStore::queryPathInfoInternal(State & state, const StorePath & path) { - /* Get the path info. */ + /* Get the path info. */ auto useQueryPathInfo(state.stmts->QueryPathInfo.use()(printStorePath(path))); - if (!useQueryPathInfo.next()) - return std::shared_ptr(); + if (!useQueryPathInfo.next()) + return std::shared_ptr(); - auto id = useQueryPathInfo.getInt(0); + auto id = useQueryPathInfo.getInt(0); - auto narHash = Hash::dummy; - try { - narHash = Hash::parseAnyPrefixed(useQueryPathInfo.getStr(1)); - } catch (BadHash & e) { - throw Error("invalid-path entry for '%s': %s", printStorePath(path), e.what()); - } + auto narHash = Hash::dummy; + try { + narHash = Hash::parseAnyPrefixed(useQueryPathInfo.getStr(1)); + } catch (BadHash & e) { + throw Error("invalid-path entry for '%s': %s", printStorePath(path), e.what()); + } - auto info = std::make_shared(path, narHash); + auto info = std::make_shared(path, narHash); - info->id = id; + info->id = id; - info->registrationTime = useQueryPathInfo.getInt(2); + info->registrationTime = useQueryPathInfo.getInt(2); auto s = (const char *) sqlite3_column_text(state.stmts->QueryPathInfo, 3); - if (s) info->deriver = parseStorePath(s); + if (s) info->deriver = parseStorePath(s); - /* Note that narSize = NULL yields 0. */ - info->narSize = useQueryPathInfo.getInt(4); + /* Note that narSize = NULL yields 0. */ + info->narSize = useQueryPathInfo.getInt(4); - info->ultimate = useQueryPathInfo.getInt(5) == 1; + info->ultimate = useQueryPathInfo.getInt(5) == 1; s = (const char *) sqlite3_column_text(state.stmts->QueryPathInfo, 6); - if (s) info->sigs = tokenizeString(s, " "); + if (s) info->sigs = tokenizeString(s, " "); s = (const char *) sqlite3_column_text(state.stmts->QueryPathInfo, 7); - if (s) info->ca = parseContentAddressOpt(s); + if (s) info->ca = parseContentAddressOpt(s); - /* Get the references. */ + /* Get the references. */ auto useQueryReferences(state.stmts->QueryReferences.use()(info->id)); - while (useQueryReferences.next()) - info->references.insert(parseStorePath(useQueryReferences.getStr(0))); + while (useQueryReferences.next()) + info->references.insert(parseStorePath(useQueryReferences.getStr(0))); - return info; + return info; } @@ -1041,9 +1041,9 @@ LocalStore::queryPartialDerivationOutputMap(const StorePath & path_) auto path = path_; auto outputs = retrySQLite>>([&]() { auto state(_state.lock()); - std::map> outputs; + std::map> outputs; uint64_t drvId; - drvId = queryValidPathId(*state, path); + drvId = queryValidPathId(*state, path); auto use(state->stmts->QueryDerivationOutputs.use()(drvId)); while (use.next()) outputs.insert_or_assign( From 82d5cf2a76ec009fd94a925c22a5e099a0b7321b Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 21 Nov 2022 11:45:41 +0100 Subject: [PATCH 24/36] Fix macOS build --- src/libstore/build/local-derivation-goal.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index f273ebe8a..34f8ab5f1 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -149,11 +149,15 @@ void LocalDerivationGoal::killChild() void LocalDerivationGoal::killSandbox(bool getStats) { if (cgroup) { + #if __linux__ auto stats = destroyCgroup(*cgroup); if (getStats) { buildResult.cpuUser = stats.cpuUser; buildResult.cpuSystem = stats.cpuSystem; } + #else + abort(); + #endif } else if (buildUser) { From 9d17ce07e872e88057480744414e0d1ef4fd5fa8 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 21 Nov 2022 12:55:49 +0100 Subject: [PATCH 25/36] AutoUserLock: If sandboxing is disabled, use the build users group We have to use a gid that has write access to the Nix store. --- src/libstore/build/local-derivation-goal.cc | 60 ++++++++++----------- src/libstore/lock.cc | 22 ++++---- src/libstore/lock.hh | 2 +- 3 files changed, 44 insertions(+), 40 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 34f8ab5f1..b7084384a 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -177,9 +177,38 @@ void LocalDerivationGoal::tryLocalBuild() { return; } + /* Are we doing a chroot build? */ + { + auto noChroot = parsedDrv->getBoolAttr("__noChroot"); + if (settings.sandboxMode == smEnabled) { + if (noChroot) + throw Error("derivation '%s' has '__noChroot' set, " + "but that's not allowed when 'sandbox' is 'true'", worker.store.printStorePath(drvPath)); +#if __APPLE__ + if (additionalSandboxProfile != "") + throw Error("derivation '%s' specifies a sandbox profile, " + "but this is only allowed when 'sandbox' is 'relaxed'", worker.store.printStorePath(drvPath)); +#endif + useChroot = true; + } + else if (settings.sandboxMode == smDisabled) + useChroot = false; + else if (settings.sandboxMode == smRelaxed) + useChroot = derivationType.isSandboxed() && !noChroot; + } + + auto & localStore = getLocalStore(); + if (localStore.storeDir != localStore.realStoreDir.get()) { + #if __linux__ + useChroot = true; + #else + throw Error("building using a diverted store is not supported on this platform"); + #endif + } + if (useBuildUsers()) { if (!buildUser) - buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1); + buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot); if (!buildUser) { if (!actLock) @@ -433,35 +462,6 @@ void LocalDerivationGoal::startBuilder() additionalSandboxProfile = parsedDrv->getStringAttr("__sandboxProfile").value_or(""); #endif - /* Are we doing a chroot build? */ - { - auto noChroot = parsedDrv->getBoolAttr("__noChroot"); - if (settings.sandboxMode == smEnabled) { - if (noChroot) - throw Error("derivation '%s' has '__noChroot' set, " - "but that's not allowed when 'sandbox' is 'true'", worker.store.printStorePath(drvPath)); -#if __APPLE__ - if (additionalSandboxProfile != "") - throw Error("derivation '%s' specifies a sandbox profile, " - "but this is only allowed when 'sandbox' is 'relaxed'", worker.store.printStorePath(drvPath)); -#endif - useChroot = true; - } - else if (settings.sandboxMode == smDisabled) - useChroot = false; - else if (settings.sandboxMode == smRelaxed) - useChroot = derivationType.isSandboxed() && !noChroot; - } - - auto & localStore = getLocalStore(); - if (localStore.storeDir != localStore.realStoreDir.get()) { - #if __linux__ - useChroot = true; - #else - throw Error("building using a diverted store is not supported on this platform"); - #endif - } - /* Create a temporary directory where the build will take place. */ tmpDir = createTempDir("", "nix-build-" + std::string(drvPath.name()), false, false, 0700); diff --git a/src/libstore/lock.cc b/src/libstore/lock.cc index 4fad3bfd2..3b93979a8 100644 --- a/src/libstore/lock.cc +++ b/src/libstore/lock.cc @@ -109,22 +109,18 @@ struct AutoUserLock : UserLock { AutoCloseFD fdUserLock; uid_t firstUid = 0; + gid_t firstGid = 0; uid_t nrIds = 1; uid_t getUID() override { assert(firstUid); return firstUid; } gid_t getUIDCount() override { return nrIds; } - gid_t getGID() override - { - // We use the same GID ranges as for the UIDs. - assert(firstUid); - return firstUid; - } + gid_t getGID() override { assert(firstGid); return firstGid; } std::vector getSupplementaryGIDs() override { return {}; } - static std::unique_ptr acquire(uid_t nrIds) + static std::unique_ptr acquire(uid_t nrIds, bool useChroot) { settings.requireExperimentalFeature(Xp::AutoAllocateUids); assert(settings.startId > 0); @@ -154,6 +150,14 @@ struct AutoUserLock : UserLock auto lock = std::make_unique(); lock->fdUserLock = std::move(fd); lock->firstUid = settings.startId + i * maxIdsPerBuild; + if (useChroot) + lock->firstGid = lock->firstUid; + else { + struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); + if (!gr) + throw Error("the group '%s' specified in 'build-users-group' does not exist", settings.buildUsersGroup); + lock->firstGid = gr->gr_gid; + } lock->nrIds = nrIds; return lock; } @@ -163,10 +167,10 @@ struct AutoUserLock : UserLock } }; -std::unique_ptr acquireUserLock(uid_t nrIds) +std::unique_ptr acquireUserLock(uid_t nrIds, bool useChroot) { if (settings.autoAllocateUids) - return AutoUserLock::acquire(nrIds); + return AutoUserLock::acquire(nrIds, useChroot); else return SimpleUserLock::acquire(); } diff --git a/src/libstore/lock.hh b/src/libstore/lock.hh index e7ceefab8..49ad86de7 100644 --- a/src/libstore/lock.hh +++ b/src/libstore/lock.hh @@ -31,7 +31,7 @@ struct UserLock /* Acquire a user lock for a UID range of size `nrIds`. Note that this may return nullptr if no user is available. */ -std::unique_ptr acquireUserLock(uid_t nrIds); +std::unique_ptr acquireUserLock(uid_t nrIds, bool useChroot); bool useBuildUsers(); From c776dfbb35e961ac3f011ab8665dfc85ab067ef8 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 21 Nov 2022 18:46:55 +0100 Subject: [PATCH 26/36] Use hex for startId Co-authored-by: Linus Heckemann --- src/libstore/globals.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 88fe72202..653d108aa 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -288,7 +288,7 @@ public: Setting startId{this, #if __linux__ - 872415232, + 0x34000000, #else 56930, #endif From b37c2d84b67635fc928ed174166f04d6f4d30c6b Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 22 Nov 2022 09:02:17 +0100 Subject: [PATCH 27/36] Always call setgroups() We shouldn't skip this if the supplementary group list is empty, because then the sandbox won't drop the supplementary groups of the parent (like "root"). --- src/libstore/build/local-derivation-goal.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index b7084384a..232440f74 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -1988,9 +1988,8 @@ void LocalDerivationGoal::runChild() if (setUser && buildUser) { /* Preserve supplementary groups of the build user, to allow admins to specify groups such as "kvm". */ - if (!buildUser->getSupplementaryGIDs().empty() && - setgroups(buildUser->getSupplementaryGIDs().size(), - buildUser->getSupplementaryGIDs().data()) == -1) + auto gids = buildUser->getSupplementaryGIDs(); + if (setgroups(gids.size(), gids.data()) == -1) throw SysError("cannot set supplementary groups of build user"); if (setgid(buildUser->getGID()) == -1 || From 3d23b9d0324ff415af9e5f35568aca98c04a90cc Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 22 Nov 2022 09:03:30 +0100 Subject: [PATCH 28/36] SimpleUserLock::getSupplementaryGIDs(): Filter out main gid This avoids having the user's gid in the supplementary group list as well. --- src/libstore/lock.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/libstore/lock.cc b/src/libstore/lock.cc index 3b93979a8..7459d837d 100644 --- a/src/libstore/lock.cc +++ b/src/libstore/lock.cc @@ -71,21 +71,22 @@ struct SimpleUserLock : UserLock user. This is usually either empty or contains a group such as "kvm". */ int ngroups = 32; // arbitrary initial guess - lock->supplementaryGIDs.resize(ngroups); + std::vector gids; + gids.resize(ngroups); int err = getgrouplist( pw->pw_name, pw->pw_gid, - lock->supplementaryGIDs.data(), + gids.data(), &ngroups); /* Our initial size of 32 wasn't sufficient, the correct size has been stored in ngroups, so we try again. */ if (err == -1) { - lock->supplementaryGIDs.resize(ngroups); + gids.resize(ngroups); err = getgrouplist( pw->pw_name, pw->pw_gid, - lock->supplementaryGIDs.data(), + gids.data(), &ngroups); } @@ -94,7 +95,9 @@ struct SimpleUserLock : UserLock throw Error("failed to get list of supplementary groups for '%s'", pw->pw_name); // Finally, trim back the GID list to its real size. - lock->supplementaryGIDs.resize(ngroups); + for (auto i = 0; i < ngroups; i++) + if (gids[i] != lock->gid) + lock->supplementaryGIDs.push_back(gids[i]); #endif return lock; From 989fc8a8b9cf98addbef85bf909be7b00b0462db Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 23 Nov 2022 15:24:50 +0100 Subject: [PATCH 29/36] Add release notes --- doc/manual/src/release-notes/rl-next.md | 40 +++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index 2069e4578..47181fd39 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -10,3 +10,43 @@ This avoids a lot of spurious errors where some benign strings end-up having a context just because they are read from a store path ([#7260](https://github.com/NixOS/nix/pull/7260)). + +* Nix can now automatically pick UIDs for builds, removing the need to + create `nixbld*` user accounts. these UIDs are allocated starting at + 872415232 on Linux and 56930 on macOS. + + This is an experimental feature. To enable it, add the following to + `nix.conf`: + + ``` + extra-experimental-features = auto-allocate-uids + auto-allocate-uids = true + ``` + +* On Linux, Nix can now run builds in a user namespace where the build + runs as root (UID 0) and has 65,536 UIDs available. This is + primarily useful for running containers such as `systemd-nspawn` + inside a Nix build. + + A build can enable this by requiring the `uid-range` system feature, + i.e. by setting the derivation attribute + + ``` + requiredSystemFeatures = [ "uid-range" ]; + ``` + + The `uid-range` system feature requires the `auto-allocate-uids` + setting to be enabled (see above). + +* On Linux, Nix has experimental support for running builds inside a + cgroup. It can be enabled by adding + + ``` + extra-experimental-features = cgroups + ``` + + to `nix.conf`. It is also automatically enabled for builds that + require the `uid-range` system feature. + +* `nix build --json` now prints some statistics about top-level + derivations, such as CPU statistics when cgroups are enabled. From 2aa3f2e81020c1c780be6329e1133068779c8f08 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 23 Nov 2022 17:07:59 +0100 Subject: [PATCH 30/36] Include UID in hex --- doc/manual/src/release-notes/rl-next.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index 47181fd39..4c91002fb 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -12,8 +12,8 @@ ([#7260](https://github.com/NixOS/nix/pull/7260)). * Nix can now automatically pick UIDs for builds, removing the need to - create `nixbld*` user accounts. these UIDs are allocated starting at - 872415232 on Linux and 56930 on macOS. + create `nixbld*` user accounts. These UIDs are allocated starting at + 872415232 (0x34000000) on Linux and 56930 on macOS. This is an experimental feature. To enable it, add the following to `nix.conf`: From f1b5c6876bc570ff9ac79410d8e47aadcb9aed52 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 27 Nov 2022 16:38:34 +0100 Subject: [PATCH 31/36] Add tests for auto-uid-allocation, uid-range and cgroups --- flake.nix | 6 ++++ tests/containers.nix | 68 ++++++++++++++++++++++++++++++++++++ tests/id-test.nix | 8 +++++ tests/systemd-nspawn.nix | 75 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 157 insertions(+) create mode 100644 tests/containers.nix create mode 100644 tests/id-test.nix create mode 100644 tests/systemd-nspawn.nix diff --git a/flake.nix b/flake.nix index cc2a48d9c..d9d01da10 100644 --- a/flake.nix +++ b/flake.nix @@ -506,6 +506,12 @@ overlay = self.overlays.default; }); + tests.containers = (import ./tests/containers.nix rec { + system = "x86_64-linux"; + inherit nixpkgs; + overlay = self.overlays.default; + }); + tests.setuid = nixpkgs.lib.genAttrs ["i686-linux" "x86_64-linux"] (system: diff --git a/tests/containers.nix b/tests/containers.nix new file mode 100644 index 000000000..d1e791b8c --- /dev/null +++ b/tests/containers.nix @@ -0,0 +1,68 @@ +# Test whether we can run a NixOS container inside a Nix build using systemd-nspawn. +{ nixpkgs, system, overlay }: + +with import (nixpkgs + "/nixos/lib/testing-python.nix") { + inherit system; + extraConfigurations = [ { nixpkgs.overlays = [ overlay ]; } ]; +}; + +makeTest ({ + name = "containers"; + + nodes = + { + host = + { config, lib, pkgs, nodes, ... }: + { virtualisation.writableStore = true; + virtualisation.diskSize = 2048; + virtualisation.additionalPaths = + [ pkgs.stdenv + (import ./systemd-nspawn.nix { inherit nixpkgs; }).toplevel + ]; + virtualisation.memorySize = 4096; + nix.binaryCaches = lib.mkForce [ ]; + nix.extraOptions = + '' + extra-experimental-features = nix-command auto-allocate-uids + extra-system-features = uid-range + ''; + nix.nixPath = [ "nixpkgs=${nixpkgs}" ]; + }; + }; + + testScript = { nodes }: '' + start_all() + + host.succeed("nix --version >&2") + + # Test that 'id' gives the expected result in various configurations. + + # Existing UIDs, sandbox. + host.succeed("nix build --no-auto-allocate-uids --sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-1") + host.succeed("[[ $(cat ./result) = 'uid=1000(nixbld) gid=100(nixbld) groups=100(nixbld)' ]]") + + # Existing UIDs, no sandbox. + host.succeed("nix build --no-auto-allocate-uids --no-sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-2") + host.succeed("[[ $(cat ./result) = 'uid=30001(nixbld1) gid=30000(nixbld) groups=30000(nixbld)' ]]") + + # Auto-allocated UIDs, sandbox. + host.succeed("nix build --auto-allocate-uids --sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-3") + host.succeed("[[ $(cat ./result) = 'uid=1000(nixbld) gid=100(nixbld) groups=100(nixbld)' ]]") + + # Auto-allocated UIDs, no sandbox. + host.succeed("nix build --auto-allocate-uids --no-sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-4") + host.succeed("[[ $(cat ./result) = 'uid=872415232 gid=30000(nixbld) groups=30000(nixbld)' ]]") + + # Auto-allocated UIDs, UID range, sandbox. + host.succeed("nix build --auto-allocate-uids --sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-5 --arg uidRange true") + host.succeed("[[ $(cat ./result) = 'uid=0(root) gid=0(root) groups=0(root)' ]]") + + # Auto-allocated UIDs, UID range, no sandbox. + host.fail("nix build --auto-allocate-uids --no-sandbox -L --offline --impure --file ${./id-test.nix} --argstr name id-test-6 --arg uidRange true") + + # Run systemd-nspawn in a Nix build. + host.succeed("nix build --auto-allocate-uids --sandbox -L --offline --impure --file ${./systemd-nspawn.nix} --argstr nixpkgs ${nixpkgs}") + host.succeed("[[ $(cat ./result/msg) = 'Hello World' ]]") + ''; + +}) diff --git a/tests/id-test.nix b/tests/id-test.nix new file mode 100644 index 000000000..8eb9d38f9 --- /dev/null +++ b/tests/id-test.nix @@ -0,0 +1,8 @@ +{ name, uidRange ? false }: + +with import {}; + +runCommand name + { requiredSystemFeatures = if uidRange then ["uid-range"] else []; + } + "id; id > $out" diff --git a/tests/systemd-nspawn.nix b/tests/systemd-nspawn.nix new file mode 100644 index 000000000..49944eba3 --- /dev/null +++ b/tests/systemd-nspawn.nix @@ -0,0 +1,75 @@ +{ nixpkgs }: + +let + + machine = { config, pkgs, ... }: + { + system.stateVersion = "22.05"; + boot.isContainer = true; + systemd.services.console-getty.enable = false; + networking.dhcpcd.enable = false; + + services.httpd = { + enable = true; + adminAddr = "nixos@example.org"; + }; + + systemd.services.test = { + wantedBy = [ "multi-user.target" ]; + after = [ "httpd.service" ]; + script = '' + source /.env + echo "Hello World" > $out/msg + ls -lR /dev > $out/dev + ${pkgs.curl}/bin/curl -sS --fail http://localhost/ > $out/page.html + ''; + unitConfig = { + FailureAction = "exit-force"; + FailureActionExitStatus = 42; + SuccessAction = "exit-force"; + }; + }; + }; + + config = (import (nixpkgs + "/nixos/lib/eval-config.nix") { + modules = [ machine ]; + }).config; + +in + +with import nixpkgs {}; + +runCommand "test" + { buildInputs = [ config.system.path ]; + requiredSystemFeatures = [ "uid-range" ]; + toplevel = config.system.build.toplevel; + } + '' + root=$(pwd)/root + mkdir -p $root $root/etc + + export > $root/.env + + # Make /run a tmpfs to shut up a systemd warning. + mkdir /run + mount -t tmpfs none /run + chmod 0700 /run + + mount -t cgroup2 none /sys/fs/cgroup + + mkdir -p $out + + touch /etc/os-release + echo a5ea3f98dedc0278b6f3cc8c37eeaeac > /etc/machine-id + + SYSTEMD_NSPAWN_UNIFIED_HIERARCHY=1 \ + ${config.systemd.package}/bin/systemd-nspawn \ + --keep-unit \ + -M ${config.networking.hostName} -D "$root" \ + --register=no \ + --resolv-conf=off \ + --bind-ro=/nix/store \ + --bind=$out \ + --private-network \ + $toplevel/init + '' From fc1458561086a6cf2c1311294c9089785288aea3 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 27 Nov 2022 18:58:21 +0100 Subject: [PATCH 32/36] Fix evaluation --- tests/systemd-nspawn.nix | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/systemd-nspawn.nix b/tests/systemd-nspawn.nix index 49944eba3..424436b3f 100644 --- a/tests/systemd-nspawn.nix +++ b/tests/systemd-nspawn.nix @@ -31,13 +31,16 @@ let }; }; - config = (import (nixpkgs + "/nixos/lib/eval-config.nix") { + cfg = (import (nixpkgs + "/nixos/lib/eval-config.nix") { modules = [ machine ]; - }).config; + system = "x86_64-linux"; + }); + + config = cfg.config; in -with import nixpkgs {}; +with cfg._module.args.pkgs; runCommand "test" { buildInputs = [ config.system.path ]; From ff12d1c1a1bb0dcea5a9ac6b8a5036d7e5dc11ca Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 28 Nov 2022 20:49:17 +0100 Subject: [PATCH 33/36] Check that auto-allocated UIDs don't clash with existing accounts --- src/libstore/lock.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/libstore/lock.cc b/src/libstore/lock.cc index 7459d837d..2858137d6 100644 --- a/src/libstore/lock.cc +++ b/src/libstore/lock.cc @@ -127,13 +127,10 @@ struct AutoUserLock : UserLock { settings.requireExperimentalFeature(Xp::AutoAllocateUids); assert(settings.startId > 0); - assert(settings.startId % maxIdsPerBuild == 0); assert(settings.uidCount % maxIdsPerBuild == 0); assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits::max()); assert(nrIds <= maxIdsPerBuild); - // FIXME: check whether the id range overlaps any known users - createDirs(settings.nixStateDir + "/userpool2"); size_t nrSlots = settings.uidCount / maxIdsPerBuild; @@ -150,11 +147,18 @@ struct AutoUserLock : UserLock throw SysError("opening user lock '%s'", fnUserLock); if (lockFile(fd.get(), ltWrite, false)) { + + auto firstUid = settings.startId + i * maxIdsPerBuild; + + auto pw = getpwuid(firstUid); + if (pw) + throw Error("auto-allocated UID %d clashes with existing user account '%s'", firstUid, pw->pw_name); + auto lock = std::make_unique(); lock->fdUserLock = std::move(fd); - lock->firstUid = settings.startId + i * maxIdsPerBuild; + lock->firstUid = firstUid; if (useChroot) - lock->firstGid = lock->firstUid; + lock->firstGid = firstUid; else { struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); if (!gr) From 67bcb99700a0da1395fa063d7c6586740b304598 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 28 Nov 2022 21:54:02 +0100 Subject: [PATCH 34/36] Add a setting for enabling cgroups --- doc/manual/src/release-notes/rl-next.md | 5 +++-- src/libstore/build/local-derivation-goal.cc | 7 ++++++- src/libstore/globals.hh | 23 +++++++++++++++------ tests/containers.nix | 2 +- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index 4c91002fb..db2bd7419 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -43,10 +43,11 @@ ``` extra-experimental-features = cgroups + use-cgroups = true ``` - to `nix.conf`. It is also automatically enabled for builds that - require the `uid-range` system feature. + to `nix.conf`. Cgroups are required for derivations that require the + `uid-range` system feature. * `nix build --json` now prints some statistics about top-level derivations, such as CPU statistics when cgroups are enabled. diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index d44694890..69a7df411 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -401,9 +401,14 @@ static void linkOrCopy(const Path & from, const Path & to) void LocalDerivationGoal::startBuilder() { if ((buildUser && buildUser->getUIDCount() != 1) - || settings.isExperimentalFeatureEnabled(Xp::Cgroups)) + #if __linux__ + || settings.useCgroups + #endif + ) { #if __linux__ + settings.requireExperimentalFeature(Xp::Cgroups); + auto ourCgroups = getCgroups("/proc/self/cgroup"); auto ourCgroup = ourCgroups[""]; if (ourCgroup == "") diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 653d108aa..b40dcfa77 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -304,6 +304,17 @@ public: "id-count", "The number of UIDs/GIDs to use for dynamic ID allocation."}; + #if __linux__ + Setting useCgroups{ + this, false, "use-cgroups", + R"( + Whether to execute builds inside cgroups. Cgroups are + enabled automatically for derivations that require the + `uid-range` system feature. + )" + }; + #endif + Setting impersonateLinux26{this, false, "impersonate-linux-26", "Whether to impersonate a Linux 2.6 machine on newer kernels.", {"build-impersonate-linux-26"}}; @@ -592,10 +603,10 @@ public: cache) must have a signature by a trusted key. A trusted key is one listed in `trusted-public-keys`, or a public key counterpart to a private key stored in a file listed in `secret-key-files`. - + Set to `false` to disable signature checking and trust all non-content-addressed paths unconditionally. - + (Content-addressed paths are inherently trustworthy and thus unaffected by this configuration option.) )"}; @@ -681,7 +692,7 @@ public: is `root`. > **Warning** - > + > > Adding a user to `trusted-users` is essentially equivalent to > giving that user root access to the system. For example, the user > can set `sandbox-paths` and thereby obtain read access to @@ -771,13 +782,13 @@ public: The program executes with no arguments. The program's environment contains the following environment variables: - - `DRV_PATH` + - `DRV_PATH` The derivation for the built paths. Example: `/nix/store/5nihn1a7pa8b25l9zafqaqibznlvvp3f-bash-4.4-p23.drv` - - `OUT_PATHS` + - `OUT_PATHS` Output paths of the built derivation, separated by a space character. @@ -815,7 +826,7 @@ public: documentation](https://ec.haxx.se/usingcurl-netrc.html). > **Note** - > + > > This must be an absolute path, and `~` is not resolved. For > example, `~/.netrc` won't resolve to your home directory's > `.netrc`. diff --git a/tests/containers.nix b/tests/containers.nix index d1e791b8c..59e953c3b 100644 --- a/tests/containers.nix +++ b/tests/containers.nix @@ -23,7 +23,7 @@ makeTest ({ nix.binaryCaches = lib.mkForce [ ]; nix.extraOptions = '' - extra-experimental-features = nix-command auto-allocate-uids + extra-experimental-features = nix-command auto-allocate-uids cgroups extra-system-features = uid-range ''; nix.nixPath = [ "nixpkgs=${nixpkgs}" ]; From 7dd3e1fec47b9dd6aa6a0b9a58962078a8499453 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 28 Nov 2022 22:04:51 +0100 Subject: [PATCH 35/36] Add example --- doc/manual/src/release-notes/rl-next.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index db2bd7419..8b314b5f6 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -26,7 +26,8 @@ * On Linux, Nix can now run builds in a user namespace where the build runs as root (UID 0) and has 65,536 UIDs available. This is primarily useful for running containers such as `systemd-nspawn` - inside a Nix build. + inside a Nix build. For an example, see + https://github.com/NixOS/nix/blob/67bcb99700a0da1395fa063d7c6586740b304598/tests/systemd-nspawn.nix. A build can enable this by requiring the `uid-range` system feature, i.e. by setting the derivation attribute From 4f762e2b023fd451fdbab0de8d6394dd7201640d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 29 Nov 2022 13:10:53 +0100 Subject: [PATCH 36/36] Restore ownership of / for non-uid-range builds --- src/libstore/build/local-derivation-goal.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 69a7df411..359966288 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -646,8 +646,7 @@ void LocalDerivationGoal::startBuilder() if (mkdir(chrootRootDir.c_str(), buildUser && buildUser->getUIDCount() != 1 ? 0755 : 0750) == -1) throw SysError("cannot create '%1%'", chrootRootDir); - // FIXME: only make root writable for user namespace builds. - if (buildUser && chown(chrootRootDir.c_str(), buildUser->getUID(), buildUser->getGID()) == -1) + if (buildUser && chown(chrootRootDir.c_str(), buildUser->getUIDCount() != 1 ? buildUser->getUID() : 0, buildUser->getGID()) == -1) throw SysError("cannot change ownership of '%1%'", chrootRootDir); /* Create a writable /tmp in the chroot. Many builders need