From 167d12b02cc8cadfaf7c28959532030d65687a8f Mon Sep 17 00:00:00 2001 From: Shea Levy Date: Mon, 18 Jul 2016 18:50:27 -0400 Subject: [PATCH 1/4] build-remote: Implement in C++ --- .gitignore | 2 + Makefile | 1 + src/build-remote/build-remote.cc | 280 +++++++++++++++++++++++++++++++ src/build-remote/local.mk | 11 ++ src/libstore/derivations.cc | 3 - src/libstore/pathlocks.cc | 2 + src/libstore/ssh-store.cc | 2 + src/libstore/store-api.cc | 33 ++++ src/libstore/store-api.hh | 2 + src/libutil/util.hh | 3 + src/nix/copy.cc | 28 +--- tests/remote-builds.nix | 1 + 12 files changed, 338 insertions(+), 30 deletions(-) create mode 100644 src/build-remote/build-remote.cc create mode 100644 src/build-remote/local.mk diff --git a/.gitignore b/.gitignore index 9b4ae6e15..92f95fe1f 100644 --- a/.gitignore +++ b/.gitignore @@ -81,6 +81,8 @@ Makefile.config # /src/nix-build/ /src/nix-build/nix-build +/src/build-remote/build-remote + # /tests/ /tests/test-tmp /tests/common.sh diff --git a/Makefile b/Makefile index 2ee40b56b..8815d5c8e 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ makefiles = \ src/resolve-system-dependencies/local.mk \ src/nix-channel/local.mk \ src/nix-build/local.mk \ + src/build-remote/local.mk \ perl/local.mk \ scripts/local.mk \ corepkgs/local.mk \ diff --git a/src/build-remote/build-remote.cc b/src/build-remote/build-remote.cc new file mode 100644 index 000000000..03bbe68b2 --- /dev/null +++ b/src/build-remote/build-remote.cc @@ -0,0 +1,280 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "shared.hh" +#include "pathlocks.hh" +#include "globals.hh" +#include "serve-protocol.hh" +#include "serialise.hh" +#include "store-api.hh" +#include "derivations.hh" + +using namespace nix; +using std::cerr; +using std::cin; + +static void handle_alarm(int sig) { +} + +class machine { + const std::vector supportedFeatures; + const std::vector mandatoryFeatures; + +public: + const string hostName; + const std::vector systemTypes; + const string sshKey; + const unsigned long long maxJobs; + const unsigned long long speedFactor; + bool enabled; + + bool allSupported(const std::vector & features) const { + return std::all_of(features.begin(), features.end(), + [&](const string & feature) { + return std::find(supportedFeatures.begin(), + supportedFeatures.end(), + feature) != supportedFeatures.end() || + std::find(mandatoryFeatures.begin(), + mandatoryFeatures.end(), + feature) != mandatoryFeatures.end(); + }); + } + + bool mandatoryMet(const std::vector & features) const { + return std::all_of(mandatoryFeatures.begin(), mandatoryFeatures.end(), + [&](const string & feature) { + return std::find(features.begin(), features.end(), feature) != features.end(); + }); + } + + machine(decltype(hostName) hostName, + decltype(systemTypes) systemTypes, + decltype(sshKey) sshKey, + decltype(maxJobs) maxJobs, + decltype(speedFactor) speedFactor, + decltype(supportedFeatures) supportedFeatures, + decltype(mandatoryFeatures) mandatoryFeatures) : + supportedFeatures{std::move(supportedFeatures)}, + mandatoryFeatures{std::move(mandatoryFeatures)}, + hostName{std::move(hostName)}, + systemTypes{std::move(systemTypes)}, + sshKey{std::move(sshKey)}, + maxJobs{std::move(maxJobs)}, + speedFactor{speedFactor == 0 ? 1 : std::move(speedFactor)}, + enabled{true} {}; +};; + +static std::vector read_conf() { + auto conf = getEnv("NIX_REMOTE_SYSTEMS", SYSCONFDIR "/nix/machines"); + + auto machines = std::vector{}; + auto confFile = std::ifstream{conf}; + if (confFile.good()) { + confFile.exceptions(std::ifstream::badbit); + for (string line; getline(confFile, line);) { + chomp(line); + line.erase(std::find(line.begin(), line.end(), '#'), line.end()); + if (line.empty()) { + continue; + } + auto tokens = tokenizeString>(line); + auto sz = tokens.size(); + if (sz < 4) { + throw new FormatError(format("Bad machines.conf file %1%") + % conf); + } + machines.emplace_back(tokens[0], + tokenizeString>(tokens[1], ","), + tokens[2], + stoull(tokens[3]), + sz >= 5 ? stoull(tokens[4]) : 1LL, + sz >= 6 ? + tokenizeString>(tokens[5], ",") : + std::vector{}, + sz >= 7 ? + tokenizeString>(tokens[6], ",") : + std::vector{}); + } + } + confFile.close(); + return machines; +} + +static string currentLoad; + +static int openSlotLock(const machine & m, unsigned long long slot) { + auto fn_stream = std::stringstream(currentLoad, std::ios_base::ate | std::ios_base::out); + fn_stream << "/"; + for (auto t : m.systemTypes) { + fn_stream << t << "-"; + } + fn_stream << m.hostName << "-" << slot; + return openLockFile(fn_stream.str(), true); +} + +static char display_env[] = "DISPLAY="; +static char ssh_env[] = "SSH_ASKPASS="; + +int main (int argc, char * * argv) +{ + return handleExceptions(argv[0], [&]() { + initNix(); + /* Ensure we don't get any SSH passphrase or host key popups. */ + if (putenv(display_env) == -1 || + putenv(ssh_env) == -1) { + throw SysError("Setting SSH env vars"); + } + + if (argc != 4) { + throw UsageError("called without required arguments"); + } + + auto store = openStore(); + + auto localSystem = argv[1]; + settings.maxSilentTime = strtoull(argv[2], NULL, 10); + settings.buildTimeout = strtoull(argv[3], NULL, 10); + + currentLoad = getEnv("NIX_CURRENT_LOAD", "/run/nix/current-load"); + + std::shared_ptr sshStore; + AutoCloseFD bestSlotLock; + + auto machines = read_conf(); + string drvPath; + string hostName; + for (string line; getline(cin, line);) { + auto tokens = tokenizeString>(line); + auto sz = tokens.size(); + if (sz != 3 && sz != 4) { + throw Error(format("invalid build hook line %1%") % line); + } + auto amWilling = tokens[0] == "1"; + auto neededSystem = tokens[1]; + drvPath = tokens[2]; + auto requiredFeatures = sz == 3 ? + std::vector{} : + tokenizeString>(tokens[3], ","); + auto canBuildLocally = amWilling && (neededSystem == localSystem); + + /* Error ignored here, will be caught later */ + mkdir(currentLoad.c_str(), 0777); + + while (true) { + bestSlotLock = -1; + AutoCloseFD lock = openLockFile(currentLoad + "/main-lock", true); + lockFile(lock.get(), ltWrite, true); + + bool rightType = false; + + machine * bestMachine = nullptr; + unsigned long long bestLoad = 0; + for (auto & m : machines) { + if (m.enabled && std::find(m.systemTypes.begin(), + m.systemTypes.end(), + neededSystem) != m.systemTypes.end() && + m.allSupported(requiredFeatures) && + m.mandatoryMet(requiredFeatures)) { + rightType = true; + AutoCloseFD free; + unsigned long long load = 0; + for (unsigned long long slot = 0; slot < m.maxJobs; ++slot) { + AutoCloseFD slotLock = openSlotLock(m, slot); + if (lockFile(slotLock.get(), ltWrite, false)) { + if (!free) { + free = std::move(slotLock); + } + } else { + ++load; + } + } + if (!free) { + continue; + } + bool best = false; + if (!bestSlotLock) { + best = true; + } else if (load / m.speedFactor < bestLoad / bestMachine->speedFactor) { + best = true; + } else if (load / m.speedFactor == bestLoad / bestMachine->speedFactor) { + if (m.speedFactor > bestMachine->speedFactor) { + best = true; + } else if (m.speedFactor == bestMachine->speedFactor) { + if (load < bestLoad) { + best = true; + } + } + } + if (best) { + bestLoad = load; + bestSlotLock = std::move(free); + bestMachine = &m; + } + } + } + + if (!bestSlotLock) { + if (rightType && !canBuildLocally) { + cerr << "# postpone\n"; + } else { + cerr << "# decline\n"; + } + break; + } + + futimens(bestSlotLock.get(), NULL); + + lock = -1; + + try { + sshStore = openStore("ssh://" + bestMachine->hostName + "?key=" + bestMachine->sshKey); + hostName = bestMachine->hostName; + } catch (std::exception & e) { + cerr << e.what() << '\n'; + cerr << "unable to open SSH connection to ‘" << bestMachine->hostName << "’, trying other available machines...\n"; + bestMachine->enabled = false; + continue; + } + goto connected; + } + } +connected: + cerr << "# accept\n"; + string line; + if (!getline(cin, line)) { + throw Error("hook caller didn't send inputs"); + } + auto inputs = tokenizeString>(line); + if (!getline(cin, line)) { + throw Error("hook caller didn't send outputs"); + } + auto outputs = tokenizeString(line); + AutoCloseFD uploadLock = openLockFile(currentLoad + "/" + hostName + ".upload-lock", true); + auto old = signal(SIGALRM, handle_alarm); + alarm(15 * 60); + if (!lockFile(uploadLock.get(), ltWrite, true)) { + cerr << "somebody is hogging the upload lock for " << hostName << ", continuing...\n"; + } + alarm(0); + signal(SIGALRM, old); + copyPaths(store, ref(sshStore), inputs); + uploadLock = -1; + + cerr << "building ‘" << drvPath << "’ on ‘" << hostName << "’\n"; + sshStore->buildDerivation(drvPath, readDerivation(drvPath)); + + std::remove_if(outputs.begin(), outputs.end(), [=](const Path & path) { return store->isValidPath(path); }); + if (!outputs.empty()) { + setenv("NIX_HELD_LOCKS", concatStringsSep(" ", outputs).c_str(), 1); /* FIXME: ugly */ + copyPaths(ref(sshStore), store, outputs); + } + return; + }); +} diff --git a/src/build-remote/local.mk b/src/build-remote/local.mk new file mode 100644 index 000000000..05b8cb451 --- /dev/null +++ b/src/build-remote/local.mk @@ -0,0 +1,11 @@ +programs += build-remote + +build-remote_DIR := $(d) + +build-remote_INSTALL_DIR := $(libexecdir)/nix + +build-remote_LIBS = libmain libutil libformat libstore + +build-remote_SOURCES := $(d)/build-remote.cc + +build-remote_CXXFLAGS = -DSYSCONFDIR="\"$(sysconfdir)\"" -Isrc/nix-store diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index d934bda38..79526c594 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -88,9 +88,6 @@ Path writeDerivation(ref store, } -MakeError(FormatError, Error) - - /* Read string `s' from stream `str'. */ static void expect(std::istream & str, const string & s) { diff --git a/src/libstore/pathlocks.cc b/src/libstore/pathlocks.cc index 8788ee164..8fc862073 100644 --- a/src/libstore/pathlocks.cc +++ b/src/libstore/pathlocks.cc @@ -53,6 +53,8 @@ bool lockFile(int fd, LockType lockType, bool wait) checkInterrupt(); if (errno != EINTR) throw SysError(format("acquiring/releasing lock")); + else + return false; } } else { while (fcntl(fd, F_SETLK, &lock) != 0) { diff --git a/src/libstore/ssh-store.cc b/src/libstore/ssh-store.cc index 516648522..3d0159400 100644 --- a/src/libstore/ssh-store.cc +++ b/src/libstore/ssh-store.cc @@ -49,6 +49,8 @@ SSHStore::SSHStore(string uri, const Params & params, size_t maxConnections) , uri(std::move(uri)) , key(get(params, "ssh-key", "")) { + /* open a connection and perform the handshake to verify all is well */ + connections->get(); } string SSHStore::getUri() diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 37a2d45fe..8fdd62771 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -3,6 +3,7 @@ #include "store-api.hh" #include "util.hh" #include "nar-info-disk-cache.hh" +#include "thread-pool.hh" #include @@ -698,4 +699,36 @@ std::list> getDefaultSubstituters() } +void copyPaths(ref from, ref to, const Paths & storePaths) +{ + std::string copiedLabel = "copied"; + + logger->setExpected(copiedLabel, storePaths.size()); + + ThreadPool pool; + + processGraph(pool, + PathSet(storePaths.begin(), storePaths.end()), + + [&](const Path & storePath) { + return from->queryPathInfo(storePath)->references; + }, + + [&](const Path & storePath) { + checkInterrupt(); + + if (!to->isValidPath(storePath)) { + Activity act(*logger, lvlInfo, format("copying ‘%s’...") % storePath); + + copyStorePath(from, to, storePath); + + logger->incProgress(copiedLabel); + } else + logger->incExpected(copiedLabel, -1); + }); + + pool.process(); +} + + } diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index f6bbc9a84..32523dc78 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -608,6 +608,8 @@ void removeTempRoots(); ref openStore(const std::string & uri = getEnv("NIX_REMOTE")); +void copyPaths(ref from, ref to, const Paths & storePaths); + enum StoreType { tDaemon, tLocal, diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 50b96f7ed..2e48034ae 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -292,6 +292,9 @@ void inline checkInterrupt() MakeError(Interrupted, BaseError) +MakeError(FormatError, Error) + + /* String tokenizer. */ template C tokenizeString(const string & s, const string & separators = " \t\n\r"); diff --git a/src/nix/copy.cc b/src/nix/copy.cc index e8317dc39..976b0d3e0 100644 --- a/src/nix/copy.cc +++ b/src/nix/copy.cc @@ -46,33 +46,7 @@ struct CmdCopy : StorePathsCommand ref srcStore = srcUri.empty() ? store : openStore(srcUri); ref dstStore = dstUri.empty() ? store : openStore(dstUri); - std::string copiedLabel = "copied"; - - logger->setExpected(copiedLabel, storePaths.size()); - - ThreadPool pool; - - processGraph(pool, - PathSet(storePaths.begin(), storePaths.end()), - - [&](const Path & storePath) { - return srcStore->queryPathInfo(storePath)->references; - }, - - [&](const Path & storePath) { - checkInterrupt(); - - if (!dstStore->isValidPath(storePath)) { - Activity act(*logger, lvlInfo, format("copying ‘%s’...") % storePath); - - copyStorePath(srcStore, dstStore, storePath); - - logger->incProgress(copiedLabel); - } else - logger->incExpected(copiedLabel, -1); - }); - - pool.process(); + copyPaths(srcStore, dstStore, storePaths); } }; diff --git a/tests/remote-builds.nix b/tests/remote-builds.nix index 34276e7d6..d14d6ff7f 100644 --- a/tests/remote-builds.nix +++ b/tests/remote-builds.nix @@ -43,6 +43,7 @@ in { config, pkgs, ... }: { nix.maxJobs = 0; # force remote building nix.distributedBuilds = true; + nix.envVars = pkgs.lib.mkAfter { NIX_BUILD_HOOK = "${nix}/libexec/nix/build-remote"; }; nix.buildMachines = [ { hostName = "slave1"; sshUser = "root"; From bff3ad767ec21b37f291a2c4ae0eac7d91470ce5 Mon Sep 17 00:00:00 2001 From: Shea Levy Date: Fri, 29 Jul 2016 08:05:36 -0400 Subject: [PATCH 2/4] build-remote: replace strtoull with stoull to take advantage of C++ error handling --- src/build-remote/build-remote.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/build-remote/build-remote.cc b/src/build-remote/build-remote.cc index 03bbe68b2..82e5bb225 100644 --- a/src/build-remote/build-remote.cc +++ b/src/build-remote/build-remote.cc @@ -139,8 +139,8 @@ int main (int argc, char * * argv) auto store = openStore(); auto localSystem = argv[1]; - settings.maxSilentTime = strtoull(argv[2], NULL, 10); - settings.buildTimeout = strtoull(argv[3], NULL, 10); + settings.maxSilentTime = stoull(string(argv[2])); + settings.buildTimeout = stoull(string(argv[3])); currentLoad = getEnv("NIX_CURRENT_LOAD", "/run/nix/current-load"); From d771c28613908aa8ac6aa72ceeca9048fbbcb776 Mon Sep 17 00:00:00 2001 From: Shea Levy Date: Tue, 10 Jan 2017 10:29:06 -0500 Subject: [PATCH 3/4] build-remote: Use std::set for feature sets --- src/build-remote/build-remote.cc | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/build-remote/build-remote.cc b/src/build-remote/build-remote.cc index 82e5bb225..cf1062636 100644 --- a/src/build-remote/build-remote.cc +++ b/src/build-remote/build-remote.cc @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -24,8 +24,8 @@ static void handle_alarm(int sig) { } class machine { - const std::vector supportedFeatures; - const std::vector mandatoryFeatures; + const std::set supportedFeatures; + const std::set mandatoryFeatures; public: const string hostName; @@ -35,22 +35,18 @@ public: const unsigned long long speedFactor; bool enabled; - bool allSupported(const std::vector & features) const { + bool allSupported(const std::set & features) const { return std::all_of(features.begin(), features.end(), [&](const string & feature) { - return std::find(supportedFeatures.begin(), - supportedFeatures.end(), - feature) != supportedFeatures.end() || - std::find(mandatoryFeatures.begin(), - mandatoryFeatures.end(), - feature) != mandatoryFeatures.end(); + return supportedFeatures.count(feature) || + mandatoryFeatures.count(feature); }); } - bool mandatoryMet(const std::vector & features) const { + bool mandatoryMet(const std::set & features) const { return std::all_of(mandatoryFeatures.begin(), mandatoryFeatures.end(), [&](const string & feature) { - return std::find(features.begin(), features.end(), feature) != features.end(); + return features.count(feature); }); } @@ -96,11 +92,11 @@ static std::vector read_conf() { stoull(tokens[3]), sz >= 5 ? stoull(tokens[4]) : 1LL, sz >= 6 ? - tokenizeString>(tokens[5], ",") : - std::vector{}, + tokenizeString>(tokens[5], ",") : + std::set{}, sz >= 7 ? - tokenizeString>(tokens[6], ",") : - std::vector{}); + tokenizeString>(tokens[6], ",") : + std::set{}); } } confFile.close(); @@ -160,8 +156,8 @@ int main (int argc, char * * argv) auto neededSystem = tokens[1]; drvPath = tokens[2]; auto requiredFeatures = sz == 3 ? - std::vector{} : - tokenizeString>(tokens[3], ","); + std::set{} : + tokenizeString>(tokens[3], ","); auto canBuildLocally = amWilling && (neededSystem == localSystem); /* Error ignored here, will be caught later */ From 28db29786277ce6790ffb1567f9e679c62737b96 Mon Sep 17 00:00:00 2001 From: Shea Levy Date: Tue, 10 Jan 2017 10:36:26 -0500 Subject: [PATCH 4/4] build-remote: Don't use C++ streams to read the conf file --- src/build-remote/build-remote.cc | 60 ++++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/build-remote/build-remote.cc b/src/build-remote/build-remote.cc index cf1062636..98ccc3ddc 100644 --- a/src/build-remote/build-remote.cc +++ b/src/build-remote/build-remote.cc @@ -1,8 +1,6 @@ #include #include #include -#include -#include #include #include #include @@ -71,35 +69,37 @@ static std::vector read_conf() { auto conf = getEnv("NIX_REMOTE_SYSTEMS", SYSCONFDIR "/nix/machines"); auto machines = std::vector{}; - auto confFile = std::ifstream{conf}; - if (confFile.good()) { - confFile.exceptions(std::ifstream::badbit); - for (string line; getline(confFile, line);) { - chomp(line); - line.erase(std::find(line.begin(), line.end(), '#'), line.end()); - if (line.empty()) { - continue; - } - auto tokens = tokenizeString>(line); - auto sz = tokens.size(); - if (sz < 4) { - throw new FormatError(format("Bad machines.conf file %1%") - % conf); - } - machines.emplace_back(tokens[0], - tokenizeString>(tokens[1], ","), - tokens[2], - stoull(tokens[3]), - sz >= 5 ? stoull(tokens[4]) : 1LL, - sz >= 6 ? - tokenizeString>(tokens[5], ",") : - std::set{}, - sz >= 7 ? - tokenizeString>(tokens[6], ",") : - std::set{}); - } + auto lines = std::vector{}; + try { + lines = tokenizeString>(readFile(conf), "\n"); + } catch (const SysError & e) { + if (e.errNo != ENOENT) + throw; + } + for (auto line : lines) { + chomp(line); + line.erase(std::find(line.begin(), line.end(), '#'), line.end()); + if (line.empty()) { + continue; + } + auto tokens = tokenizeString>(line); + auto sz = tokens.size(); + if (sz < 4) { + throw new FormatError(format("Bad machines.conf file %1%") + % conf); + } + machines.emplace_back(tokens[0], + tokenizeString>(tokens[1], ","), + tokens[2], + stoull(tokens[3]), + sz >= 5 ? stoull(tokens[4]) : 1LL, + sz >= 6 ? + tokenizeString>(tokens[5], ",") : + std::set{}, + sz >= 7 ? + tokenizeString>(tokens[6], ",") : + std::set{}); } - confFile.close(); return machines; }