lix/src/build-remote/build-remote.cc

300 lines
11 KiB
C++
Raw Normal View History

2016-07-18 22:50:27 +00:00
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <set>
2016-07-18 22:50:27 +00:00
#include <memory>
#include <tuple>
#include <iomanip>
2017-01-24 12:57:26 +00:00
#if __APPLE__
#include <sys/time.h>
#endif
2016-07-18 22:50:27 +00:00
#include "machines.hh"
2016-07-18 22:50:27 +00:00
#include "shared.hh"
#include "pathlocks.hh"
#include "globals.hh"
#include "serialise.hh"
#include "store-api.hh"
#include "derivations.hh"
#include "local-store.hh"
#include "legacy.hh"
2016-07-18 22:50:27 +00:00
using namespace nix;
using std::cin;
2017-03-03 15:18:49 +00:00
static void handleAlarm(int sig) {
2016-07-18 22:50:27 +00:00
}
std::string escapeUri(std::string uri)
{
std::replace(uri.begin(), uri.end(), '/', '_');
return uri;
}
2016-07-18 22:50:27 +00:00
static string currentLoad;
2020-07-30 11:10:49 +00:00
static AutoCloseFD openSlotLock(const Machine & m, uint64_t slot)
{
return openLockFile(fmt("%s/%s-%d", currentLoad, escapeUri(m.storeUri), slot), true);
2016-07-18 22:50:27 +00:00
}
static bool allSupportedLocally(Store & store, const std::set<std::string>& requiredFeatures) {
for (auto & feature : requiredFeatures)
if (!store.systemFeatures.get().count(feature)) return false;
return true;
}
static int main_build_remote(int argc, char * * argv)
2016-07-18 22:50:27 +00:00
{
{
logger = makeJSONLogger(*logger);
2016-07-18 22:50:27 +00:00
/* Ensure we don't get any SSH passphrase or host key popups. */
unsetenv("DISPLAY");
unsetenv("SSH_ASKPASS");
2016-07-18 22:50:27 +00:00
if (argc != 2)
2016-07-18 22:50:27 +00:00
throw UsageError("called without required arguments");
verbosity = (Verbosity) std::stoll(argv[1]);
FdSource source(STDIN_FILENO);
/* Read the parent's settings. */
while (readInt(source)) {
auto name = readString(source);
auto value = readString(source);
settings.set(name, value);
}
settings.maxBuildJobs.set("1"); // hack to make tests with local?root= work
2016-07-18 22:50:27 +00:00
initPlugins();
auto store = openStore();
2016-07-18 22:50:27 +00:00
/* It would be more appropriate to use $XDG_RUNTIME_DIR, since
2017-07-30 10:28:50 +00:00
that gets cleared on reboot, but it wouldn't work on macOS. */
auto currentLoadName = "/current-load";
if (auto localStore = store.dynamic_pointer_cast<LocalFSStore>())
currentLoad = std::string { localStore->stateDir } + currentLoadName;
else
currentLoad = settings.nixStateDir + currentLoadName;
2016-07-18 22:50:27 +00:00
std::shared_ptr<Store> sshStore;
AutoCloseFD bestSlotLock;
auto machines = getMachines();
2017-05-01 12:43:14 +00:00
debug("got %d remote builders", machines.size());
if (machines.empty()) {
std::cerr << "# decline-permanently\n";
return 0;
}
std::optional<StorePath> drvPath;
string storeUri;
while (true) {
try {
auto s = readString(source);
if (s != "try") return 0;
} catch (EndOfFile &) { return 0; }
auto amWilling = readInt(source);
auto neededSystem = readString(source);
drvPath = store->parseStorePath(readString(source));
auto requiredFeatures = readStrings<std::set<std::string>>(source);
2020-08-05 16:58:00 +00:00
auto canBuildLocally = amWilling
&& ( neededSystem == settings.thisSystem
|| settings.extraPlatforms.get().count(neededSystem) > 0)
&& allSupportedLocally(*store, requiredFeatures);
2016-07-18 22:50:27 +00:00
/* Error ignored here, will be caught later */
mkdir(currentLoad.c_str(), 0777);
while (true) {
bestSlotLock = -1;
AutoCloseFD lock = openLockFile(currentLoad + "/main-lock", true);
lockFile(lock.get(), ltWrite, true);
bool rightType = false;
2017-03-03 15:18:49 +00:00
Machine * bestMachine = nullptr;
2020-07-30 11:10:49 +00:00
uint64_t bestLoad = 0;
2016-07-18 22:50:27 +00:00
for (auto & m : machines) {
debug("considering building on remote machine '%s'", m.storeUri);
2016-07-18 22:50:27 +00:00
if (m.enabled && std::find(m.systemTypes.begin(),
m.systemTypes.end(),
neededSystem) != m.systemTypes.end() &&
m.allSupported(requiredFeatures) &&
m.mandatoryMet(requiredFeatures)) {
rightType = true;
AutoCloseFD free;
2020-07-30 11:10:49 +00:00
uint64_t load = 0;
for (uint64_t slot = 0; slot < m.maxJobs; ++slot) {
2017-01-25 11:51:35 +00:00
auto slotLock = openSlotLock(m, slot);
2016-07-18 22:50:27 +00:00
if (lockFile(slotLock.get(), ltWrite, false)) {
if (!free) {
free = std::move(slotLock);
}
} else {
++load;
}
}
if (!free) {
continue;
}
bool best = false;
if (!bestSlotLock) {
best = true;
} else if (load / m.speedFactor < bestLoad / bestMachine->speedFactor) {
best = true;
} else if (load / m.speedFactor == bestLoad / bestMachine->speedFactor) {
if (m.speedFactor > bestMachine->speedFactor) {
best = true;
} else if (m.speedFactor == bestMachine->speedFactor) {
if (load < bestLoad) {
best = true;
}
}
}
if (best) {
bestLoad = load;
bestSlotLock = std::move(free);
bestMachine = &m;
}
}
}
if (!bestSlotLock) {
2017-03-03 15:18:49 +00:00
if (rightType && !canBuildLocally)
std::cerr << "# postpone\n";
else
2020-08-05 16:58:00 +00:00
{
// build the hint template.
Improve error formatting Changes: * The divider lines are gone. These were in practice a bit confusing, in particular with --show-trace or --keep-going, since then there were multiple lines, suggesting a start/end which wasn't the case. * Instead, multi-line error messages are now indented to align with the prefix (e.g. "error: "). * The 'description' field is gone since we weren't really using it. * 'hint' is renamed to 'msg' since it really wasn't a hint. * The error is now printed *before* the location info. * The 'name' field is no longer printed since most of the time it wasn't very useful since it was just the name of the exception (like EvalError). Ideally in the future this would be a unique, easily googleable error ID (like rustc). * "trace:" is now just "…". This assumes error contexts start with something like "while doing X". Example before: error: --- AssertionError ---------------------------------------------------------------------------------------- nix at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| assertion 'false' failed ----------------------------------------------------- show-trace ----------------------------------------------------- trace: while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) { Example after: error: assertion 'false' failed at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| … while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) {
2021-01-20 23:27:36 +00:00
string errorText =
"Failed to find a machine for remote build!\n"
"derivation: %s\nrequired (system, features): (%s, %s)";
errorText += "\n%s available machines:";
errorText += "\n(systems, maxjobs, supportedFeatures, mandatoryFeatures)";
2020-08-05 16:58:00 +00:00
Improve error formatting Changes: * The divider lines are gone. These were in practice a bit confusing, in particular with --show-trace or --keep-going, since then there were multiple lines, suggesting a start/end which wasn't the case. * Instead, multi-line error messages are now indented to align with the prefix (e.g. "error: "). * The 'description' field is gone since we weren't really using it. * 'hint' is renamed to 'msg' since it really wasn't a hint. * The error is now printed *before* the location info. * The 'name' field is no longer printed since most of the time it wasn't very useful since it was just the name of the exception (like EvalError). Ideally in the future this would be a unique, easily googleable error ID (like rustc). * "trace:" is now just "…". This assumes error contexts start with something like "while doing X". Example before: error: --- AssertionError ---------------------------------------------------------------------------------------- nix at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| assertion 'false' failed ----------------------------------------------------- show-trace ----------------------------------------------------- trace: while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) { Example after: error: assertion 'false' failed at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| … while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) {
2021-01-20 23:27:36 +00:00
for (unsigned int i = 0; i < machines.size(); ++i)
errorText += "\n(%s, %s, %s, %s)";
2020-08-05 16:58:00 +00:00
// add the template values.
2020-08-05 17:26:06 +00:00
string drvstr;
if (drvPath.has_value())
drvstr = drvPath->to_string();
else
drvstr = "<unknown>";
Improve error formatting Changes: * The divider lines are gone. These were in practice a bit confusing, in particular with --show-trace or --keep-going, since then there were multiple lines, suggesting a start/end which wasn't the case. * Instead, multi-line error messages are now indented to align with the prefix (e.g. "error: "). * The 'description' field is gone since we weren't really using it. * 'hint' is renamed to 'msg' since it really wasn't a hint. * The error is now printed *before* the location info. * The 'name' field is no longer printed since most of the time it wasn't very useful since it was just the name of the exception (like EvalError). Ideally in the future this would be a unique, easily googleable error ID (like rustc). * "trace:" is now just "…". This assumes error contexts start with something like "while doing X". Example before: error: --- AssertionError ---------------------------------------------------------------------------------------- nix at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| assertion 'false' failed ----------------------------------------------------- show-trace ----------------------------------------------------- trace: while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) { Example after: error: assertion 'false' failed at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| … while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) {
2021-01-20 23:27:36 +00:00
auto error = hintformat(errorText);
error
% drvstr
% neededSystem
% concatStringsSep<StringSet>(", ", requiredFeatures)
% machines.size();
for (auto & m : machines)
error
% concatStringsSep<vector<string>>(", ", m.systemTypes)
% m.maxJobs
% concatStringsSep<StringSet>(", ", m.supportedFeatures)
% concatStringsSep<StringSet>(", ", m.mandatoryFeatures);
2020-08-05 16:58:00 +00:00
Improve error formatting Changes: * The divider lines are gone. These were in practice a bit confusing, in particular with --show-trace or --keep-going, since then there were multiple lines, suggesting a start/end which wasn't the case. * Instead, multi-line error messages are now indented to align with the prefix (e.g. "error: "). * The 'description' field is gone since we weren't really using it. * 'hint' is renamed to 'msg' since it really wasn't a hint. * The error is now printed *before* the location info. * The 'name' field is no longer printed since most of the time it wasn't very useful since it was just the name of the exception (like EvalError). Ideally in the future this would be a unique, easily googleable error ID (like rustc). * "trace:" is now just "…". This assumes error contexts start with something like "while doing X". Example before: error: --- AssertionError ---------------------------------------------------------------------------------------- nix at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| assertion 'false' failed ----------------------------------------------------- show-trace ----------------------------------------------------- trace: while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) { Example after: error: assertion 'false' failed at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| … while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) {
2021-01-20 23:27:36 +00:00
printMsg(canBuildLocally ? lvlChatty : lvlWarn, error);
2020-08-05 16:58:00 +00:00
2017-03-03 15:18:49 +00:00
std::cerr << "# decline\n";
2020-08-05 16:58:00 +00:00
}
2016-07-18 22:50:27 +00:00
break;
}
#if __APPLE__
futimes(bestSlotLock.get(), NULL);
#else
2016-07-18 22:50:27 +00:00
futimens(bestSlotLock.get(), NULL);
#endif
2016-07-18 22:50:27 +00:00
lock = -1;
try {
Activity act(*logger, lvlTalkative, actUnknown, fmt("connecting to '%s'", bestMachine->storeUri));
sshStore = bestMachine->openStore();
sshStore->connect();
storeUri = bestMachine->storeUri;
2016-07-18 22:50:27 +00:00
} catch (std::exception & e) {
auto msg = chomp(drainFD(5, false));
Improve error formatting Changes: * The divider lines are gone. These were in practice a bit confusing, in particular with --show-trace or --keep-going, since then there were multiple lines, suggesting a start/end which wasn't the case. * Instead, multi-line error messages are now indented to align with the prefix (e.g. "error: "). * The 'description' field is gone since we weren't really using it. * 'hint' is renamed to 'msg' since it really wasn't a hint. * The error is now printed *before* the location info. * The 'name' field is no longer printed since most of the time it wasn't very useful since it was just the name of the exception (like EvalError). Ideally in the future this would be a unique, easily googleable error ID (like rustc). * "trace:" is now just "…". This assumes error contexts start with something like "while doing X". Example before: error: --- AssertionError ---------------------------------------------------------------------------------------- nix at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| assertion 'false' failed ----------------------------------------------------- show-trace ----------------------------------------------------- trace: while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) { Example after: error: assertion 'false' failed at: (7:7) in file: /home/eelco/Dev/nixpkgs/pkgs/applications/misc/hello/default.nix 6| 7| x = assert false; 1; | ^ 8| … while evaluating the attribute 'x' of the derivation 'hello-2.10' at: (192:11) in file: /home/eelco/Dev/nixpkgs/pkgs/stdenv/generic/make-derivation.nix 191| // (lib.optionalAttrs (!(attrs ? name) && attrs ? pname && attrs ? version)) { 192| name = "${attrs.pname}-${attrs.version}"; | ^ 193| } // (lib.optionalAttrs (stdenv.hostPlatform != stdenv.buildPlatform && !dontAddHostSuffix && (attrs ? name || (attrs ? pname && attrs ? version)))) {
2021-01-20 23:27:36 +00:00
printError("cannot build on '%s': %s%s",
bestMachine->storeUri, e.what(),
msg.empty() ? "" : ": " + msg);
2016-07-18 22:50:27 +00:00
bestMachine->enabled = false;
continue;
}
2016-07-18 22:50:27 +00:00
goto connected;
}
}
2017-03-03 15:18:49 +00:00
2016-07-18 22:50:27 +00:00
connected:
close(5);
std::cerr << "# accept\n" << storeUri << "\n";
auto inputs = readStrings<PathSet>(source);
auto outputs = readStrings<PathSet>(source);
AutoCloseFD uploadLock = openLockFile(currentLoad + "/" + escapeUri(storeUri) + ".upload-lock", true);
{
Activity act(*logger, lvlTalkative, actUnknown, fmt("waiting for the upload lock to '%s'", storeUri));
auto old = signal(SIGALRM, handleAlarm);
alarm(15 * 60);
if (!lockFile(uploadLock.get(), ltWrite, true))
printError("somebody is hogging the upload lock for '%s', continuing...");
alarm(0);
signal(SIGALRM, old);
}
auto substitute = settings.buildersUseSubstitutes ? Substitute : NoSubstitute;
{
Activity act(*logger, lvlTalkative, actUnknown, fmt("copying dependencies to '%s'", storeUri));
copyPaths(store, ref<Store>(sshStore), store->parseStorePathSet(inputs), NoRepair, NoCheckSigs, substitute);
}
2016-07-18 22:50:27 +00:00
uploadLock = -1;
auto drv = store->readDerivation(*drvPath);
drv.inputSrcs = store->parseStorePathSet(inputs);
auto result = sshStore->buildDerivation(*drvPath, drv);
if (!result.success())
throw Error("build of '%s' on '%s' failed: %s", store->printStorePath(*drvPath), storeUri, result.errorMsg);
2016-07-18 22:50:27 +00:00
StorePathSet missing;
for (auto & path : outputs)
if (!store->isValidPath(store->parseStorePath(path))) missing.insert(store->parseStorePath(path));
if (!missing.empty()) {
Activity act(*logger, lvlTalkative, actUnknown, fmt("copying outputs from '%s'", storeUri));
if (auto localStore = store.dynamic_pointer_cast<LocalStore>())
for (auto & i : missing)
localStore->locksHeld.insert(store->printStorePath(i)); /* FIXME: ugly */
copyPaths(ref<Store>(sshStore), store, missing, NoRepair, NoCheckSigs, NoSubstitute);
2016-07-18 22:50:27 +00:00
}
return 0;
}
2016-07-18 22:50:27 +00:00
}
static RegisterLegacyCommand r_build_remote("build-remote", main_build_remote);