hydra-eval-jobs: Parallelize

(cherry picked from commit be8eb9d00d)
This commit is contained in:
Eelco Dolstra 2020-02-19 21:10:22 +01:00
parent adf61e5cf8
commit c642f787ee
No known key found for this signature in database
GPG key ID: 8170B4726D7198DE

View file

@ -1,35 +1,63 @@
#include <map> #include <map>
#include <iostream> #include <iostream>
#define GC_LINUX_THREADS 1
#include <gc/gc_allocator.h>
#include "shared.hh" #include "shared.hh"
#include "store-api.hh" #include "store-api.hh"
#include "eval.hh" #include "eval.hh"
#include "eval-inline.hh" #include "eval-inline.hh"
#include "util.hh" #include "util.hh"
#include "json.hh"
#include "get-drvs.hh" #include "get-drvs.hh"
#include "globals.hh" #include "globals.hh"
#include "common-eval-args.hh" #include "common-eval-args.hh"
#include "attr-path.hh"
#include "derivations.hh"
#include "hydra-config.hh" #include "hydra-config.hh"
#include <sys/types.h> #include <sys/types.h>
#include <sys/wait.h> #include <sys/wait.h>
#include <sys/resource.h>
#include <nlohmann/json.hpp>
using namespace nix; using namespace nix;
static Path gcRootsDir; static Path gcRootsDir;
static size_t maxMemorySize;
struct MyArgs : MixEvalArgs, MixCommonArgs
{
Path releaseExpr;
bool dryRun = false;
static void findJobs(EvalState & state, JSONObject & top, MyArgs() : MixCommonArgs("hydra-eval-jobs")
Bindings & autoArgs, Value & v, const string & attrPath); {
mkFlag()
.longName("help")
.description("show usage information")
.handler([&]() {
printHelp(programName, std::cout);
throw Exit();
});
mkFlag()
.longName("gc-roots-dir")
.description("garbage collector roots directory")
.labels({"path"})
.dest(&gcRootsDir);
static string queryMetaStrings(EvalState & state, DrvInfo & drv, const string & name, const string & subAttribute) mkFlag()
.longName("dry-run")
.description("don't create store derivations")
.set(&dryRun, true);
expectArg("expr", &releaseExpr);
}
};
static MyArgs myArgs;
static std::string queryMetaStrings(EvalState & state, DrvInfo & drv, const string & name, const string & subAttribute)
{ {
Strings res; Strings res;
std::function<void(Value & v)> rec; std::function<void(Value & v)> rec;
@ -54,170 +82,135 @@ static string queryMetaStrings(EvalState & state, DrvInfo & drv, const string &
return concatStringsSep(", ", res); return concatStringsSep(", ", res);
} }
static void worker(
static std::string lastAttrPath; EvalState & state,
static bool comma = false; Bindings & autoArgs,
static size_t maxHeapSize; AutoCloseFD & to,
AutoCloseFD & from)
struct BailOut { };
bool lte(const std::string & s1, const std::string & s2)
{ {
size_t p1 = 0, p2 = 0; Value vTop;
state.evalFile(lookupFileArg(state, myArgs.releaseExpr), vTop);
auto vRoot = state.allocValue();
state.autoCallFunction(autoArgs, vTop, *vRoot);
while (true) { while (true) {
if (p1 == s1.size()) return p2 == s2.size(); /* Wait for the master to send us a job name. */
if (p2 == s2.size()) return true; writeLine(to.get(), "next");
auto d1 = s1.find('.', p1); auto s = readLine(from.get());
auto d2 = s2.find('.', p2); if (s == "exit") break;
if (!hasPrefix(s, "do ")) abort();
std::string attrPath(s, 3);
auto c = s1.compare(p1, d1 - p1, s2, p2, d2 - p2); debug("worker process %d at '%s'", getpid(), attrPath);
if (c < 0) return true; /* Evaluate it and send info back to the master. */
if (c > 0) return false; nlohmann::json reply;
p1 = d1 == std::string::npos ? s1.size() : d1 + 1; try {
p2 = d2 == std::string::npos ? s2.size() : d2 + 1; auto v = findAlongAttrPath(state, attrPath, autoArgs, *vRoot).first;
}
} state.forceValue(*v);
if (auto drv = getDerivation(state, *v, false)) {
DrvInfo::Outputs outputs = drv->queryOutputs();
if (drv->querySystem() == "unknown")
throw EvalError("derivation must have a 'system' attribute");
auto drvPath = drv->queryDrvPath();
nlohmann::json job;
job["nixName"] = drv->queryName();
job["system"] =drv->querySystem();
job["drvPath"] = drvPath;
job["description"] = drv->queryMetaString("description");
job["license"] = queryMetaStrings(state, *drv, "license", "shortName");
job["homepage"] = drv->queryMetaString("homepage");
job["maintainers"] = queryMetaStrings(state, *drv, "maintainers", "email");
job["schedulingPriority"] = drv->queryMetaInt("schedulingPriority", 100);
job["timeout"] = drv->queryMetaInt("timeout", 36000);
job["maxSilent"] = drv->queryMetaInt("maxSilent", 7200);
job["isChannel"] = drv->queryMetaBool("isHydraChannel", false);
/* If this is an aggregate, then get its constituents. */
auto a = v->attrs->get(state.symbols.create("_hydraAggregate"));
if (a && state.forceBool(*a->value, *a->pos)) {
auto a = v->attrs->get(state.symbols.create("constituents"));
if (!a)
throw EvalError("derivation must have a constituents attribute");
static void findJobsWrapped(EvalState & state, JSONObject & top, PathSet context;
Bindings & autoArgs, Value & vIn, const string & attrPath) state.coerceToString(*a->pos, *a->value, context, true, false);
{ for (auto & i : context)
if (lastAttrPath != "" && lte(attrPath, lastAttrPath)) return; if (i.at(0) == '!') {
size_t index = i.find("!", 1);
job["constituents"].push_back(string(i, index + 1));
}
debug(format("at path `%1%'") % attrPath); state.forceList(*a->value, *a->pos);
for (unsigned int n = 0; n < a->value->listSize(); ++n) {
checkInterrupt(); auto v = a->value->listElems()[n];
state.forceValue(*v);
Value v; if (v->type == tString)
state.autoCallFunction(autoArgs, vIn, v); job["namedConstituents"].push_back(state.forceStringNoCtx(*v));
if (v.type == tAttrs) {
auto drv = getDerivation(state, v, false);
if (drv) {
Path drvPath;
DrvInfo::Outputs outputs = drv->queryOutputs();
if (drv->querySystem() == "unknown")
throw EvalError("derivation must have a system attribute");
if (comma) { std::cout << ","; comma = false; }
{
auto res = top.object(attrPath);
res.attr("nixName", drv->queryName());
res.attr("system", drv->querySystem());
res.attr("drvPath", drvPath = drv->queryDrvPath());
res.attr("description", drv->queryMetaString("description"));
res.attr("license", queryMetaStrings(state, *drv, "license", "shortName"));
res.attr("homepage", drv->queryMetaString("homepage"));
res.attr("maintainers", queryMetaStrings(state, *drv, "maintainers", "email"));
res.attr("schedulingPriority", drv->queryMetaInt("schedulingPriority", 100));
res.attr("timeout", drv->queryMetaInt("timeout", 36000));
res.attr("maxSilent", drv->queryMetaInt("maxSilent", 7200));
res.attr("isChannel", drv->queryMetaBool("isHydraChannel", false));
/* If this is an aggregate, then get its constituents. */
Bindings::iterator a = v.attrs->find(state.symbols.create("_hydraAggregate"));
if (a != v.attrs->end() && state.forceBool(*a->value, *a->pos)) {
Bindings::iterator a = v.attrs->find(state.symbols.create("constituents"));
if (a == v.attrs->end())
throw EvalError("derivation must have a constituents attribute");
PathSet context;
state.coerceToString(*a->pos, *a->value, context, true, false);
PathSet drvs;
for (auto & i : context)
if (i.at(0) == '!') {
size_t index = i.find("!", 1);
drvs.insert(string(i, index + 1));
} }
res.attr("constituents", concatStringsSep(" ", drvs)); }
/* Register the derivation as a GC root. !!! This
registers roots for jobs that we may have already
done. */
auto localStore = state.store.dynamic_pointer_cast<LocalFSStore>();
if (gcRootsDir != "" && localStore) {
Path root = gcRootsDir + "/" + std::string(baseNameOf(drvPath));
if (!pathExists(root))
localStore->addPermRoot(localStore->parseStorePath(drvPath), root, false);
}
nlohmann::json out;
for (auto & j : outputs)
out[j.first] = j.second;
job["outputs"] = std::move(out);
reply["job"] = std::move(job);
} }
/* Register the derivation as a GC root. !!! This else if (v->type == tAttrs) {
registers roots for jobs that we may have already auto attrs = nlohmann::json::array();
done. */ StringSet ss;
auto localStore = state.store.dynamic_pointer_cast<LocalFSStore>(); for (auto & i : v->attrs->lexicographicOrder()) {
if (gcRootsDir != "" && localStore) {
Path root = gcRootsDir + "/" + std::string(baseNameOf(drvPath));
if (!pathExists(root))
localStore->addPermRoot(localStore->parseStorePath(drvPath), root, false);
}
auto res2 = res.object("outputs");
for (auto & j : outputs)
res2.attr(j.first, j.second);
}
GC_prof_stats_s gc;
GC_get_prof_stats(&gc, sizeof(gc));
if (gc.heapsize_full > maxHeapSize) {
printInfo("restarting hydra-eval-jobs after job '%s' because heap size is at %d bytes", attrPath, gc.heapsize_full);
lastAttrPath = attrPath;
throw BailOut();
}
}
else {
if (!state.isDerivation(v)) {
for (auto & i : v.attrs->lexicographicOrder()) {
std::string name(i->name); std::string name(i->name);
if (name.find('.') != std::string::npos || name.find(' ') != std::string::npos) {
/* Skip jobs with dots in the name. */
if (name.find('.') != std::string::npos) {
printError("skipping job with illegal name '%s'", name); printError("skipping job with illegal name '%s'", name);
continue; continue;
} }
attrs.push_back(name);
findJobs(state, top, autoArgs, *i->value,
(attrPath.empty() ? "" : attrPath + ".") + name);
} }
reply["attrs"] = std::move(attrs);
} }
} catch (EvalError & e) {
reply["error"] = filterANSIEscapes(e.msg(), true);
} }
writeLine(to.get(), reply.dump());
/* If our RSS exceeds the maximum, exit. The master will
start a new process. */
struct rusage r;
getrusage(RUSAGE_SELF, &r);
if ((size_t) r.ru_maxrss > maxMemorySize * 1024) break;
} }
else if (v.type == tNull) { writeLine(to.get(), "restart");
// allow null values, meaning 'do nothing'
}
else
throw TypeError(format("unsupported value: %1%") % v);
} }
static void findJobs(EvalState & state, JSONObject & top,
Bindings & autoArgs, Value & v, const string & attrPath)
{
try {
findJobsWrapped(state, top, autoArgs, v, attrPath);
} catch (EvalError & e) {
if (comma) { std::cout << ","; comma = false; }
auto res = top.object(attrPath);
res.attr("error", filterANSIEscapes(e.msg(), true));
}
}
int main(int argc, char * * argv) int main(int argc, char * * argv)
{ {
assert(lte("abc", "def"));
assert(lte("abc", "def.foo"));
assert(!lte("def", "abc"));
assert(lte("nixpkgs.hello", "nixpkgs"));
assert(lte("nixpkgs.hello", "nixpkgs.hellooo"));
assert(lte("gitAndTools.git-annex.x86_64-darwin", "gitAndTools.git-annex.x86_64-linux"));
assert(lte("gitAndTools.git-annex.x86_64-linux", "gitAndTools.git-annex-remote-b2.aarch64-linux"));
/* Prevent undeclared dependencies in the evaluation via /* Prevent undeclared dependencies in the evaluation via
$NIX_PATH. */ $NIX_PATH. */
unsetenv("NIX_PATH"); unsetenv("NIX_PATH");
@ -226,116 +219,211 @@ int main(int argc, char * * argv)
auto config = std::make_unique<::Config>(); auto config = std::make_unique<::Config>();
auto initialHeapSize = config->getStrOption("evaluator_initial_heap_size", ""); auto nrWorkers = config->getIntOption("evaluator_workers", 1);
if (initialHeapSize != "") maxMemorySize = config->getIntOption("evaluator_max_memory_size", 4096);
setenv("GC_INITIAL_HEAP_SIZE", initialHeapSize.c_str(), 1);
maxHeapSize = config->getIntOption("evaluator_max_heap_size", 1UL << 30);
initNix(); initNix();
initGC(); initGC();
/* Read the current heap size, which is the initial heap size. */
GC_prof_stats_s gc;
GC_get_prof_stats(&gc, sizeof(gc));
auto initialHeapSizeInt = gc.heapsize_full;
/* Then make sure the maximum heap size will be bigger than the initial heap size. */
if (initialHeapSizeInt > maxHeapSize) {
printInfo("warning: evaluator_initial_heap_size (%d) bigger than evaluator_max_heap_size (%d).", initialHeapSizeInt, maxHeapSize);
maxHeapSize = initialHeapSizeInt * 1.1;
printInfo(" evaluator_max_heap_size now set to %d.", maxHeapSize);
}
Path releaseExpr;
struct MyArgs : LegacyArgs, MixEvalArgs
{
using LegacyArgs::LegacyArgs;
};
MyArgs myArgs(baseNameOf(argv[0]), [&](Strings::iterator & arg, const Strings::iterator & end) {
if (*arg == "--gc-roots-dir")
gcRootsDir = getArg(*arg, arg, end);
else if (*arg == "--dry-run")
settings.readOnlyMode = true;
else if (*arg != "" && arg->at(0) == '-')
return false;
else
releaseExpr = *arg;
return true;
});
myArgs.parseCmdline(argvToStrings(argc, argv)); myArgs.parseCmdline(argvToStrings(argc, argv));
JSONObject json(std::cout, true); /* FIXME: The build hook in conjunction with import-from-derivation is causing "unexpected EOF" during eval */
std::cout.flush(); settings.builders = "";
do { /* Prevent access to paths outside of the Nix search path and
to the environment. */
evalSettings.restrictEval = true;
Pipe pipe; if (myArgs.dryRun) settings.readOnlyMode = true;
pipe.create();
ProcessOptions options; if (myArgs.releaseExpr == "") throw UsageError("no expression specified");
options.allowVfork = false;
GC_atfork_prepare(); if (gcRootsDir == "") printMsg(lvlError, "warning: `--gc-roots-dir' not specified");
auto pid = startProcess([&]() { struct State
pipe.readSide = -1; {
std::set<std::string> todo{""};
std::set<std::string> active;
nlohmann::json jobs;
std::exception_ptr exc;
};
GC_atfork_child(); std::condition_variable wakeup;
GC_start_mark_threads();
if (lastAttrPath != "") debug("resuming from '%s'", lastAttrPath); Sync<State> state_;
/* FIXME: The build hook in conjunction with import-from-derivation is causing "unexpected EOF" during eval */ /* Start a handler thread per worker process. */
settings.builders = ""; auto handler = [&]()
{
try {
pid_t pid = -1;
AutoCloseFD from, to;
/* Prevent access to paths outside of the Nix search path and while (true) {
to the environment. */
evalSettings.restrictEval = true;
if (releaseExpr == "") throw UsageError("no expression specified"); /* Start a new worker process if necessary. */
if (pid == -1) {
Pipe toPipe, fromPipe;
toPipe.create();
fromPipe.create();
pid = startProcess(
[&,
to{std::make_shared<AutoCloseFD>(std::move(fromPipe.writeSide))},
from{std::make_shared<AutoCloseFD>(std::move(toPipe.readSide))}
]()
{
try {
EvalState state(myArgs.searchPath, openStore());
Bindings & autoArgs = *myArgs.getAutoArgs(state);
worker(state, autoArgs, *to, *from);
} catch (std::exception & e) {
nlohmann::json err;
err["error"] = e.what();
writeLine(to->get(), err.dump());
}
},
ProcessOptions { .allowVfork = false });
from = std::move(fromPipe.readSide);
to = std::move(toPipe.writeSide);
debug("created worker process %d", pid);
}
if (gcRootsDir == "") printMsg(lvlError, "warning: `--gc-roots-dir' not specified"); /* Check whether the existing worker process is still there. */
auto s = readLine(from.get());
if (s == "restart") {
pid = -1;
continue;
} else if (s != "next") {
auto json = nlohmann::json::parse(s);
throw Error("worker error: %s", (std::string) json["error"]);
}
EvalState state(myArgs.searchPath, openStore()); /* Wait for a job name to become available. */
std::string attrPath;
Bindings & autoArgs = *myArgs.getAutoArgs(state); while (true) {
checkInterrupt();
auto state(state_.lock());
if ((state->todo.empty() && state->active.empty()) || state->exc) {
writeLine(to.get(), "exit");
return;
}
if (!state->todo.empty()) {
attrPath = *state->todo.begin();
state->todo.erase(state->todo.begin());
state->active.insert(attrPath);
break;
} else
state.wait(wakeup);
}
Value v; /* Tell the worker to evaluate it. */
state.evalFile(lookupFileArg(state, releaseExpr), v); writeLine(to.get(), "do " + attrPath);
comma = lastAttrPath != ""; /* Wait for the response. */
auto response = nlohmann::json::parse(readLine(from.get()));
try { /* Handle the response. */
findJobs(state, json, autoArgs, v, ""); StringSet newAttrs;
lastAttrPath = "";
} catch (BailOut &) { }
writeFull(pipe.writeSide.get(), lastAttrPath); if (response.find("job") != response.end()) {
auto state(state_.lock());
state->jobs[attrPath] = response["job"];
}
exit(0); if (response.find("attrs") != response.end()) {
}, options); for (auto & i : response["attrs"]) {
auto s = (attrPath.empty() ? "" : attrPath + ".") + (std::string) i;
newAttrs.insert(s);
}
}
GC_atfork_parent(); if (response.find("error") != response.end()) {
auto state(state_.lock());
state->jobs[attrPath]["error"] = response["error"];
}
pipe.writeSide = -1; /* Add newly discovered job names to the queue. */
{
auto state(state_.lock());
state->active.erase(attrPath);
for (auto & s : newAttrs)
state->todo.insert(s);
wakeup.notify_all();
}
}
} catch (...) {
auto state(state_.lock());
state->exc = std::current_exception();
wakeup.notify_all();
}
};
int status; std::vector<std::thread> threads;
while (true) { for (size_t i = 0; i < nrWorkers; i++)
checkInterrupt(); threads.emplace_back(std::thread(handler));
if (waitpid(pid, &status, 0) == pid) break;
if (errno != EINTR) continue; for (auto & thread : threads)
thread.join();
auto state(state_.lock());
if (state->exc)
std::rethrow_exception(state->exc);
/* For aggregate jobs that have named consistuents
(i.e. constituents that are a job name rather than a
derivation), look up the referenced job and add it to the
dependencies of the aggregate derivation. */
auto store = openStore();
for (auto i = state->jobs.begin(); i != state->jobs.end(); ++i) {
auto jobName = i.key();
auto & job = i.value();
auto named = job.find("namedConstituents");
if (named == job.end()) continue;
if (myArgs.dryRun) {
for (std::string jobName2 : *named) {
auto job2 = state->jobs.find(jobName2);
if (job2 == state->jobs.end())
throw Error("aggregate job '%s' references non-existent job '%s'", jobName, jobName2);
std::string drvPath2 = (*job2)["drvPath"];
job["constituents"].push_back(drvPath2);
}
} else {
std::string drvPath = job["drvPath"];
auto drv = readDerivation(*store, drvPath);
for (std::string jobName2 : *named) {
auto job2 = state->jobs.find(jobName2);
if (job2 == state->jobs.end())
throw Error("aggregate job '%s' references non-existent job '%s'", jobName, jobName2);
std::string drvPath2 = (*job2)["drvPath"];
auto drv2 = readDerivation(*store, drvPath2);
job["constituents"].push_back(drvPath2);
drv.inputDrvs[store->parseStorePath(drvPath2)] = {drv2.outputs.begin()->first};
}
std::string drvName(store->parseStorePath(drvPath).name());
assert(hasSuffix(drvName, drvExtension));
drvName.resize(drvName.size() - drvExtension.size());
auto h = hashDerivationModulo(*store, drv, true);
auto outPath = store->makeOutputPath("out", h, drvName);
drv.env["out"] = store->printStorePath(outPath);
drv.outputs.insert_or_assign("out", DerivationOutput(outPath.clone(), "", ""));
auto newDrvPath = store->printStorePath(writeDerivation(store, drv, drvName));
debug("rewrote aggregate derivation %s -> %s", drvPath, newDrvPath);
job["drvPath"] = newDrvPath;
job["outputs"]["out"] = store->printStorePath(outPath);
} }
if (status != 0) job.erase("namedConstituents");
throw Exit(WIFEXITED(status) ? WEXITSTATUS(status) : 99); }
maxHeapSize += 64 * 1024 * 1024; std::cout << state->jobs.dump(2) << "\n";
lastAttrPath = drainFD(pipe.readSide.get());
} while (lastAttrPath != "");
}); });
} }