From c642f787ee5f4b1fcb14309b19c5841e2a2e5e75 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 19 Feb 2020 21:10:22 +0100 Subject: [PATCH] hydra-eval-jobs: Parallelize (cherry picked from commit be8eb9d00d6a3fcac520bc7dfd5740c0b3ade746) --- src/hydra-eval-jobs/hydra-eval-jobs.cc | 542 ++++++++++++++----------- 1 file changed, 315 insertions(+), 227 deletions(-) diff --git a/src/hydra-eval-jobs/hydra-eval-jobs.cc b/src/hydra-eval-jobs/hydra-eval-jobs.cc index 8abdea7e..1514cf4b 100644 --- a/src/hydra-eval-jobs/hydra-eval-jobs.cc +++ b/src/hydra-eval-jobs/hydra-eval-jobs.cc @@ -1,35 +1,63 @@ #include #include -#define GC_LINUX_THREADS 1 -#include - #include "shared.hh" #include "store-api.hh" #include "eval.hh" #include "eval-inline.hh" #include "util.hh" -#include "json.hh" #include "get-drvs.hh" #include "globals.hh" #include "common-eval-args.hh" +#include "attr-path.hh" +#include "derivations.hh" #include "hydra-config.hh" #include #include +#include + +#include using namespace nix; - static Path gcRootsDir; +static size_t maxMemorySize; +struct MyArgs : MixEvalArgs, MixCommonArgs +{ + Path releaseExpr; + bool dryRun = false; -static void findJobs(EvalState & state, JSONObject & top, - Bindings & autoArgs, Value & v, const string & attrPath); + MyArgs() : MixCommonArgs("hydra-eval-jobs") + { + mkFlag() + .longName("help") + .description("show usage information") + .handler([&]() { + printHelp(programName, std::cout); + throw Exit(); + }); + mkFlag() + .longName("gc-roots-dir") + .description("garbage collector roots directory") + .labels({"path"}) + .dest(&gcRootsDir); -static string queryMetaStrings(EvalState & state, DrvInfo & drv, const string & name, const string & subAttribute) + mkFlag() + .longName("dry-run") + .description("don't create store derivations") + .set(&dryRun, true); + + expectArg("expr", &releaseExpr); + } +}; + +static MyArgs myArgs; + +static std::string queryMetaStrings(EvalState & state, DrvInfo & drv, const string & name, const string & subAttribute) { Strings res; std::function rec; @@ -54,170 +82,135 @@ static string queryMetaStrings(EvalState & state, DrvInfo & drv, const string & return concatStringsSep(", ", res); } - -static std::string lastAttrPath; -static bool comma = false; -static size_t maxHeapSize; - - -struct BailOut { }; - - -bool lte(const std::string & s1, const std::string & s2) +static void worker( + EvalState & state, + Bindings & autoArgs, + AutoCloseFD & to, + AutoCloseFD & from) { - size_t p1 = 0, p2 = 0; + Value vTop; + state.evalFile(lookupFileArg(state, myArgs.releaseExpr), vTop); + + auto vRoot = state.allocValue(); + state.autoCallFunction(autoArgs, vTop, *vRoot); while (true) { - if (p1 == s1.size()) return p2 == s2.size(); - if (p2 == s2.size()) return true; + /* Wait for the master to send us a job name. */ + writeLine(to.get(), "next"); - auto d1 = s1.find('.', p1); - auto d2 = s2.find('.', p2); + auto s = readLine(from.get()); + if (s == "exit") break; + if (!hasPrefix(s, "do ")) abort(); + std::string attrPath(s, 3); - auto c = s1.compare(p1, d1 - p1, s2, p2, d2 - p2); + debug("worker process %d at '%s'", getpid(), attrPath); - if (c < 0) return true; - if (c > 0) return false; + /* Evaluate it and send info back to the master. */ + nlohmann::json reply; - p1 = d1 == std::string::npos ? s1.size() : d1 + 1; - p2 = d2 == std::string::npos ? s2.size() : d2 + 1; - } -} + try { + auto v = findAlongAttrPath(state, attrPath, autoArgs, *vRoot).first; + + state.forceValue(*v); + + if (auto drv = getDerivation(state, *v, false)) { + + DrvInfo::Outputs outputs = drv->queryOutputs(); + + if (drv->querySystem() == "unknown") + throw EvalError("derivation must have a 'system' attribute"); + + auto drvPath = drv->queryDrvPath(); + + nlohmann::json job; + + job["nixName"] = drv->queryName(); + job["system"] =drv->querySystem(); + job["drvPath"] = drvPath; + job["description"] = drv->queryMetaString("description"); + job["license"] = queryMetaStrings(state, *drv, "license", "shortName"); + job["homepage"] = drv->queryMetaString("homepage"); + job["maintainers"] = queryMetaStrings(state, *drv, "maintainers", "email"); + job["schedulingPriority"] = drv->queryMetaInt("schedulingPriority", 100); + job["timeout"] = drv->queryMetaInt("timeout", 36000); + job["maxSilent"] = drv->queryMetaInt("maxSilent", 7200); + job["isChannel"] = drv->queryMetaBool("isHydraChannel", false); + + /* If this is an aggregate, then get its constituents. */ + auto a = v->attrs->get(state.symbols.create("_hydraAggregate")); + if (a && state.forceBool(*a->value, *a->pos)) { + auto a = v->attrs->get(state.symbols.create("constituents")); + if (!a) + throw EvalError("derivation must have a ‘constituents’ attribute"); -static void findJobsWrapped(EvalState & state, JSONObject & top, - Bindings & autoArgs, Value & vIn, const string & attrPath) -{ - if (lastAttrPath != "" && lte(attrPath, lastAttrPath)) return; + PathSet context; + state.coerceToString(*a->pos, *a->value, context, true, false); + for (auto & i : context) + if (i.at(0) == '!') { + size_t index = i.find("!", 1); + job["constituents"].push_back(string(i, index + 1)); + } - debug(format("at path `%1%'") % attrPath); - - checkInterrupt(); - - Value v; - state.autoCallFunction(autoArgs, vIn, v); - - if (v.type == tAttrs) { - - auto drv = getDerivation(state, v, false); - - if (drv) { - Path drvPath; - - DrvInfo::Outputs outputs = drv->queryOutputs(); - - if (drv->querySystem() == "unknown") - throw EvalError("derivation must have a ‘system’ attribute"); - - if (comma) { std::cout << ","; comma = false; } - - { - auto res = top.object(attrPath); - res.attr("nixName", drv->queryName()); - res.attr("system", drv->querySystem()); - res.attr("drvPath", drvPath = drv->queryDrvPath()); - res.attr("description", drv->queryMetaString("description")); - res.attr("license", queryMetaStrings(state, *drv, "license", "shortName")); - res.attr("homepage", drv->queryMetaString("homepage")); - res.attr("maintainers", queryMetaStrings(state, *drv, "maintainers", "email")); - res.attr("schedulingPriority", drv->queryMetaInt("schedulingPriority", 100)); - res.attr("timeout", drv->queryMetaInt("timeout", 36000)); - res.attr("maxSilent", drv->queryMetaInt("maxSilent", 7200)); - res.attr("isChannel", drv->queryMetaBool("isHydraChannel", false)); - - /* If this is an aggregate, then get its constituents. */ - Bindings::iterator a = v.attrs->find(state.symbols.create("_hydraAggregate")); - if (a != v.attrs->end() && state.forceBool(*a->value, *a->pos)) { - Bindings::iterator a = v.attrs->find(state.symbols.create("constituents")); - if (a == v.attrs->end()) - throw EvalError("derivation must have a ‘constituents’ attribute"); - PathSet context; - state.coerceToString(*a->pos, *a->value, context, true, false); - PathSet drvs; - for (auto & i : context) - if (i.at(0) == '!') { - size_t index = i.find("!", 1); - drvs.insert(string(i, index + 1)); + state.forceList(*a->value, *a->pos); + for (unsigned int n = 0; n < a->value->listSize(); ++n) { + auto v = a->value->listElems()[n]; + state.forceValue(*v); + if (v->type == tString) + job["namedConstituents"].push_back(state.forceStringNoCtx(*v)); } - res.attr("constituents", concatStringsSep(" ", drvs)); + } + + /* Register the derivation as a GC root. !!! This + registers roots for jobs that we may have already + done. */ + auto localStore = state.store.dynamic_pointer_cast(); + if (gcRootsDir != "" && localStore) { + Path root = gcRootsDir + "/" + std::string(baseNameOf(drvPath)); + if (!pathExists(root)) + localStore->addPermRoot(localStore->parseStorePath(drvPath), root, false); + } + + nlohmann::json out; + for (auto & j : outputs) + out[j.first] = j.second; + job["outputs"] = std::move(out); + + reply["job"] = std::move(job); } - /* Register the derivation as a GC root. !!! This - registers roots for jobs that we may have already - done. */ - auto localStore = state.store.dynamic_pointer_cast(); - if (gcRootsDir != "" && localStore) { - Path root = gcRootsDir + "/" + std::string(baseNameOf(drvPath)); - if (!pathExists(root)) - localStore->addPermRoot(localStore->parseStorePath(drvPath), root, false); - } - - auto res2 = res.object("outputs"); - for (auto & j : outputs) - res2.attr(j.first, j.second); - - } - - GC_prof_stats_s gc; - GC_get_prof_stats(&gc, sizeof(gc)); - - if (gc.heapsize_full > maxHeapSize) { - printInfo("restarting hydra-eval-jobs after job '%s' because heap size is at %d bytes", attrPath, gc.heapsize_full); - lastAttrPath = attrPath; - throw BailOut(); - } - } - - else { - if (!state.isDerivation(v)) { - for (auto & i : v.attrs->lexicographicOrder()) { + else if (v->type == tAttrs) { + auto attrs = nlohmann::json::array(); + StringSet ss; + for (auto & i : v->attrs->lexicographicOrder()) { std::string name(i->name); - - /* Skip jobs with dots in the name. */ - if (name.find('.') != std::string::npos) { + if (name.find('.') != std::string::npos || name.find(' ') != std::string::npos) { printError("skipping job with illegal name '%s'", name); continue; } - - findJobs(state, top, autoArgs, *i->value, - (attrPath.empty() ? "" : attrPath + ".") + name); + attrs.push_back(name); } + reply["attrs"] = std::move(attrs); } + + } catch (EvalError & e) { + reply["error"] = filterANSIEscapes(e.msg(), true); } + + writeLine(to.get(), reply.dump()); + + /* If our RSS exceeds the maximum, exit. The master will + start a new process. */ + struct rusage r; + getrusage(RUSAGE_SELF, &r); + if ((size_t) r.ru_maxrss > maxMemorySize * 1024) break; } - else if (v.type == tNull) { - // allow null values, meaning 'do nothing' - } - - else - throw TypeError(format("unsupported value: %1%") % v); + writeLine(to.get(), "restart"); } - -static void findJobs(EvalState & state, JSONObject & top, - Bindings & autoArgs, Value & v, const string & attrPath) -{ - try { - findJobsWrapped(state, top, autoArgs, v, attrPath); - } catch (EvalError & e) { - if (comma) { std::cout << ","; comma = false; } - auto res = top.object(attrPath); - res.attr("error", filterANSIEscapes(e.msg(), true)); - } -} - - int main(int argc, char * * argv) { - assert(lte("abc", "def")); - assert(lte("abc", "def.foo")); - assert(!lte("def", "abc")); - assert(lte("nixpkgs.hello", "nixpkgs")); - assert(lte("nixpkgs.hello", "nixpkgs.hellooo")); - assert(lte("gitAndTools.git-annex.x86_64-darwin", "gitAndTools.git-annex.x86_64-linux")); - assert(lte("gitAndTools.git-annex.x86_64-linux", "gitAndTools.git-annex-remote-b2.aarch64-linux")); - /* Prevent undeclared dependencies in the evaluation via $NIX_PATH. */ unsetenv("NIX_PATH"); @@ -226,116 +219,211 @@ int main(int argc, char * * argv) auto config = std::make_unique<::Config>(); - auto initialHeapSize = config->getStrOption("evaluator_initial_heap_size", ""); - if (initialHeapSize != "") - setenv("GC_INITIAL_HEAP_SIZE", initialHeapSize.c_str(), 1); - - maxHeapSize = config->getIntOption("evaluator_max_heap_size", 1UL << 30); + auto nrWorkers = config->getIntOption("evaluator_workers", 1); + maxMemorySize = config->getIntOption("evaluator_max_memory_size", 4096); initNix(); initGC(); - /* Read the current heap size, which is the initial heap size. */ - GC_prof_stats_s gc; - GC_get_prof_stats(&gc, sizeof(gc)); - auto initialHeapSizeInt = gc.heapsize_full; - - /* Then make sure the maximum heap size will be bigger than the initial heap size. */ - if (initialHeapSizeInt > maxHeapSize) { - printInfo("warning: evaluator_initial_heap_size (%d) bigger than evaluator_max_heap_size (%d).", initialHeapSizeInt, maxHeapSize); - maxHeapSize = initialHeapSizeInt * 1.1; - printInfo(" evaluator_max_heap_size now set to %d.", maxHeapSize); - } - - Path releaseExpr; - - struct MyArgs : LegacyArgs, MixEvalArgs - { - using LegacyArgs::LegacyArgs; - }; - - MyArgs myArgs(baseNameOf(argv[0]), [&](Strings::iterator & arg, const Strings::iterator & end) { - if (*arg == "--gc-roots-dir") - gcRootsDir = getArg(*arg, arg, end); - else if (*arg == "--dry-run") - settings.readOnlyMode = true; - else if (*arg != "" && arg->at(0) == '-') - return false; - else - releaseExpr = *arg; - return true; - }); - myArgs.parseCmdline(argvToStrings(argc, argv)); - JSONObject json(std::cout, true); - std::cout.flush(); + /* FIXME: The build hook in conjunction with import-from-derivation is causing "unexpected EOF" during eval */ + settings.builders = ""; - do { + /* Prevent access to paths outside of the Nix search path and + to the environment. */ + evalSettings.restrictEval = true; - Pipe pipe; - pipe.create(); + if (myArgs.dryRun) settings.readOnlyMode = true; - ProcessOptions options; - options.allowVfork = false; + if (myArgs.releaseExpr == "") throw UsageError("no expression specified"); - GC_atfork_prepare(); + if (gcRootsDir == "") printMsg(lvlError, "warning: `--gc-roots-dir' not specified"); - auto pid = startProcess([&]() { - pipe.readSide = -1; + struct State + { + std::set todo{""}; + std::set active; + nlohmann::json jobs; + std::exception_ptr exc; + }; - GC_atfork_child(); - GC_start_mark_threads(); + std::condition_variable wakeup; - if (lastAttrPath != "") debug("resuming from '%s'", lastAttrPath); + Sync state_; - /* FIXME: The build hook in conjunction with import-from-derivation is causing "unexpected EOF" during eval */ - settings.builders = ""; + /* Start a handler thread per worker process. */ + auto handler = [&]() + { + try { + pid_t pid = -1; + AutoCloseFD from, to; - /* Prevent access to paths outside of the Nix search path and - to the environment. */ - evalSettings.restrictEval = true; + while (true) { - if (releaseExpr == "") throw UsageError("no expression specified"); + /* Start a new worker process if necessary. */ + if (pid == -1) { + Pipe toPipe, fromPipe; + toPipe.create(); + fromPipe.create(); + pid = startProcess( + [&, + to{std::make_shared(std::move(fromPipe.writeSide))}, + from{std::make_shared(std::move(toPipe.readSide))} + ]() + { + try { + EvalState state(myArgs.searchPath, openStore()); + Bindings & autoArgs = *myArgs.getAutoArgs(state); + worker(state, autoArgs, *to, *from); + } catch (std::exception & e) { + nlohmann::json err; + err["error"] = e.what(); + writeLine(to->get(), err.dump()); + } + }, + ProcessOptions { .allowVfork = false }); + from = std::move(fromPipe.readSide); + to = std::move(toPipe.writeSide); + debug("created worker process %d", pid); + } - if (gcRootsDir == "") printMsg(lvlError, "warning: `--gc-roots-dir' not specified"); + /* Check whether the existing worker process is still there. */ + auto s = readLine(from.get()); + if (s == "restart") { + pid = -1; + continue; + } else if (s != "next") { + auto json = nlohmann::json::parse(s); + throw Error("worker error: %s", (std::string) json["error"]); + } - EvalState state(myArgs.searchPath, openStore()); + /* Wait for a job name to become available. */ + std::string attrPath; - Bindings & autoArgs = *myArgs.getAutoArgs(state); + while (true) { + checkInterrupt(); + auto state(state_.lock()); + if ((state->todo.empty() && state->active.empty()) || state->exc) { + writeLine(to.get(), "exit"); + return; + } + if (!state->todo.empty()) { + attrPath = *state->todo.begin(); + state->todo.erase(state->todo.begin()); + state->active.insert(attrPath); + break; + } else + state.wait(wakeup); + } - Value v; - state.evalFile(lookupFileArg(state, releaseExpr), v); + /* Tell the worker to evaluate it. */ + writeLine(to.get(), "do " + attrPath); - comma = lastAttrPath != ""; + /* Wait for the response. */ + auto response = nlohmann::json::parse(readLine(from.get())); - try { - findJobs(state, json, autoArgs, v, ""); - lastAttrPath = ""; - } catch (BailOut &) { } + /* Handle the response. */ + StringSet newAttrs; - writeFull(pipe.writeSide.get(), lastAttrPath); + if (response.find("job") != response.end()) { + auto state(state_.lock()); + state->jobs[attrPath] = response["job"]; + } - exit(0); - }, options); + if (response.find("attrs") != response.end()) { + for (auto & i : response["attrs"]) { + auto s = (attrPath.empty() ? "" : attrPath + ".") + (std::string) i; + newAttrs.insert(s); + } + } - GC_atfork_parent(); + if (response.find("error") != response.end()) { + auto state(state_.lock()); + state->jobs[attrPath]["error"] = response["error"]; + } - pipe.writeSide = -1; + /* Add newly discovered job names to the queue. */ + { + auto state(state_.lock()); + state->active.erase(attrPath); + for (auto & s : newAttrs) + state->todo.insert(s); + wakeup.notify_all(); + } + } + } catch (...) { + auto state(state_.lock()); + state->exc = std::current_exception(); + wakeup.notify_all(); + } + }; - int status; - while (true) { - checkInterrupt(); - if (waitpid(pid, &status, 0) == pid) break; - if (errno != EINTR) continue; + std::vector threads; + for (size_t i = 0; i < nrWorkers; i++) + threads.emplace_back(std::thread(handler)); + + for (auto & thread : threads) + thread.join(); + + auto state(state_.lock()); + + if (state->exc) + std::rethrow_exception(state->exc); + + /* For aggregate jobs that have named consistuents + (i.e. constituents that are a job name rather than a + derivation), look up the referenced job and add it to the + dependencies of the aggregate derivation. */ + auto store = openStore(); + + for (auto i = state->jobs.begin(); i != state->jobs.end(); ++i) { + auto jobName = i.key(); + auto & job = i.value(); + + auto named = job.find("namedConstituents"); + if (named == job.end()) continue; + + if (myArgs.dryRun) { + for (std::string jobName2 : *named) { + auto job2 = state->jobs.find(jobName2); + if (job2 == state->jobs.end()) + throw Error("aggregate job '%s' references non-existent job '%s'", jobName, jobName2); + std::string drvPath2 = (*job2)["drvPath"]; + job["constituents"].push_back(drvPath2); + } + } else { + std::string drvPath = job["drvPath"]; + auto drv = readDerivation(*store, drvPath); + + for (std::string jobName2 : *named) { + auto job2 = state->jobs.find(jobName2); + if (job2 == state->jobs.end()) + throw Error("aggregate job '%s' references non-existent job '%s'", jobName, jobName2); + std::string drvPath2 = (*job2)["drvPath"]; + auto drv2 = readDerivation(*store, drvPath2); + job["constituents"].push_back(drvPath2); + drv.inputDrvs[store->parseStorePath(drvPath2)] = {drv2.outputs.begin()->first}; + } + + std::string drvName(store->parseStorePath(drvPath).name()); + assert(hasSuffix(drvName, drvExtension)); + drvName.resize(drvName.size() - drvExtension.size()); + auto h = hashDerivationModulo(*store, drv, true); + auto outPath = store->makeOutputPath("out", h, drvName); + drv.env["out"] = store->printStorePath(outPath); + drv.outputs.insert_or_assign("out", DerivationOutput(outPath.clone(), "", "")); + auto newDrvPath = store->printStorePath(writeDerivation(store, drv, drvName)); + + debug("rewrote aggregate derivation %s -> %s", drvPath, newDrvPath); + + job["drvPath"] = newDrvPath; + job["outputs"]["out"] = store->printStorePath(outPath); } - if (status != 0) - throw Exit(WIFEXITED(status) ? WEXITSTATUS(status) : 99); + job.erase("namedConstituents"); + } - maxHeapSize += 64 * 1024 * 1024; - - lastAttrPath = drainFD(pipe.readSide.get()); - } while (lastAttrPath != ""); + std::cout << state->jobs.dump(2) << "\n"; }); }