forked from lix-project/lix
nix eval-hydra-jobs: Support parallel evaluation
Example usage: $ nix eval-hydra-jobs -f '<nixpkgs/pkgs/top-level/release.nix>' '' \ --max-memory-size 2048 --workers 8
This commit is contained in:
parent
e375da6899
commit
4c24263967
1 changed files with 233 additions and 90 deletions
|
@ -5,6 +5,10 @@
|
||||||
#include "common-args.hh"
|
#include "common-args.hh"
|
||||||
#include "json.hh"
|
#include "json.hh"
|
||||||
#include "get-drvs.hh"
|
#include "get-drvs.hh"
|
||||||
|
#include "attr-path.hh"
|
||||||
|
|
||||||
|
#include <nlohmann/json.hpp>
|
||||||
|
#include <sys/resource.h>
|
||||||
|
|
||||||
using namespace nix;
|
using namespace nix;
|
||||||
|
|
||||||
|
@ -36,6 +40,8 @@ static std::string queryMetaStrings(EvalState & state, DrvInfo & drv, const stri
|
||||||
struct CmdEvalHydraJobs : MixJSON, MixDryRun, InstallableCommand
|
struct CmdEvalHydraJobs : MixJSON, MixDryRun, InstallableCommand
|
||||||
{
|
{
|
||||||
std::optional<Path> gcRootsDir;
|
std::optional<Path> gcRootsDir;
|
||||||
|
size_t nrWorkers = 1;
|
||||||
|
size_t maxMemorySize = 4ULL * 1024;
|
||||||
|
|
||||||
CmdEvalHydraJobs()
|
CmdEvalHydraJobs()
|
||||||
{
|
{
|
||||||
|
@ -44,6 +50,10 @@ struct CmdEvalHydraJobs : MixJSON, MixDryRun, InstallableCommand
|
||||||
.description("garbage collector roots directory")
|
.description("garbage collector roots directory")
|
||||||
.labels({"path"})
|
.labels({"path"})
|
||||||
.dest(&gcRootsDir);
|
.dest(&gcRootsDir);
|
||||||
|
|
||||||
|
mkIntFlag(0, "workers", "number of concurrent worker processes", &nrWorkers);
|
||||||
|
|
||||||
|
mkIntFlag(0, "max-memory-size", "maximum memory usage per worker process (in MiB)", &maxMemorySize);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string description() override
|
std::string description() override
|
||||||
|
@ -61,11 +71,11 @@ struct CmdEvalHydraJobs : MixJSON, MixDryRun, InstallableCommand
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
void run(ref<Store> store) override
|
void worker(AutoCloseFD & to, AutoCloseFD & from)
|
||||||
{
|
{
|
||||||
auto state = getEvalState();
|
auto state = getEvalState();
|
||||||
|
|
||||||
if (!gcRootsDir) warn("'--gc-roots-dir' not specified");
|
// FIXME: should re-open state->store.
|
||||||
|
|
||||||
if (dryRun) settings.readOnlyMode = true;
|
if (dryRun) settings.readOnlyMode = true;
|
||||||
|
|
||||||
|
@ -73,118 +83,251 @@ struct CmdEvalHydraJobs : MixJSON, MixDryRun, InstallableCommand
|
||||||
to the environment. */
|
to the environment. */
|
||||||
evalSettings.restrictEval = true;
|
evalSettings.restrictEval = true;
|
||||||
|
|
||||||
auto v = installable->toValue(*state).first;
|
|
||||||
|
|
||||||
auto jsonObj = json ? std::make_unique<JSONObject>(std::cout, true) : nullptr;
|
|
||||||
|
|
||||||
std::function<void(Value & vIn, const string & attrPath)> findJobs;
|
|
||||||
|
|
||||||
auto autoArgs = getAutoArgs(*state);
|
auto autoArgs = getAutoArgs(*state);
|
||||||
|
|
||||||
findJobs = [&](Value & vIn, const string & attrPath)
|
auto vTop = installable->toValue(*state).first;
|
||||||
{
|
|
||||||
|
auto vRoot = state->allocValue();
|
||||||
|
state->autoCallFunction(*autoArgs, *vTop, *vRoot);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
/* Wait for the master to send us a job name. */
|
||||||
|
writeLine(to.get(), "next");
|
||||||
|
|
||||||
|
auto s = readLine(from.get());
|
||||||
|
if (s == "exit") break;
|
||||||
|
if (!hasPrefix(s, "do ")) abort();
|
||||||
|
std::string attrPath(s, 3);
|
||||||
|
|
||||||
|
debug("worker process %d at '%s'", getpid(), attrPath);
|
||||||
|
|
||||||
|
/* Evaluate it and send info back to the master. */
|
||||||
|
nlohmann::json reply;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Activity act(*logger, lvlInfo, actUnknown, fmt("evaluating '%s'", attrPath));
|
auto v = findAlongAttrPath(*state, attrPath, *autoArgs, *vRoot).first;
|
||||||
|
|
||||||
checkInterrupt();
|
state->forceValue(*v);
|
||||||
|
|
||||||
auto v = state->allocValue();
|
if (auto drv = getDerivation(*state, *v, false)) {
|
||||||
state->autoCallFunction(*autoArgs, vIn, *v);
|
|
||||||
|
|
||||||
if (v->type == tAttrs) {
|
DrvInfo::Outputs outputs = drv->queryOutputs();
|
||||||
auto drv = getDerivation(*state, *v, false);
|
|
||||||
|
|
||||||
if (drv) {
|
if (drv->querySystem() == "unknown")
|
||||||
|
throw EvalError("derivation must have a 'system' attribute");
|
||||||
|
|
||||||
DrvInfo::Outputs outputs = drv->queryOutputs();
|
auto drvPath = drv->queryDrvPath();
|
||||||
|
|
||||||
if (drv->querySystem() == "unknown")
|
nlohmann::json job;
|
||||||
throw EvalError("derivation must have a 'system' attribute");
|
|
||||||
|
|
||||||
auto drvPath = drv->queryDrvPath();
|
job["nixName"] = drv->queryName();
|
||||||
|
job["system"] =drv->querySystem();
|
||||||
|
job["drvPath"] = drvPath;
|
||||||
|
job["description"] = drv->queryMetaString("description");
|
||||||
|
job["license"] = queryMetaStrings(*state, *drv, "license", "shortName");
|
||||||
|
job["homepage"] = drv->queryMetaString("homepage");
|
||||||
|
job["maintainers"] = queryMetaStrings(*state, *drv, "maintainers", "email");
|
||||||
|
job["schedulingPriority"] = drv->queryMetaInt("schedulingPriority", 100);
|
||||||
|
job["timeout"] = drv->queryMetaInt("timeout", 36000);
|
||||||
|
job["maxSilent"] = drv->queryMetaInt("maxSilent", 7200);
|
||||||
|
job["isChannel"] = drv->queryMetaBool("isHydraChannel", false);
|
||||||
|
|
||||||
if (jsonObj) {
|
/* If this is an aggregate, then get its constituents. */
|
||||||
auto res = jsonObj->object(attrPath);
|
auto a = v->attrs->get(state->symbols.create("_hydraAggregate"));
|
||||||
res.attr("nixName", drv->queryName());
|
if (a && state->forceBool(*a->value, *a->pos)) {
|
||||||
res.attr("system", drv->querySystem());
|
auto a = v->attrs->get(state->symbols.create("constituents"));
|
||||||
res.attr("drvPath", drvPath);
|
if (!a)
|
||||||
res.attr("description", drv->queryMetaString("description"));
|
throw EvalError("derivation must have a ‘constituents’ attribute");
|
||||||
res.attr("license", queryMetaStrings(*state, *drv, "license", "shortName"));
|
PathSet context;
|
||||||
res.attr("homepage", drv->queryMetaString("homepage"));
|
state->coerceToString(*a->pos, *a->value, context, true, false);
|
||||||
res.attr("maintainers", queryMetaStrings(*state, *drv, "maintainers", "email"));
|
PathSet drvs;
|
||||||
res.attr("schedulingPriority", drv->queryMetaInt("schedulingPriority", 100));
|
for (auto & i : context)
|
||||||
res.attr("timeout", drv->queryMetaInt("timeout", 36000));
|
if (i.at(0) == '!') {
|
||||||
res.attr("maxSilent", drv->queryMetaInt("maxSilent", 7200));
|
size_t index = i.find("!", 1);
|
||||||
res.attr("isChannel", drv->queryMetaBool("isHydraChannel", false));
|
drvs.insert(string(i, index + 1));
|
||||||
|
|
||||||
/* If this is an aggregate, then get its constituents. */
|
|
||||||
auto a = v->attrs->get(state->symbols.create("_hydraAggregate"));
|
|
||||||
if (a && state->forceBool(*a->value, *a->pos)) {
|
|
||||||
auto a = v->attrs->get(state->symbols.create("constituents"));
|
|
||||||
if (!a)
|
|
||||||
throw EvalError("derivation must have a ‘constituents’ attribute");
|
|
||||||
PathSet context;
|
|
||||||
state->coerceToString(*a->pos, *a->value, context, true, false);
|
|
||||||
PathSet drvs;
|
|
||||||
for (auto & i : context)
|
|
||||||
if (i.at(0) == '!') {
|
|
||||||
size_t index = i.find("!", 1);
|
|
||||||
drvs.insert(string(i, index + 1));
|
|
||||||
}
|
|
||||||
res.attr("constituents", concatStringsSep(" ", drvs));
|
|
||||||
}
|
}
|
||||||
|
job["constituents"] = concatStringsSep(" ", drvs);
|
||||||
/* Register the derivation as a GC root. !!! This
|
|
||||||
registers roots for jobs that we may have already
|
|
||||||
done. */
|
|
||||||
auto localStore = state->store.dynamic_pointer_cast<LocalFSStore>();
|
|
||||||
if (gcRootsDir && localStore) {
|
|
||||||
Path root = *gcRootsDir + "/" + std::string(baseNameOf(drvPath));
|
|
||||||
if (!pathExists(root))
|
|
||||||
localStore->addPermRoot(localStore->parseStorePath(drvPath), root, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto res2 = res.object("outputs");
|
|
||||||
for (auto & j : outputs)
|
|
||||||
res2.attr(j.first, j.second);
|
|
||||||
} else
|
|
||||||
std::cout << fmt("%d: %d\n", attrPath, drvPath);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
/* Register the derivation as a GC root. !!! This
|
||||||
if (!state->isDerivation(*v)) {
|
registers roots for jobs that we may have already
|
||||||
for (auto & i : v->attrs->lexicographicOrder()) {
|
done. */
|
||||||
std::string name(i->name);
|
auto localStore = state->store.dynamic_pointer_cast<LocalFSStore>();
|
||||||
|
if (gcRootsDir && localStore) {
|
||||||
|
Path root = *gcRootsDir + "/" + std::string(baseNameOf(drvPath));
|
||||||
|
if (!pathExists(root))
|
||||||
|
localStore->addPermRoot(localStore->parseStorePath(drvPath), root, false);
|
||||||
|
}
|
||||||
|
|
||||||
/* Skip jobs with dots in the name. */
|
nlohmann::json out;
|
||||||
if (name.find('.') != std::string::npos) {
|
for (auto & j : outputs)
|
||||||
printError("skipping job with illegal name '%s'", name);
|
out[j.first] = j.second;
|
||||||
continue;
|
job["outputs"] = std::move(out);
|
||||||
}
|
|
||||||
|
|
||||||
findJobs(*i->value, (attrPath.empty() ? "" : attrPath + ".") + name);
|
reply["job"] = std::move(job);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else if (v->type == tAttrs) {
|
||||||
|
auto attrs = nlohmann::json::array();
|
||||||
|
StringSet ss;
|
||||||
|
for (auto & i : v->attrs->lexicographicOrder()) {
|
||||||
|
std::string name(i->name);
|
||||||
|
if (name.find('.') != std::string::npos || name.find(' ') != std::string::npos) {
|
||||||
|
printError("skipping job with illegal name '%s'", name);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
attrs.push_back(name);
|
||||||
}
|
}
|
||||||
|
reply["attrs"] = std::move(attrs);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (v->type == tNull) {
|
|
||||||
// allow null values, meaning 'do nothing'
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
throw TypeError("unsupported value: %s", *v);
|
|
||||||
|
|
||||||
} catch (EvalError & e) {
|
} catch (EvalError & e) {
|
||||||
if (jsonObj)
|
reply["error"] = filterANSIEscapes(e.msg(), true);
|
||||||
jsonObj->object(attrPath).attr("error", filterANSIEscapes(e.msg(), true));
|
}
|
||||||
else
|
|
||||||
printError("in job '%s': %s", attrPath, e.what());
|
writeLine(to.get(), reply.dump());
|
||||||
|
|
||||||
|
/* If our RSS exceeds the maximum, exit. The master will
|
||||||
|
start a new process. */
|
||||||
|
struct rusage r;
|
||||||
|
getrusage(RUSAGE_SELF, &r);
|
||||||
|
if ((size_t) r.ru_maxrss > maxMemorySize * 1024) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
writeLine(to.get(), "restart");
|
||||||
|
}
|
||||||
|
|
||||||
|
void run(ref<Store> store) override
|
||||||
|
{
|
||||||
|
if (!gcRootsDir) warn("'--gc-roots-dir' not specified");
|
||||||
|
|
||||||
|
struct State
|
||||||
|
{
|
||||||
|
std::set<std::string> todo{""};
|
||||||
|
std::set<std::string> active;
|
||||||
|
nlohmann::json result;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::condition_variable wakeup;
|
||||||
|
|
||||||
|
Sync<State> state_;
|
||||||
|
|
||||||
|
/* Start a handler thread per worker process. */
|
||||||
|
auto handler = [this, &state_, &wakeup]()
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
pid_t pid = -1;
|
||||||
|
AutoCloseFD from, to;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
|
||||||
|
/* Start a new worker process if necessary. */
|
||||||
|
if (pid == -1) {
|
||||||
|
Pipe toPipe, fromPipe;
|
||||||
|
toPipe.create();
|
||||||
|
fromPipe.create();
|
||||||
|
pid = startProcess(
|
||||||
|
[this,
|
||||||
|
to{std::make_shared<AutoCloseFD>(std::move(fromPipe.writeSide))},
|
||||||
|
from{std::make_shared<AutoCloseFD>(std::move(toPipe.readSide))}
|
||||||
|
]()
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
worker(*to, *from);
|
||||||
|
} catch (Error & e) {
|
||||||
|
printError("unexpected worker error: %s", e.msg());
|
||||||
|
_exit(1);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
ProcessOptions { .allowVfork = false });
|
||||||
|
from = std::move(fromPipe.readSide);
|
||||||
|
to = std::move(toPipe.writeSide);
|
||||||
|
debug("created worker process %d", pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check whether the existing worker process is still there. */
|
||||||
|
auto s = readLine(from.get());
|
||||||
|
if (s == "restart") {
|
||||||
|
pid = -1;
|
||||||
|
continue;
|
||||||
|
} else if (s != "next")
|
||||||
|
throw Error("unexpected worker request: %s", s);
|
||||||
|
|
||||||
|
/* Wait for a job name to become available. */
|
||||||
|
std::string attrPath;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
checkInterrupt();
|
||||||
|
auto state(state_.lock());
|
||||||
|
if (state->todo.empty() && state->active.empty()) {
|
||||||
|
writeLine(to.get(), "exit");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!state->todo.empty()) {
|
||||||
|
attrPath = *state->todo.begin();
|
||||||
|
state->todo.erase(state->todo.begin());
|
||||||
|
state->active.insert(attrPath);
|
||||||
|
break;
|
||||||
|
} else
|
||||||
|
state.wait(wakeup);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Tell the worker to evaluate it. */
|
||||||
|
writeLine(to.get(), "do " + attrPath);
|
||||||
|
|
||||||
|
/* Wait for the response. */
|
||||||
|
auto response = nlohmann::json::parse(readLine(from.get()));
|
||||||
|
|
||||||
|
/* Handle the response. */
|
||||||
|
StringSet newAttrs;
|
||||||
|
|
||||||
|
if (response.find("job") != response.end()) {
|
||||||
|
auto state(state_.lock());
|
||||||
|
if (json)
|
||||||
|
state->result[attrPath] = response["job"];
|
||||||
|
else
|
||||||
|
std::cout << fmt("%d: %d\n", attrPath, (std::string) response["job"]["drvPath"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (response.find("attrs") != response.end()) {
|
||||||
|
for (auto & i : response["attrs"]) {
|
||||||
|
auto s = (attrPath.empty() ? "" : attrPath + ".") + (std::string) i;
|
||||||
|
newAttrs.insert(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (response.find("error") != response.end()) {
|
||||||
|
auto state(state_.lock());
|
||||||
|
if (json)
|
||||||
|
state->result[attrPath]["error"] = response["error"];
|
||||||
|
else
|
||||||
|
printError("error in job '%s': %s",
|
||||||
|
attrPath, (std::string) response["error"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add newly discovered job names to the queue. */
|
||||||
|
{
|
||||||
|
auto state(state_.lock());
|
||||||
|
state->active.erase(attrPath);
|
||||||
|
for (auto & s : newAttrs)
|
||||||
|
state->todo.insert(s);
|
||||||
|
wakeup.notify_all();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Error & e) {
|
||||||
|
printError("unexpected handler thread error: %s", e.msg());
|
||||||
|
abort();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
findJobs(*v, "");
|
std::vector<std::thread> threads;
|
||||||
|
for (size_t i = 0; i < nrWorkers; i++)
|
||||||
|
threads.emplace_back(std::thread(handler));
|
||||||
|
|
||||||
|
for (auto & thread : threads)
|
||||||
|
thread.join();
|
||||||
|
|
||||||
|
if (json) std::cout << state_.lock()->result.dump(2) << "\n";
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue