From 9d4c256fa05468fab900b778473d1b380915a030 Mon Sep 17 00:00:00 2001 From: John Soo Date: Thu, 14 Apr 2022 23:28:19 -0700 Subject: [PATCH 1/3] Make a Proc struct for running processes. --- src/nix-eval-jobs.cc | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/nix-eval-jobs.cc b/src/nix-eval-jobs.cc index fd33320..045577b 100644 --- a/src/nix-eval-jobs.cc +++ b/src/nix-eval-jobs.cc @@ -298,6 +298,50 @@ static void worker( writeLine(to.get(), "restart"); } +typedef std::function + Processor; + +/* Auto-cleanup of fork's process and fds. */ +struct Proc { + AutoCloseFD to, from; + Pid pid; + + Proc(const Processor & proc) { + Pipe toPipe, fromPipe; + toPipe.create(); + fromPipe.create(); + auto p = startProcess( + [&, + to{std::make_shared(std::move(fromPipe.writeSide))}, + from{std::make_shared(std::move(toPipe.readSide))} + ]() + { + debug("created worker process %d", getpid()); + try { + EvalState state(myArgs.searchPath, openStore()); + Bindings & autoArgs = *myArgs.getAutoArgs(state); + proc(state, autoArgs, *to, *from); + } catch (Error & e) { + nlohmann::json err; + auto msg = e.msg(); + err["error"] = filterANSIEscapes(msg, true); + printError(msg); + writeLine(to->get(), err.dump()); + // Don't forget to print it into the STDERR log, this is + // what's shown in the Hydra UI. + writeLine(to->get(), "restart"); + } + }, + ProcessOptions { .allowVfork = false }); + + to = std::move(toPipe.writeSide); + from = std::move(fromPipe.readSide); + pid = p; + } + + ~Proc() { } +}; + int main(int argc, char * * argv) { /* Prevent undeclared dependencies in the evaluation via From a27faabd0a3fc0a693433fa5b277587127c14fb3 Mon Sep 17 00:00:00 2001 From: John Soo Date: Thu, 21 Apr 2022 10:46:38 -0700 Subject: [PATCH 2/3] Use Proc struct to manage forked processes. Cleans up zombie processes. --- src/nix-eval-jobs.cc | 56 +++++++++++--------------------------------- 1 file changed, 14 insertions(+), 42 deletions(-) diff --git a/src/nix-eval-jobs.cc b/src/nix-eval-jobs.cc index 045577b..d71cf8d 100644 --- a/src/nix-eval-jobs.cc +++ b/src/nix-eval-jobs.cc @@ -177,8 +177,7 @@ static void worker( EvalState & state, Bindings & autoArgs, AutoCloseFD & to, - AutoCloseFD & from, - const Path &gcRootsDir) + AutoCloseFD & from) { auto vRoot = topLevelValue(state, autoArgs); @@ -243,8 +242,8 @@ static void worker( /* Register the derivation as a GC root. !!! This registers roots for jobs that we may have already done. */ - if (gcRootsDir != "") { - Path root = gcRootsDir + "/" + std::string(baseNameOf(drvPath)); + if (myArgs.gcRootsDir != "") { + Path root = myArgs.gcRootsDir + "/" + std::string(baseNameOf(drvPath)); if (!pathExists(root)) localStore->addPermRoot(storePath, root); } @@ -391,47 +390,18 @@ int main(int argc, char * * argv) auto handler = [&]() { try { - pid_t pid = -1; - AutoCloseFD from, to; + std::optional> proc_; while (true) { - /* Start a new worker process if necessary. */ - if (pid == -1) { - Pipe toPipe, fromPipe; - toPipe.create(); - fromPipe.create(); - pid = startProcess( - [&, - to{std::make_shared(std::move(fromPipe.writeSide))}, - from{std::make_shared(std::move(toPipe.readSide))} - ]() - { - try { - EvalState state(myArgs.searchPath, openStore()); - Bindings & autoArgs = *myArgs.getAutoArgs(state); - worker(state, autoArgs, *to, *from, myArgs.gcRootsDir); - } catch (Error & e) { - nlohmann::json err; - auto msg = e.msg(); - err["error"] = filterANSIEscapes(msg, true); - printError(msg); - writeLine(to->get(), err.dump()); - // Don't forget to print it into the STDERR log, this is - // what's shown in the Hydra UI. - writeLine(to->get(), "restart"); - } - }, - ProcessOptions { .allowVfork = false }); - from = std::move(fromPipe.readSide); - to = std::move(toPipe.writeSide); - debug("created worker process %d", pid); - } + auto proc = proc_.has_value() + ? std::move(proc_.value()) + : std::make_unique(worker); /* Check whether the existing worker process is still there. */ - auto s = readLine(from.get()); + auto s = readLine(proc->from.get()); if (s == "restart") { - pid = -1; + proc_ = std::nullopt; continue; } else if (s != "next") { auto json = nlohmann::json::parse(s); @@ -445,7 +415,7 @@ int main(int argc, char * * argv) checkInterrupt(); auto state(state_.lock()); if ((state->todo.empty() && state->active.empty()) || state->exc) { - writeLine(to.get(), "exit"); + writeLine(proc->to.get(), "exit"); return; } if (!state->todo.empty()) { @@ -458,10 +428,10 @@ int main(int argc, char * * argv) } /* Tell the worker to evaluate it. */ - writeLine(to.get(), "do " + attrPath); + writeLine(proc->to.get(), "do " + attrPath); /* Wait for the response. */ - auto respString = readLine(from.get()); + auto respString = readLine(proc->from.get()); auto response = nlohmann::json::parse(respString); /* Handle the response. */ @@ -476,6 +446,8 @@ int main(int argc, char * * argv) std::cout << respString << "\n" << std::flush; } + proc_ = std::move(proc); + /* Add newly discovered job names to the queue. */ { auto state(state_.lock()); From b9a87464a06c60598d0948e974ee6538ab024f60 Mon Sep 17 00:00:00 2001 From: John Soo Date: Wed, 20 Apr 2022 21:01:02 -0700 Subject: [PATCH 3/3] Move collecting handler to separate function. s/master/collector/g --- src/nix-eval-jobs.cc | 182 +++++++++++++++++++++---------------------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/src/nix-eval-jobs.cc b/src/nix-eval-jobs.cc index d71cf8d..0e5d78a 100644 --- a/src/nix-eval-jobs.cc +++ b/src/nix-eval-jobs.cc @@ -182,7 +182,7 @@ static void worker( auto vRoot = topLevelValue(state, autoArgs); while (true) { - /* Wait for the master to send us a job name. */ + /* Wait for the collector to send us a job name. */ writeLine(to.get(), "next"); auto s = readLine(from.get()); @@ -192,7 +192,7 @@ static void worker( debug("worker process %d at '%s'", getpid(), attrPath); - /* Evaluate it and send info back to the master. */ + /* Evaluate it and send info back to the collector. */ nlohmann::json reply; reply["attr"] = attrPath; @@ -287,7 +287,7 @@ static void worker( writeLine(to.get(), reply.dump()); - /* If our RSS exceeds the maximum, exit. The master will + /* If our RSS exceeds the maximum, exit. The collector will start a new process. */ struct rusage r; getrusage(RUSAGE_SELF, &r); @@ -341,6 +341,91 @@ struct Proc { ~Proc() { } }; +struct State +{ + std::set todo{""}; + std::set active; + std::exception_ptr exc; +}; + +std::function collector(Sync & state_, std::condition_variable & wakeup) { + return [&]() { + try { + std::optional> proc_; + + while (true) { + + auto proc = proc_.has_value() + ? std::move(proc_.value()) + : std::make_unique(worker); + + /* Check whether the existing worker process is still there. */ + auto s = readLine(proc->from.get()); + if (s == "restart") { + proc_ = std::nullopt; + continue; + } else if (s != "next") { + auto json = nlohmann::json::parse(s); + throw Error("worker error: %s", (std::string) json["error"]); + } + + /* Wait for a job name to become available. */ + std::string attrPath; + + while (true) { + checkInterrupt(); + auto state(state_.lock()); + if ((state->todo.empty() && state->active.empty()) || state->exc) { + writeLine(proc->to.get(), "exit"); + return; + } + if (!state->todo.empty()) { + attrPath = *state->todo.begin(); + state->todo.erase(state->todo.begin()); + state->active.insert(attrPath); + break; + } else + state.wait(wakeup); + } + + /* Tell the worker to evaluate it. */ + writeLine(proc->to.get(), "do " + attrPath); + + /* Wait for the response. */ + auto respString = readLine(proc->from.get()); + auto response = nlohmann::json::parse(respString); + + /* Handle the response. */ + StringSet newAttrs; + if (response.find("attrs") != response.end()) { + for (auto & i : response["attrs"]) { + auto s = (attrPath.empty() ? "" : attrPath + ".") + (std::string) i; + newAttrs.insert(s); + } + } else { + auto state(state_.lock()); + std::cout << respString << "\n" << std::flush; + } + + proc_ = std::move(proc); + + /* Add newly discovered job names to the queue. */ + { + auto state(state_.lock()); + state->active.erase(attrPath); + for (auto & s : newAttrs) + state->todo.insert(s); + wakeup.notify_all(); + } + } + } catch (...) { + auto state(state_.lock()); + state->exc = std::current_exception(); + wakeup.notify_all(); + } + }; +} + int main(int argc, char * * argv) { /* Prevent undeclared dependencies in the evaluation via @@ -375,98 +460,13 @@ int main(int argc, char * * argv) loggerSettings.showTrace.assign(true); } - struct State - { - std::set todo{""}; - std::set active; - std::exception_ptr exc; - }; - - std::condition_variable wakeup; - Sync state_; - /* Start a handler thread per worker process. */ - auto handler = [&]() - { - try { - std::optional> proc_; - - while (true) { - - auto proc = proc_.has_value() - ? std::move(proc_.value()) - : std::make_unique(worker); - - /* Check whether the existing worker process is still there. */ - auto s = readLine(proc->from.get()); - if (s == "restart") { - proc_ = std::nullopt; - continue; - } else if (s != "next") { - auto json = nlohmann::json::parse(s); - throw Error("worker error: %s", (std::string) json["error"]); - } - - /* Wait for a job name to become available. */ - std::string attrPath; - - while (true) { - checkInterrupt(); - auto state(state_.lock()); - if ((state->todo.empty() && state->active.empty()) || state->exc) { - writeLine(proc->to.get(), "exit"); - return; - } - if (!state->todo.empty()) { - attrPath = *state->todo.begin(); - state->todo.erase(state->todo.begin()); - state->active.insert(attrPath); - break; - } else - state.wait(wakeup); - } - - /* Tell the worker to evaluate it. */ - writeLine(proc->to.get(), "do " + attrPath); - - /* Wait for the response. */ - auto respString = readLine(proc->from.get()); - auto response = nlohmann::json::parse(respString); - - /* Handle the response. */ - StringSet newAttrs; - if (response.find("attrs") != response.end()) { - for (auto & i : response["attrs"]) { - auto s = (attrPath.empty() ? "" : attrPath + ".") + (std::string) i; - newAttrs.insert(s); - } - } else { - auto state(state_.lock()); - std::cout << respString << "\n" << std::flush; - } - - proc_ = std::move(proc); - - /* Add newly discovered job names to the queue. */ - { - auto state(state_.lock()); - state->active.erase(attrPath); - for (auto & s : newAttrs) - state->todo.insert(s); - wakeup.notify_all(); - } - } - } catch (...) { - auto state(state_.lock()); - state->exc = std::current_exception(); - wakeup.notify_all(); - } - }; - + /* Start a collector thread per worker process. */ std::vector threads; + std::condition_variable wakeup; for (size_t i = 0; i < myArgs.nrWorkers; i++) - threads.emplace_back(std::thread(handler)); + threads.emplace_back(std::thread(collector(state_, wakeup))); for (auto & thread : threads) thread.join();