Merge pull request #1203 from DeterminateSystems/ds-33/wait-child-on-eof

hydra-eval-jobs: Report forked worker process status information on exception
This commit is contained in:
Graham Christensen 2022-05-02 15:35:12 -04:00 committed by GitHub
commit 7c133a98f8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 90 additions and 1 deletions

View file

@ -25,6 +25,28 @@
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
/* Report on stderr, without blocking, a pending state change of the child
 * process `check_pid`.
 *
 * Nothing is printed when `check_pid` is not a positive PID, when waitpid()
 * fails, or when the child has not changed state. Otherwise a line of the
 * form "child process (<pid>) <what happened>" is written to std::cerr.
 */
void check_pid_status_nonblocking(pid_t check_pid) {
    // Non-positive values are placeholders (e.g. a PID that was never set),
    // not a specific child we can query.
    if (check_pid <= 0)
        return;

    int status = 0;
    const pid_t changed = waitpid(check_pid, &status, WNOHANG);

    // waitpid() returns -1 on failure and, because of WNOHANG, 0 when the
    // child's state has not changed; in both cases there is nothing to report.
    if (changed <= 0)
        return;

    std::cerr << "child process (" << changed << ") ";

    if (WIFEXITED(status)) {
        std::cerr << "exited with status=" << WEXITSTATUS(status) << std::endl;
        return;
    }

    if (WIFSIGNALED(status)) {
        std::cerr << "killed by signal=" << WTERMSIG(status) << std::endl;
        return;
    }

    if (WIFSTOPPED(status)) {
        std::cerr << "stopped by signal=" << WSTOPSIG(status) << std::endl;
        return;
    }

    if (WIFCONTINUED(status)) {
        std::cerr << "continued" << std::endl;
    }
}
using namespace nix; using namespace nix;
static Path gcRootsDir; static Path gcRootsDir;
@ -311,8 +333,8 @@ int main(int argc, char * * argv)
/* Start a handler thread per worker process. */ /* Start a handler thread per worker process. */
auto handler = [&]() auto handler = [&]()
{ {
pid_t pid = -1;
try { try {
pid_t pid = -1;
AutoCloseFD from, to; AutoCloseFD from, to;
while (true) { while (true) {
@ -414,6 +436,7 @@ int main(int argc, char * * argv)
} }
} }
} catch (...) { } catch (...) {
check_pid_status_nonblocking(pid);
auto state(state_.lock()); auto state(state_.lock());
state->exc = std::current_exception(); state->exc = std::current_exception();
wakeup.notify_all(); wakeup.notify_all();

View file

@ -0,0 +1,63 @@
use strict;
use warnings;
use Setup;
use Test2::V0;
use Hydra::Helper::Exec;
# Regression test: when a forked `hydra-eval-jobs` worker is killed for
# exceeding its memory limit, the parent must report the worker's fate on
# stderr.

# Command prefix that runs a program inside a transient systemd user scope
# with its memory capped at 25 megabytes. It is shared by the availability
# probe and by the real run so that both always use exactly the same switches.
my @systemd_run = (
    "systemd-run",
    "--user",
    "--collect",
    "--scope",
    "--property",
    "MemoryMax=25M",
    "--",
);

# Ensure that `systemd-run` is
# - Available in the PATH/environment
# - Accessible to the user executing it
# - Capable of using the command switches we use in our test
my $sd_res;
eval {
    ($sd_res) = captureStdoutStderr(3, (@systemd_run, "true"));

    # Return an explicit true value so the `or do` branch below runs only
    # when the call died, not because of what the assignment evaluates to.
    1;
} or do {
    # The command failed to execute, likely because `systemd-run` is not present
    # in `PATH`
    skip_all("`systemd-run` failed when invoked in this environment");
};

if ($sd_res != 0) {
    # `systemd-run` executed but `systemd-run` failed to call `true` and return
    # successfully
    skip_all("`systemd-run` returned non-zero when executing `true` (expected 0)");
}

my $ctx = test_context();

# Contain the memory usage to 25 MegaBytes using `systemd-run`
# Run `hydra-eval-jobs` on test job that will purposefully consume all memory
# available
my ($res, $stdout, $stderr) = captureStdoutStderr(60, (
    @systemd_run,
    "hydra-eval-jobs",
    "-I", "/dev/zero",
    "-I", $ctx->jobsdir,
    ($ctx->jobsdir . "/oom.nix")
));

isnt($res, 0, "`hydra-eval-jobs` exits non-zero");

# utf8::decode converts $stderr in place and returns false if the bytes are
# not valid UTF-8, so this both validates and decodes the captured output.
ok(utf8::decode($stderr), "Stderr output is UTF8-clean");

like(
    $stderr,
    # Assert error log contains messages added in PR
    # https://github.com/NixOS/hydra/pull/1203
    qr/^child process \(\d+?\) killed by signal=9$/m,
    "The stderr record includes a relevant error message"
);

done_testing;

3
t/jobs/oom.nix Normal file
View file

@ -0,0 +1,3 @@
# Test fixture: a job set with a single attribute, `oom`, whose value is the
# contents of /dev/zero read as a string. The accompanying test runs
# `hydra-eval-jobs` on this file under a memory cap and expects the evaluation
# to consume all available memory rather than complete.
{
oom = builtins.readFile "/dev/zero";
}