diff --git a/src/hydra-eval-jobs/hydra-eval-jobs.cc b/src/hydra-eval-jobs/hydra-eval-jobs.cc index 7485b297..918bd451 100644 --- a/src/hydra-eval-jobs/hydra-eval-jobs.cc +++ b/src/hydra-eval-jobs/hydra-eval-jobs.cc @@ -25,6 +25,28 @@ #include +void check_pid_status_nonblocking(pid_t check_pid) { + // Only check 'initialized' and known PIDs + if (check_pid <= 0) { return; } + + int wstatus = 0; + pid_t pid = waitpid(check_pid, &wstatus, WNOHANG); + // -1 = failure, WNOHANG: 0 = no change + if (pid <= 0) { return; } + + std::cerr << "child process (" << pid << ") "; + + if (WIFEXITED(wstatus)) { + std::cerr << "exited with status=" << WEXITSTATUS(wstatus) << std::endl; + } else if (WIFSIGNALED(wstatus)) { + std::cerr << "killed by signal=" << WTERMSIG(wstatus) << std::endl; + } else if (WIFSTOPPED(wstatus)) { + std::cerr << "stopped by signal=" << WSTOPSIG(wstatus) << std::endl; + } else if (WIFCONTINUED(wstatus)) { + std::cerr << "continued" << std::endl; + } +} + using namespace nix; static Path gcRootsDir; @@ -311,8 +333,8 @@ int main(int argc, char * * argv) /* Start a handler thread per worker process. */ auto handler = [&]() { + pid_t pid = -1; try { - pid_t pid = -1; AutoCloseFD from, to; while (true) { @@ -414,6 +436,7 @@ int main(int argc, char * * argv) } } } catch (...) 
{ + check_pid_status_nonblocking(pid); auto state(state_.lock()); state->exc = std::current_exception(); wakeup.notify_all(); diff --git a/t/evaluator/evaluate-oom-job.t b/t/evaluator/evaluate-oom-job.t new file mode 100644 index 00000000..6c17d4e4 --- /dev/null +++ b/t/evaluator/evaluate-oom-job.t @@ -0,0 +1,63 @@ +use strict; +use warnings; +use Setup; +use Test2::V0; +use Hydra::Helper::Exec; + +# Ensure that `systemd-run` is +# - Available in the PATH/environment +# - Accessible to the user executing it +# - Capable of using the command switches we use in our test +my $sd_res; +eval { + ($sd_res) = captureStdoutStderr(3, ( + "systemd-run", + "--user", + "--collect", + "--scope", + "--property", + "MemoryMax=25M", + "--", + "true" + )); +} or do { + # The command failed to execute, likely because `systemd-run` is not present + # in `PATH` + skip_all("`systemd-run` failed when invoked in this environment"); +}; +if ($sd_res != 0) { + # `systemd-run` executed but `systemd-run` failed to call `true` and return + # successfully + skip_all("`systemd-run` returned non-zero when executing `true` (expected 0)"); +} + +my $ctx = test_context(); + +# Contain the memory usage to 25 megabytes using `systemd-run` +# Run `hydra-eval-jobs` on a test job that will purposefully consume all memory +# available +my ($res, $stdout, $stderr) = captureStdoutStderr(60, ( + "systemd-run", + "--user", + "--collect", + "--scope", + "--property", + "MemoryMax=25M", + "--", + "hydra-eval-jobs", + "-I", "/dev/zero", + "-I", $ctx->jobsdir, + ($ctx->jobsdir . 
"/oom.nix") +)); + +isnt($res, 0, "`hydra-eval-jobs` exits non-zero"); +ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); +like( + $stderr, + # Assert error log contains messages added in PR + # https://github.com/NixOS/hydra/pull/1203 + qr/^child process \(\d+?\) killed by signal=9$/m, + "The stderr record includes a relevant error message" +); + +done_testing; diff --git a/t/jobs/oom.nix b/t/jobs/oom.nix new file mode 100644 index 00000000..abbd0c0d --- /dev/null +++ b/t/jobs/oom.nix @@ -0,0 +1,3 @@ +{ + oom = builtins.readFile "/dev/zero"; +}