From cb4fa0000ff4f884bb0064243d99ba4133ee0ae7 Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Thu, 14 Apr 2022 11:03:10 -0400 Subject: [PATCH 01/10] fix(hydra-eval-jobs.cc): add function to report pid status --- src/hydra-eval-jobs/hydra-eval-jobs.cc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/hydra-eval-jobs/hydra-eval-jobs.cc b/src/hydra-eval-jobs/hydra-eval-jobs.cc index 7485b297..bbc55a2b 100644 --- a/src/hydra-eval-jobs/hydra-eval-jobs.cc +++ b/src/hydra-eval-jobs/hydra-eval-jobs.cc @@ -25,6 +25,28 @@ #include +void check_pid_status_quick(pid_t check_pid) { + // Only check 'initialized' and known PID's + if (check_pid <= 0) { return; } + + int wstatus = 0; + pid_t pid = waitpid(check_pid, &wstatus, WNOHANG); + // -1 = failiure, WNOHANG: 0 = no change + if (pid <= 0) { return; } + + std::cerr << "child process (" << pid << ") "; + + if (WIFEXITED(wstatus)) { + std::cerr << "exited with status=" << WEXITSTATUS(wstatus) << std::endl; + } else if (WIFSIGNALED(wstatus)) { + std::cerr << "killed by signal=" << WTERMSIG(wstatus) << std::endl; + } else if (WIFSTOPPED(wstatus)) { + std::cerr << "stopped by signal=" << WSTOPSIG(wstatus) << std::endl; + } else if (WIFCONTINUED(wstatus)) { + std::cerr << "continued" << std::endl; + } +} + using namespace nix; static Path gcRootsDir; From 62cdbc41389757322ddd0255038de2d6c57d197d Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Thu, 14 Apr 2022 11:18:29 -0400 Subject: [PATCH 02/10] feat(hydra-eval-jobs.cc): add check_pid_status_nonblocking to catch handler --- src/hydra-eval-jobs/hydra-eval-jobs.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/hydra-eval-jobs/hydra-eval-jobs.cc b/src/hydra-eval-jobs/hydra-eval-jobs.cc index bbc55a2b..f1cc1434 100644 --- a/src/hydra-eval-jobs/hydra-eval-jobs.cc +++ b/src/hydra-eval-jobs/hydra-eval-jobs.cc @@ -25,7 +25,7 @@ #include -void check_pid_status_quick(pid_t check_pid) { +void check_pid_status_nonblocking(pid_t check_pid) { // Only check 'initialized' and known PID's if (check_pid <= 0) { return; } @@ -333,8 +333,8 @@ int main(int argc, char * * argv) /* Start a handler thread per worker process. */ auto handler = [&]() { + pid_t pid = -1; try { - pid_t pid = -1; AutoCloseFD from, to; while (true) { @@ -436,6 +436,7 @@ int main(int argc, char * * argv) } } } catch (...) { + check_pid_status_nonblocking(pid); auto state(state_.lock()); state->exc = std::current_exception(); wakeup.notify_all(); From 2cdd7974de5e8ed2360caabe586510f0b512feec Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Fri, 29 Apr 2022 13:06:16 -0400 Subject: [PATCH 03/10] fix(hydra-eval-jobs): fix typo --- src/hydra-eval-jobs/hydra-eval-jobs.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hydra-eval-jobs/hydra-eval-jobs.cc b/src/hydra-eval-jobs/hydra-eval-jobs.cc index f1cc1434..918bd451 100644 --- a/src/hydra-eval-jobs/hydra-eval-jobs.cc +++ b/src/hydra-eval-jobs/hydra-eval-jobs.cc @@ -31,7 +31,7 @@ void check_pid_status_nonblocking(pid_t check_pid) { int wstatus = 0; pid_t pid = waitpid(check_pid, &wstatus, WNOHANG); - // -1 = failiure, WNOHANG: 0 = no change + // -1 = failure, WNOHANG: 0 = no change if (pid <= 0) { return; } std::cerr << "child process (" << pid << ") "; From 90769ab5adcb6191e149c65aec72643c89b2a233 Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Mon, 2 May 2022 13:49:32 -0400 Subject: [PATCH 04/10] feat(t/jobs): add test job to cause an OOM --- t/jobs/oom.nix | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 t/jobs/oom.nix diff --git a/t/jobs/oom.nix b/t/jobs/oom.nix new file mode 100644 index 00000000..abbd0c0d --- /dev/null +++ b/t/jobs/oom.nix @@ -0,0 +1,3 @@ +{ + oom = builtins.readFile "/dev/zero"; +} From 2c909c038fad7cd6107706915e819a12e88fe425 Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Mon, 2 May 2022 13:50:57 -0400 Subject: [PATCH 05/10] feat(t/evaluator/hydra-eval-jobs): add basic evaluation test for hydra-eval-jobs --- t/evaluator/evaluate-oom-job.t | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 t/evaluator/evaluate-oom-job.t diff --git a/t/evaluator/evaluate-oom-job.t b/t/evaluator/evaluate-oom-job.t new file mode 100644 index 00000000..dd494c03 --- /dev/null +++ b/t/evaluator/evaluate-oom-job.t @@ -0,0 +1,25 @@ +use strict; +use warnings; +use Setup; +use Test2::V0; +use Hydra::Helper::Exec; + +my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ( + "systemd-run", "--user", "--collect", "--scope", "--property", "MemoryMax=25M", "--", + "hydra-eval-jobs", + "-I", "/dev/zero", + "-I", "./t/jobs", + "./t/jobs/oom.nix" + ) +); + +isnt($res, 0, "hydra-eval-jobs exits non-zero"); +ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); +like( + $stderr, + qr/^child process \(\d+?\) killed by signal=9$/m, + "The stderr record includes a relevant error message" +); + +done_testing; From 01ec004108177c0077f83ac824f92672fd871678 Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Mon, 2 May 2022 14:08:50 -0400 Subject: [PATCH 06/10] feat(t/evaluator/evaluate-oom-job): skip test if systemd-run is not present --- t/evaluator/evaluate-oom-job.t | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/t/evaluator/evaluate-oom-job.t b/t/evaluator/evaluate-oom-job.t index dd494c03..bf8f214f 100644 --- a/t/evaluator/evaluate-oom-job.t +++ b/t/evaluator/evaluate-oom-job.t @@ -4,6 +4,14 @@ use Setup; use Test2::V0; use Hydra::Helper::Exec; +my ($systemdrRes) = captureStdoutStderr(3, ( + "systemd-run", "--user", "--collect", "--scope", "--property", "MemoryMax=25M", "--", + "true" +)); + +skip_all("systemd-run does not work in this environment") if($systemdrRes != 0); + + my ($res, $stdout, $stderr) = captureStdoutStderr(60, ( "systemd-run", "--user", "--collect", "--scope", "--property", "MemoryMax=25M", "--", From e917d9e54662da32d5b4d9a1ff950a05cb7da5c1 Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Mon, 2 May 2022 14:40:13 -0400 Subject: [PATCH 07/10] fix(t/evaluator/evaluate-oom): convert systemd-run presence check to eval, fix indentaion, show relationships between flags and commands with indentation --- t/evaluator/evaluate-oom-job.t | 49 +++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/t/evaluator/evaluate-oom-job.t b/t/evaluator/evaluate-oom-job.t index bf8f214f..8d2264c0 100644 --- a/t/evaluator/evaluate-oom-job.t +++ b/t/evaluator/evaluate-oom-job.t @@ -4,30 +4,41 @@ use Setup; use Test2::V0; use Hydra::Helper::Exec; -my ($systemdrRes) = captureStdoutStderr(3, ( - "systemd-run", "--user", "--collect", "--scope", "--property", "MemoryMax=25M", "--", - "true" +eval { + captureStdoutStderr(3, ( + "systemd-run", + "--user", + "--collect", + "--scope", + "--property", + "MemoryMax=25M", + "--", + "true" + )); +} or do { + skip_all("systemd-run does not work in this environment"); +}; + +my ($res, $stdout, $stderr) = captureStdoutStderr(60, ( + "systemd-run", + "--user", + "--collect", + "--scope", + "--property", + "MemoryMax=25M", + "--", + "hydra-eval-jobs", + "-I", "/dev/zero", + "-I", "./t/jobs", + "./t/jobs/oom.nix" )); -skip_all("systemd-run does not work in this environment") if($systemdrRes != 0); - - -my ($res, $stdout, $stderr) = captureStdoutStderr(60, - ( - "systemd-run", "--user", "--collect", "--scope", "--property", "MemoryMax=25M", "--", - "hydra-eval-jobs", - "-I", "/dev/zero", - "-I", "./t/jobs", - "./t/jobs/oom.nix" - ) -); - isnt($res, 0, "hydra-eval-jobs exits non-zero"); ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); like( - $stderr, - qr/^child process \(\d+?\) killed by signal=9$/m, - "The stderr record includes a relevant error message" + $stderr, + qr/^child process \(\d+?\) killed by signal=9$/m, + "The stderr record includes a relevant error message" ); done_testing; From 013a1dcabc7d4bf06340ab23d6dbfd14a783fdcb Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Mon, 2 May 2022 15:13:59 -0400 Subject: [PATCH 08/10] fix(t/evaluator/evaluate-oom): check that the exit value of the `systemd-run` check is zero. Rework skip messages --- t/evaluator/evaluate-oom-job.t | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/t/evaluator/evaluate-oom-job.t b/t/evaluator/evaluate-oom-job.t index 8d2264c0..7a527825 100644 --- a/t/evaluator/evaluate-oom-job.t +++ b/t/evaluator/evaluate-oom-job.t @@ -4,8 +4,9 @@ use Setup; use Test2::V0; use Hydra::Helper::Exec; +my $sd_res; eval { - captureStdoutStderr(3, ( + ($sd_res) = captureStdoutStderr(3, ( "systemd-run", "--user", "--collect", @@ -16,8 +17,9 @@ eval { "true" )); } or do { - skip_all("systemd-run does not work in this environment"); + skip_all("`systemd-run` failed when invoked in this environment"); }; +if ($sd_res != 0) { skip_all("`systemd-run` returned non-zero when executing `true` (expected 0)"); } my ($res, $stdout, $stderr) = captureStdoutStderr(60, ( "systemd-run", From 87f610e7c18b85c77098b1530b1d015b935710ab Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Mon, 2 May 2022 15:14:46 -0400 Subject: [PATCH 09/10] fix(t/evaluator/evaluate-oom): use `test_context` to get path to ./t/jobs instead of relative paths --- t/evaluator/evaluate-oom-job.t | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/t/evaluator/evaluate-oom-job.t b/t/evaluator/evaluate-oom-job.t index 7a527825..8c8c5f60 100644 --- a/t/evaluator/evaluate-oom-job.t +++ b/t/evaluator/evaluate-oom-job.t @@ -21,6 +21,8 @@ eval { }; if ($sd_res != 0) { skip_all("`systemd-run` returned non-zero when executing `true` (expected 0)"); } +my $ctx = test_context(); + my ($res, $stdout, $stderr) = captureStdoutStderr(60, ( "systemd-run", "--user", @@ -31,11 +33,11 @@ my ($res, $stdout, $stderr) = captureStdoutStderr(60, ( "--", "hydra-eval-jobs", "-I", "/dev/zero", - "-I", "./t/jobs", - "./t/jobs/oom.nix" + "-I", $ctx->jobsdir, + ($ctx->jobsdir . "/oom.nix") )); -isnt($res, 0, "hydra-eval-jobs exits non-zero"); +isnt($res, 0, "`hydra-eval-jobs` exits non-zero"); ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); like( $stderr, From 065039beba4aa8fc998762b145aa6176daa44522 Mon Sep 17 00:00:00 2001 From: Kayla Firestack Date: Mon, 2 May 2022 15:26:26 -0400 Subject: [PATCH 10/10] feat(t/evaluator/evaluate-oom): comment intentions --- t/evaluator/evaluate-oom-job.t | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/t/evaluator/evaluate-oom-job.t b/t/evaluator/evaluate-oom-job.t index 8c8c5f60..6c17d4e4 100644 --- a/t/evaluator/evaluate-oom-job.t +++ b/t/evaluator/evaluate-oom-job.t @@ -4,6 +4,10 @@ use Setup; use Test2::V0; use Hydra::Helper::Exec; +# Ensure that `systemd-run` is +# - Available in the PATH/envionment +# - Accessable to the user executing it +# - Capable of using the command switches we use in our test my $sd_res; eval { ($sd_res) = captureStdoutStderr(3, ( @@ -17,12 +21,21 @@ eval { "true" )); } or do { + # The command failed to execute, likely because `systemd-run` is not present + # in `PATH` skip_all("`systemd-run` failed when invoked in this environment"); }; -if ($sd_res != 0) { skip_all("`systemd-run` returned non-zero when executing `true` (expected 0)"); } +if ($sd_res != 0) { + # `systemd-run` executed but `sytemd-run` failed to call `true` and return + # successfully + skip_all("`systemd-run` returned non-zero when executing `true` (expected 0)"); +} my $ctx = test_context(); +# Contain the memory usage to 25 MegaBytes using `systemd-run` +# Run `hydra-eval-jobs` on test job that will purposefully consume all memory +# available my ($res, $stdout, $stderr) = captureStdoutStderr(60, ( "systemd-run", "--user", @@ -41,6 +54,8 @@ isnt($res, 0, "`hydra-eval-jobs` exits non-zero"); ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); like( $stderr, + # Assert error log contains messages added in PR + # https://github.com/NixOS/hydra/pull/1203 qr/^child process \(\d+?\) killed by signal=9$/m, "The stderr record includes a relevant error message" );