From 5506962537354fda90f1c988a993119b11419a66 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 10 Feb 2022 09:37:48 -0500 Subject: [PATCH 1/7] Test::Postgresql: update to a tagged fork to improve parallelism --- flake.nix | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/flake.nix b/flake.nix index 3becf22a..aa3b53bd 100644 --- a/flake.nix +++ b/flake.nix @@ -41,10 +41,12 @@ perlPackages = prev.perlPackages // { TestPostgreSQL = final.perlPackages.buildPerlModule { pname = "Test-PostgreSQL"; - version = "1.27"; - src = final.fetchurl { - url = "mirror://cpan/authors/id/T/TJ/TJC/Test-PostgreSQL-1.27.tar.gz"; - sha256 = "b1bd231693100cc40905fb0ba3173173201621de9c8301f21c5b593b0a46f907"; + version = "1.28-1"; + src = final.fetchFromGitHub { + owner = "grahamc"; + repo = "Test-postgresql"; + rev = "release-1.28-1"; + hash = "sha256-SFC1C3q3dbcBos18CYd/s0TIcfJW4g04ld0+XQXVToQ="; }; buildInputs = with final.perlPackages; [ ModuleBuildTiny TestSharedFork pkgs.postgresql ]; propagatedBuildInputs = with final.perlPackages; [ DBDPg DBI FileWhich FunctionParameters Moo TieHashMethod TryTiny TypeTiny ]; @@ -52,7 +54,7 @@ makeMakerFlags = "POSTGRES_HOME=${final.postgresql}"; meta = { - homepage = https://github.com/TJC/Test-postgresql; + homepage = "https://github.com/grahamc/Test-postgresql/releases/tag/release-1.28-1"; description = "PostgreSQL runner for tests"; license = with final.lib.licenses; [ artistic2 ]; }; From 09652475bd17d084cfceb133c23f0ace16564757 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 10 Feb 2022 10:00:32 -0500 Subject: [PATCH 2/7] Set the YATH_JOB_COUNT to NIX_BUILD_CORES if no parallelism variables are set --- t/test.pl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/t/test.pl b/t/test.pl index fc5b215a..7f17f107 100644 --- a/t/test.pl +++ b/t/test.pl @@ -11,6 +11,15 @@ BEGIN { $App::Yath::Script::SCRIPT = which 'yath'; } use App::Yath::Util qw/find_yath/; +use List::SomeUtils qw(none); 
+ +if (defined($ENV{"NIX_BUILD_CORES"}) + and not defined($ENV{"YATH_JOB_COUNT"}) + and not defined($ENV{"T2_HARNESS_JOB_COUNT"}) + and not defined($ENV{"T2_HARNESS_JOB_COUNT"})) { + $ENV{"YATH_JOB_COUNT"} = $ENV{"NIX_BUILD_CORES"}; + print STDERR "test.pl: Defaulting \$YATH_JOB_COUNT to \$NIX_BUILD_CORES (${\$ENV{'NIX_BUILD_CORES'}})\n"; +} system($^X, find_yath(), '-D', 'test', '--default-search' => './', @ARGV); my $exit = $?; From 4acaf9c8b05dbdac88c7fbf153a11282fc4096f1 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 10 Feb 2022 10:51:12 -0500 Subject: [PATCH 3/7] hydra-queue-runner: don't dispatch until the machines parser has completed one run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Periodically, I have seen tests fail because of out of order queue runner behavior: checking the queue for builds > 0... loading build 1 (tests:basic:empty_dir) aborting unsupported build step '...-empty-dir.drv' (type 'x86_64-linux') marking build 1 as failed adding new machine ‘localhost’ This patch should prevent the dispatcher from running before any machines are made available. 
--- src/hydra-queue-runner/dispatcher.cc | 4 +++- src/hydra-queue-runner/hydra-queue-runner.cc | 10 +++++++++- src/hydra-queue-runner/state.hh | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/hydra-queue-runner/dispatcher.cc b/src/hydra-queue-runner/dispatcher.cc index 8c497a66..d2bb3c90 100644 --- a/src/hydra-queue-runner/dispatcher.cc +++ b/src/hydra-queue-runner/dispatcher.cc @@ -31,8 +31,10 @@ void State::makeRunnable(Step::ptr step) void State::dispatcher() { - while (true) { + printMsg(lvlDebug, "Waiting for the machines parsing to have completed at least once"); + machinesReadyLock.lock(); + while (true) { try { printMsg(lvlDebug, "dispatcher woken up"); nrDispatcherWakeups++; diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 62eb572c..3297730c 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -158,6 +158,7 @@ void State::monitorMachinesFile() (settings.thisSystem == "x86_64-linux" ? "x86_64-linux,i686-linux" : settings.thisSystem.get()) + " - " + std::to_string(settings.maxBuildJobs) + " 1 " + concatStringsSep(",", settings.systemFeatures.get())); + machinesReadyLock.unlock(); return; } @@ -203,9 +204,15 @@ void State::monitorMachinesFile() parseMachines(contents); }; + auto firstParse = true; + while (true) { try { readMachinesFiles(); + if (firstParse) { + machinesReadyLock.unlock(); + firstParse = false; + } // FIXME: use inotify. 
sleep(30); } catch (std::exception & e) { @@ -321,7 +328,7 @@ int State::createSubstitutionStep(pqxx::work & txn, time_t startTime, time_t sto txn.exec_params0 ("insert into BuildStepOutputs (build, stepnr, name, path) values ($1, $2, $3, $4)", - build->id, stepNr, outputName, + build->id, stepNr, outputName, localStore->printStorePath(storePath)); return stepNr; @@ -770,6 +777,7 @@ void State::run(BuildID buildOne) dumpStatus(*conn); } + machinesReadyLock.lock(); std::thread(&State::monitorMachinesFile, this).detach(); std::thread(&State::queueMonitor, this).detach(); diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 1eed5a84..8f303d28 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -342,6 +342,7 @@ private: nix::Pool dbPool; /* The build machines. */ + std::mutex machinesReadyLock; typedef std::map Machines; nix::Sync machines; // FIXME: use atomic_shared_ptr From e709a175081181b0e57756677b216ebf71e2d482 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 10 Feb 2022 11:01:09 -0500 Subject: [PATCH 4/7] gitea.t: try opening the file a few times --- t/Hydra/Plugin/gitea.t | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/t/Hydra/Plugin/gitea.t b/t/Hydra/Plugin/gitea.t index 27fe1173..1f8da349 100644 --- a/t/Hydra/Plugin/gitea.t +++ b/t/Hydra/Plugin/gitea.t @@ -60,6 +60,13 @@ if (!defined($pid = fork())) { kill('INT', $pid); } +for my $i (1..10) { + if (! -f $filename) { + diag("$filename does not yet exist"); + sleep(1); + } +} + open(my $fh, "<", $filename) or die ("Can't open(): $!\n"); my $i = 0; my $uri = <$fh>; From 33f4c4c13d57866ecda3d822f21bc207d5f645b9 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 10 Feb 2022 11:13:31 -0500 Subject: [PATCH 5/7] build-locally-with-substitutable-path.t: give nix-store --delete a bit more time to run Under high load, like 64-128 tests at once, this can take more than a second. 
--- t/queue-runner/build-locally-with-substitutable-path.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/queue-runner/build-locally-with-substitutable-path.t b/t/queue-runner/build-locally-with-substitutable-path.t index 4f2c3057..e3b31761 100644 --- a/t/queue-runner/build-locally-with-substitutable-path.t +++ b/t/queue-runner/build-locally-with-substitutable-path.t @@ -39,7 +39,7 @@ subtest "Building, caching, and then garbage collecting the underlying job" => s ok(unlink(Hydra::Helper::Nix::gcRootFor($path)), "Unlinking the GC root for underlying Dependency succeeds"); - (my $ret, my $stdout, my $stderr) = captureStdoutStderr(1, "nix-store", "--delete", $path); + (my $ret, my $stdout, my $stderr) = captureStdoutStderr(5, "nix-store", "--delete", $path); is($ret, 0, "Deleting the underlying dependency should succeed"); }; From a1579007f44bf694a0c8e6a4f356fcb8e56c3c92 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 10 Feb 2022 11:53:10 -0500 Subject: [PATCH 6/7] Update t/test.pl: fixup the list of env vars yath looks at for job count --- t/test.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/test.pl b/t/test.pl index 7f17f107..ba7f3781 100644 --- a/t/test.pl +++ b/t/test.pl @@ -16,7 +16,7 @@ use List::SomeUtils qw(none); if (defined($ENV{"NIX_BUILD_CORES"}) and not defined($ENV{"YATH_JOB_COUNT"}) and not defined($ENV{"T2_HARNESS_JOB_COUNT"}) - and not defined($ENV{"T2_HARNESS_JOB_COUNT"})) { + and not defined($ENV{"HARNESS_JOB_COUNT"})) { $ENV{"YATH_JOB_COUNT"} = $ENV{"NIX_BUILD_CORES"}; print STDERR "test.pl: Defaulting \$YATH_JOB_COUNT to \$NIX_BUILD_CORES (${\$ENV{'NIX_BUILD_CORES'}})\n"; } From 4f9aea943478780d6a7d71c549d44d1e1413d560 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Thu, 10 Feb 2022 12:02:29 -0500 Subject: [PATCH 7/7] t/Hydra/Plugin/gitea.t: explain why we loop a few times Co-authored-by: Cole Helbling --- t/Hydra/Plugin/gitea.t | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/t/Hydra/Plugin/gitea.t b/t/Hydra/Plugin/gitea.t index 1f8da349..d84c8f57 100644 --- a/t/Hydra/Plugin/gitea.t +++ b/t/Hydra/Plugin/gitea.t @@ -60,6 +60,9 @@ if (!defined($pid = fork())) { kill('INT', $pid); } +# We expect $ctx{jobsdir}/server.py to create the file at $filename, but the time it +# takes to do so is non-deterministic. We need to give it _some_ time to hopefully +# settle -- but not too much that it drastically slows things down. for my $i (1..10) { if (! -f $filename) { diag("$filename does not yet exist");