From 145667cb53aebac706eef7b50e42d366b72f53ac Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Sat, 19 Mar 2022 22:43:19 -0400 Subject: [PATCH 1/4] hydra-update-gc-roots: allow cached refs to the build's jobset Re-executing this search_related on every access turned out to cause severe performance problems. If a jobset had a lot of error output stored in it, and there were many hundreds or thousands of active jobs, this could easily cause >1Gbps of network traffic. --- src/script/hydra-update-gc-roots | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/script/hydra-update-gc-roots b/src/script/hydra-update-gc-roots index fbb90488..f446cdf9 100755 --- a/src/script/hydra-update-gc-roots +++ b/src/script/hydra-update-gc-roots @@ -39,10 +39,7 @@ sub keepBuild { $build->finished; - # After #1093 merges this can become $build->jobset; - # However, with ->jobset being a column on master - # it seems DBIX gets a bit confused. - my ($jobset) = $build->search_related('jobset')->first; + my ($jobset) = $build->jobset; print STDERR " keeping ", ($build->finished ? "" : "scheduled "), "build ", $build->id, " (", $jobset->get_column('project'), ":", $jobset->get_column('name'), ":", $build->get_column('job'), "; ", From f353a7ac41933b376c282b6928b74d3c5e591add Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Sat, 19 Mar 2022 23:12:28 -0400 Subject: [PATCH 2/4] update-gc-roots: try subselecting the jobset table --- src/script/hydra-update-gc-roots | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/script/hydra-update-gc-roots b/src/script/hydra-update-gc-roots index f446cdf9..8cf24d5d 100755 --- a/src/script/hydra-update-gc-roots +++ b/src/script/hydra-update-gc-roots @@ -37,8 +37,6 @@ sub keepBuild { return if defined $seenBuilds{$build->id}; $seenBuilds{$build->id} = 1; - $build->finished; - my ($jobset) = $build->jobset; print STDERR " keeping ", ($build->finished ? 
"" : "scheduled "), "build ", $build->id, " (", @@ -76,13 +74,29 @@ closedir $dir; # For scheduled builds, we register the derivation as a GC root. print STDERR "*** looking for scheduled builds\n"; -keepBuild($_, 0) foreach $db->resultset('Builds')->search({ finished => 0 }, { columns => [ @columns ] }); +keepBuild($_, 0) foreach $db->resultset('Builds')->search( + { finished => 0 }, + { + columns => [ @columns ], + join => 'jobset', + '+select' => ['jobset.project', 'jobset.name'], + '+as' => ['jobset.project', 'jobset.name'], + } +); # Keep all builds that have been marked as "keep". print STDERR "*** looking for kept builds\n"; my @buildsToKeep = $db->resultset('Builds')->search( - { finished => 1, keep => 1 }, { order_by => ["jobset_id", "job", "id"], columns => [ @columns ] }); + { finished => 1, keep => 1 }, + { + order_by => ["jobset_id", "job", "id"], + columns => [ @columns ], + join => 'jobset', + '+select' => ['jobset.project', 'jobset.name'], + '+as' => ['jobset.project', 'jobset.name'], + } +); keepBuild($_, 0) foreach @buildsToKeep; From 137be3452e9a8140b373ae195ba396fa115576c4 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Sat, 19 Mar 2022 23:24:28 -0400 Subject: [PATCH 3/4] Reduce the jobset cols on the remaining two queries --- src/script/hydra-update-gc-roots | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/script/hydra-update-gc-roots b/src/script/hydra-update-gc-roots index 8cf24d5d..315d4ebb 100755 --- a/src/script/hydra-update-gc-roots +++ b/src/script/hydra-update-gc-roots @@ -138,7 +138,14 @@ foreach my $project ($db->resultset('Projects')->search({}, { order_by => ["name { id => { -in => $db->resultset('JobsetEvalMembers')->search({ eval => { -in => [@evals] } }, { select => "build" })->as_query } , finished => 1 }, - { order_by => ["job", "id"], columns => [ @columns ] }); + { + order_by => ["job", "id"], + columns => [ @columns ], + join => 'jobset', + '+select' => ['jobset.project', 
'jobset.name'], + '+as' => ['jobset.project', 'jobset.name'], + } + ); print STDERR "*** looking for the most recent successful builds of current jobs in ", $project->name, ":", $jobset->name, "\n"; @@ -158,7 +165,13 @@ foreach my $project ($db->resultset('Projects')->search({}, { order_by => ["name , select => [ { max => 'id', -as => 'm' } ] })->as_query } }, - { columns => [ @columns ] }); + { + columns => [ @columns ] + join => 'jobset', + '+select' => ['jobset.project', 'jobset.name'], + '+as' => ['jobset.project', 'jobset.name'], + } + ); } } From e5393c2cf87909b251d0ddcdc1ea254956d79660 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Sat, 19 Mar 2022 23:34:13 -0400 Subject: [PATCH 4/4] fixup: make id non-ambiguous --- src/script/hydra-update-gc-roots | 10 +++++----- t/scripts/hydra-update-gc-roots/update-gc-roots.t | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/script/hydra-update-gc-roots b/src/script/hydra-update-gc-roots index 315d4ebb..11eba7a6 100755 --- a/src/script/hydra-update-gc-roots +++ b/src/script/hydra-update-gc-roots @@ -135,11 +135,11 @@ foreach my $project ($db->resultset('Projects')->search({}, { order_by => ["name # Note: we also keep the derivations of failed builds so that # they can be restarted. keepBuild($_, 1) foreach $jobset->builds->search( - { id => { -in => $db->resultset('JobsetEvalMembers')->search({ eval => { -in => [@evals] } }, { select => "build" })->as_query } + { "me.id" => { -in => $db->resultset('JobsetEvalMembers')->search({ eval => { -in => [@evals] } }, { select => "build" })->as_query } , finished => 1 }, { - order_by => ["job", "id"], + order_by => ["job", "me.id"], columns => [ @columns ], join => 'jobset', '+select' => ['jobset.project', 'jobset.name'], @@ -153,7 +153,7 @@ foreach my $project ($db->resultset('Projects')->search({}, { order_by => ["name # Keep the most recently succeeded build of a current job. Oh # I really need to stop using DBIx::Class. 
keepBuild($_, 1) foreach $jobset->builds->search( - { id => { -in => $jobset->builds->search( + { "me.id" => { -in => $jobset->builds->search( { finished => 1 , buildstatus => [0, 6] , job => { -in => $jobset->builds->search( @@ -162,11 +162,11 @@ foreach my $project ($db->resultset('Projects')->search({}, { order_by => ["name )->as_query } }, { group_by => 'job' - , select => [ { max => 'id', -as => 'm' } ] + , select => [ { max => 'me.id', -as => 'm' } ] })->as_query } }, { - columns => [ @columns ] + columns => [ @columns ], join => 'jobset', '+select' => ['jobset.project', 'jobset.name'], '+as' => ['jobset.project', 'jobset.name'], diff --git a/t/scripts/hydra-update-gc-roots/update-gc-roots.t b/t/scripts/hydra-update-gc-roots/update-gc-roots.t index d47e36c1..3e019ece 100644 --- a/t/scripts/hydra-update-gc-roots/update-gc-roots.t +++ b/t/scripts/hydra-update-gc-roots/update-gc-roots.t @@ -16,7 +16,7 @@ subtest "Updating GC roots" => sub { is($res, 0, "hydra-update-gc-roots should exit zero"); if ($res != 0) { print "gc roots stdout: $stdout\n"; - print "gc roots stderr: $stderr"; + print "gc roots stderr: $stderr\n"; } };