From e9670641ecedfa087bfb65e446c9ff57c260e482 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 7 Dec 2017 15:35:31 +0100 Subject: [PATCH] Distinguish build step states The web interface now shows whether a build step is connecting, copying inputs/outputs, building, etc. --- doc/dev-notes.txt | 2 +- src/hydra-queue-runner/build-remote.cc | 11 +++++++++- src/hydra-queue-runner/builder.cc | 12 +++++++++-- src/hydra-queue-runner/hydra-queue-runner.cc | 16 ++++++++++++-- src/hydra-queue-runner/state.hh | 15 ++++++++++++- src/lib/Hydra/Controller/Root.pm | 6 +++--- src/root/build.tt | 22 ++++++++++++++++---- src/root/machine-status.tt | 2 +- src/sql/hydra.sql | 9 +++++++- src/sql/upgrade-56.sql | 2 ++ 10 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 src/sql/upgrade-56.sql diff --git a/doc/dev-notes.txt b/doc/dev-notes.txt index 98e9dc17..601aa852 100644 --- a/doc/dev-notes.txt +++ b/doc/dev-notes.txt @@ -95,7 +95,7 @@ * Delete all scheduled builds that are not already building: - delete from builds where finished = 0 and not exists (select 1 from buildschedulinginfo s where s.id = builds.id and busy = 1); + delete from builds where finished = 0 and not exists (select 1 from buildschedulinginfo s where s.id = builds.id and busy != 0); * select x.project, x.jobset, x.job, x.system, x.id, x.timestamp, r.buildstatus, b.id, b.timestamp diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc index 55958b6c..dccc4469 100644 --- a/src/hydra-queue-runner/build-remote.cc +++ b/src/hydra-queue-runner/build-remote.cc @@ -122,7 +122,8 @@ static void copyClosureTo(std::timed_mutex & sendMutex, ref destStore, void State::buildRemote(ref destStore, Machine::ptr machine, Step::ptr step, unsigned int maxSilentTime, unsigned int buildTimeout, unsigned int repeats, - RemoteResult & result, std::shared_ptr activeStep) + RemoteResult & result, std::shared_ptr activeStep, + std::function updateStep) { 
assert(BuildResult::TimedOut == 8); @@ -140,6 +141,8 @@ void State::buildRemote(ref destStore, try { + updateStep(ssConnecting); + Child child; openConnection(machine, tmpDir, logFD.get(), child); @@ -204,6 +207,8 @@ void State::buildRemote(ref destStore, outputs of the input derivations. On Nix > 1.9, we only need to copy the immediate sources of the derivation and the required outputs of the input derivations. */ + updateStep(ssSendingInputs); + PathSet inputs; BasicDerivation basicDrv(step->drv); @@ -260,6 +265,8 @@ void State::buildRemote(ref destStore, /* Do the build. */ printMsg(lvlDebug, format("building ‘%1%’ on ‘%2%’") % step->drvPath % machine->sshName); + updateStep(ssBuilding); + if (sendDerivation) to << cmdBuildPaths << PathSet({step->drvPath}); else @@ -371,6 +378,8 @@ void State::buildRemote(ref destStore, /* Copy the output paths. */ if (/* machine->sshName != "localhost" */ true) { + updateStep(ssReceivingOutputs); + MaintainCount mc(nrStepsCopyingFrom); auto now1 = std::chrono::steady_clock::now(); diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index ceaa252e..d9a7cbbb 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -195,10 +195,16 @@ State::StepResult State::doBuildStep(nix::ref destStore, txn.commit(); } + auto updateStep = [&](StepState stepState) { + pqxx::work txn(*conn); + updateBuildStep(txn, buildId, stepNr, stepState); + txn.commit(); + }; + /* Do the build. */ try { /* FIXME: referring builds may have conflicting timeouts. 
*/ - buildRemote(destStore, machine, step, maxSilentTime, buildTimeout, repeats, result, activeStep); + buildRemote(destStore, machine, step, maxSilentTime, buildTimeout, repeats, result, activeStep, updateStep); } catch (NoTokens & e) { result.stepStatus = bsNarSizeLimitExceeded; } catch (Error & e) { @@ -213,8 +219,10 @@ State::StepResult State::doBuildStep(nix::ref destStore, } } - if (result.stepStatus == bsSuccess) + if (result.stepStatus == bsSuccess) { + updateStep(ssPostProcessing); res = getBuildOutput(destStore, ref(result.accessor), step->drv); + } result.accessor = 0; result.tokens = 0; diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 7339d603..85635823 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -271,7 +271,7 @@ void State::clearBusy(Connection & conn, time_t stopTime) { pqxx::work txn(conn); txn.parameterized - ("update BuildSteps set busy = 0, status = $1, stopTime = $2 where busy = 1") + ("update BuildSteps set busy = 0, status = $1, stopTime = $2 where busy != 0") ((int) bsAborted) (stopTime, stopTime != 0).exec(); txn.commit(); @@ -317,6 +317,18 @@ unsigned int State::createBuildStep(pqxx::work & txn, time_t startTime, BuildID } +void State::updateBuildStep(pqxx::work & txn, BuildID buildId, unsigned int stepNr, StepState stepState) +{ + if (txn.parameterized + ("update BuildSteps set busy = $1 where build = $2 and stepnr = $3 and busy != 0 and status is null") + ((int) stepState) + (buildId) + (stepNr) + .exec().affected_rows() != 1) + throw Error("step %d of build %d is in an unexpected state", stepNr, buildId); +} + + void State::finishBuildStep(pqxx::work & txn, const RemoteResult & result, BuildID buildId, unsigned int stepNr, const std::string & machine) { @@ -892,7 +904,7 @@ void State::run(BuildID buildOne) for (auto & step : steps) { printMsg(lvlError, format("cleaning orphaned step %d of build %d") % 
step.second % step.first); txn.parameterized - ("update BuildSteps set busy = 0, status = $1 where build = $2 and stepnr = $3 and busy = 1") + ("update BuildSteps set busy = 0, status = $1 where build = $2 and stepnr = $3 and busy != 0") ((int) bsAborted) (step.first) (step.second).exec(); diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 543a36e3..fc17a116 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -41,6 +41,16 @@ typedef enum { } BuildStatus; +typedef enum { + ssPreparing = 1, + ssConnecting = 10, + ssSendingInputs = 20, + ssBuilding = 30, + ssReceivingOutputs = 40, + ssPostProcessing = 50, +} StepState; + + struct RemoteResult { BuildStatus stepStatus = bsAborted; @@ -464,6 +474,8 @@ private: const std::string & machine, BuildStatus status, const std::string & errorMsg = "", BuildID propagatedFrom = 0); + void updateBuildStep(pqxx::work & txn, BuildID buildId, unsigned int stepNr, StepState stepState); + void finishBuildStep(pqxx::work & txn, const RemoteResult & result, BuildID buildId, unsigned int stepNr, const std::string & machine); @@ -518,7 +530,8 @@ private: Machine::ptr machine, Step::ptr step, unsigned int maxSilentTime, unsigned int buildTimeout, unsigned int repeats, - RemoteResult & result, std::shared_ptr activeStep); + RemoteResult & result, std::shared_ptr activeStep, + std::function updateStep); void markSucceededBuild(pqxx::work & txn, Build::ptr build, const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime); diff --git a/src/lib/Hydra/Controller/Root.pm b/src/lib/Hydra/Controller/Root.pm index e6a503a1..8e53dad7 100644 --- a/src/lib/Hydra/Controller/Root.pm +++ b/src/lib/Hydra/Controller/Root.pm @@ -53,7 +53,7 @@ sub begin :Private { if (scalar(@args) == 0 || $args[0] ne "static") { $c->stash->{nrRunningBuilds} = dbh($c)->selectrow_array( - "select count(distinct build) from buildsteps where busy = 1"); + "select count(distinct build) from 
buildsteps where busy != 0"); $c->stash->{nrQueuedBuilds} = $c->model('DB::Builds')->search({ finished => 0 })->count(); } @@ -145,7 +145,7 @@ sub status_GET { $self->status_ok( $c, entity => [$c->model('DB::Builds')->search( - { "buildsteps.busy" => 1 }, + { "buildsteps.busy" => { '!=', 0 } }, { order_by => ["globalpriority DESC", "id"], join => "buildsteps", columns => [@buildListColumns] @@ -193,7 +193,7 @@ sub machines :Local Args(0) { $c->stash->{steps} = dbh($c)->selectall_arrayref( "select build, stepnr, s.system as system, s.drvpath as drvpath, machine, s.starttime as starttime, project, jobset, job " . "from BuildSteps s join Builds b on s.build = b.id " . - "where busy = 1 order by machine, stepnr", + "where busy != 0 order by machine, stepnr", { Slice => {} }); $c->stash->{template} = 'machine-status.tt'; } diff --git a/src/root/build.tt b/src/root/build.tt index 8b7171a9..ee0dcaa2 100644 --- a/src/root/build.tt +++ b/src/root/build.tt @@ -29,7 +29,7 @@ END; [% FOREACH step IN steps %] - [% IF ( type == "All" ) || ( type == "Failed" && step.busy == 0 && step.status != 0 ) || ( type == "Running" && step.busy == 1 ) %] + [% IF ( type == "All" ) || ( type == "Failed" && step.busy == 0 && step.status != 0 ) || ( type == "Running" && step.busy != 0 ) %] [% has_log = seen.${step.drvpath} ? 
0 : buildStepLogExists(step); seen.${step.drvpath} = 1; log = c.uri_for('/build' build.id 'nixlog' step.stepnr); %] @@ -55,10 +55,24 @@ END; INCLUDE renderDuration duration = curTime - step.starttime; END %] - [% IF step.busy == 1 || ((step.machine || step.starttime) && (step.status == 0 || step.status == 1 || step.status == 3 || step.status == 4 || step.status == 7)); INCLUDE renderMachineName machine=step.machine; ELSE; "n/a"; END %] + [% IF step.busy != 0 || ((step.machine || step.starttime) && (step.status == 0 || step.status == 1 || step.status == 3 || step.status == 4 || step.status == 7)); INCLUDE renderMachineName machine=step.machine; ELSE; "n/a"; END %] - [% IF step.busy == 1 %] - Building + [% IF step.busy != 0 %] + [% IF step.busy == 1 %] + Preparing + [% ELSIF step.busy == 10 %] + Connecting + [% ELSIF step.busy == 20 %] + Sending inputs + [% ELSIF step.busy == 30 %] + Building + [% ELSIF step.busy == 40 %] + Receiving outputs + [% ELSIF step.busy == 50 %] + Post-processing + [% ELSE %] + Unknown state + [% END %] [% ELSIF step.status == 0 %] [% IF step.isnondeterministic %] Succeeded with non-determistic result diff --git a/src/root/machine-status.tt b/src/root/machine-status.tt index 7c6d1f2b..4195c178 100644 --- a/src/root/machine-status.tt +++ b/src/root/machine-status.tt @@ -42,7 +42,7 @@ [% INCLUDE renderFullJobName project=step.project jobset=step.jobset job=step.job %] [% step.system %] [% step.build %] - [% step.stepnr %] + [% IF step.busy >= 30 %][% step.stepnr %][% ELSE; step.stepnr; END %] [% step.drvpath.match('-(.*)').0 %] [% INCLUDE renderDuration duration = curTime - step.starttime %] diff --git a/src/sql/hydra.sql b/src/sql/hydra.sql index d1c3ee37..4c8710b8 100644 --- a/src/sql/hydra.sql +++ b/src/sql/hydra.sql @@ -272,6 +272,13 @@ create table BuildSteps ( drvPath text, + -- 0 = not busy + -- 1 = preparing to build + -- 10 = connecting + -- 20 = sending inputs + -- 30 = building + -- 40 = receiving outputs + -- 50 = post-processing (analysing build result) 
busy integer not null, status integer, -- see Builds.buildStatus @@ -648,7 +655,7 @@ create index IndexBuildInputsOnDependency on BuildInputs(dependency); create index IndexBuildMetricsOnJobTimestamp on BuildMetrics(project, jobset, job, timestamp desc); create index IndexBuildProducstOnBuildAndType on BuildProducts(build, type); create index IndexBuildProductsOnBuild on BuildProducts(build); -create index IndexBuildStepsOnBusy on BuildSteps(busy) where busy = 1; +create index IndexBuildStepsOnBusy on BuildSteps(busy) where busy != 0; create index IndexBuildStepsOnDrvPath on BuildSteps(drvpath); create index IndexBuildStepsOnPropagatedFrom on BuildSteps(propagatedFrom) where propagatedFrom is not null; create index IndexBuildStepsOnStopTime on BuildSteps(stopTime desc) where startTime is not null and stopTime is not null; diff --git a/src/sql/upgrade-56.sql b/src/sql/upgrade-56.sql new file mode 100644 index 00000000..6b9449aa --- /dev/null +++ b/src/sql/upgrade-56.sql @@ -0,0 +1,2 @@ +drop index IndexBuildStepsOnBusy; +create index IndexBuildStepsOnBusy on BuildSteps(busy) where busy != 0;