From ed6b3165ea3de4e1dda93ce0f23ff61079e4fe0b Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 26 May 2024 17:45:36 +0200 Subject: [PATCH] worker: process timeouts first, and exclusively check goals for timeouts first, and their activity fds only if no timeout has occurred. checking for timeouts *after* activity sets us up for assertion failures by running multiple build completion notifiers, the first of which will kill/reap the the goal process and consuming the Pid instance. when the second notifier attempts to do the same it will core dump with an assertion failure in Pid and take down not only the single goal, but the entire daemon and all goals it was building. luckily this is rare in practice since it requires a build to both finish and time out at the same time. writing a test for this is not feasible due to how much it relies on scheduling to actually trigger the underlying bug, but on idle machines it can usually be triggered by running multiple sleeping builds with timeout set to the sleep duration and `--keep-going`: nix-build --timeout 10 --builders '' --keep-going -E ' with import {}; builtins.genList (i: runCommand "foo-${toString i}" {} "sleep 10") 100 ' Change-Id: I394d36b2e5ffb909cf8a19977d569bbdb71cb67b --- src/libstore/build/worker.cc | 42 +++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/libstore/build/worker.cc b/src/libstore/build/worker.cc index 83167b0f3..04be0da99 100644 --- a/src/libstore/build/worker.cc +++ b/src/libstore/build/worker.cc @@ -429,6 +429,28 @@ void Worker::waitForInput() GoalPtr goal = j->goal.lock(); assert(goal); + if (goal->exitCode == Goal::ecBusy && + 0 != settings.maxSilentTime && + j->respectTimeouts && + after - j->lastOutput >= std::chrono::seconds(settings.maxSilentTime)) + { + goal->timedOut(Error( + "%1% timed out after %2% seconds of silence", + goal->getName(), settings.maxSilentTime)); + continue; + } + + else if (goal->exitCode == Goal::ecBusy && + 0 != settings.buildTimeout && + j->respectTimeouts && + after - j->timeStarted >= std::chrono::seconds(settings.buildTimeout)) + { + goal->timedOut(Error( + "%1% timed out after %2% seconds", + goal->getName(), settings.buildTimeout)); + continue; + } + std::set fds2(j->fds); std::vector buffer(4096); for (auto & k : fds2) { @@ -455,26 +477,6 @@ void Worker::waitForInput() } } } - - if (goal->exitCode == Goal::ecBusy && - 0 != settings.maxSilentTime && - j->respectTimeouts && - after - j->lastOutput >= std::chrono::seconds(settings.maxSilentTime)) - { - goal->timedOut(Error( - "%1% timed out after %2% seconds of silence", - goal->getName(), settings.maxSilentTime)); - } - - else if (goal->exitCode == Goal::ecBusy && - 0 != settings.buildTimeout && - j->respectTimeouts && - after - j->timeStarted >= std::chrono::seconds(settings.buildTimeout)) - { - goal->timedOut(Error( - "%1% timed out after %2% seconds", - goal->getName(), settings.buildTimeout)); - } } if (!waitingForAWhile.empty() && lastWokenUp + std::chrono::seconds(settings.pollInterval) <= after) {