2015-07-07 08:17:21 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <atomic>
|
|
|
|
|
#include <chrono>
|
|
|
|
|
#include <condition_variable>
|
|
|
|
|
#include <map>
|
|
|
|
|
#include <memory>
|
|
|
|
|
#include <queue>
|
|
|
|
|
|
|
|
|
|
#include "db.hh"
|
|
|
|
|
#include "counter.hh"
|
|
|
|
|
#include "pathlocks.hh"
|
|
|
|
|
#include "pool.hh"
|
|
|
|
|
#include "sync.hh"
|
|
|
|
|
|
|
|
|
|
#include "store-api.hh"
|
|
|
|
|
#include "derivations.hh"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Numeric identifier of a build (the type of Build::id). */
using BuildID = unsigned int;
|
|
|
|
|
|
|
|
|
|
/* A point in time measured on std::chrono::system_clock. */
using system_time = std::chrono::system_clock::time_point;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Status of a build.  The numeric values are spelled out explicitly and
   are not contiguous, so they must not be renumbered.
   NOTE(review): presumably these values are persisted outside the
   process -- confirm against the database schema. */
enum BuildStatus {
    bsSuccess = 0,
    bsFailed = 1,
    bsDepFailed = 2,
    bsAborted = 3,
    bsFailedWithOutput = 6,
    bsTimedOut = 7,
    bsUnsupported = 9,
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Status of a single build step.  As with BuildStatus, the numeric values
   are explicit and non-contiguous and must not be renumbered. */
enum BuildStepStatus {
    bssSuccess = 0,
    bssFailed = 1,
    bssAborted = 4,
    bssTimedOut = 7,
    bssCachedFailure = 8,
    bssUnsupported = 9,
    bssBusy = 100, // not stored
};
|
|
|
|
|
|
|
|
|
|
|
2015-07-20 23:45:00 +00:00
|
|
|
|
struct RemoteResult : nix::BuildResult
|
2015-07-07 08:25:33 +00:00
|
|
|
|
{
|
|
|
|
|
time_t startTime = 0, stopTime = 0;
|
|
|
|
|
nix::Path logFile;
|
2015-07-20 23:45:00 +00:00
|
|
|
|
|
|
|
|
|
bool canRetry()
|
|
|
|
|
{
|
|
|
|
|
return status == TransientFailure || status == MiscFailure;
|
|
|
|
|
}
|
2015-07-07 08:25:33 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2015-07-07 08:17:21 +00:00
|
|
|
|
/* Forward declaration: Step is defined further down in this file but is
   already referenced by Build::toplevel. */
struct Step;
|
2015-07-20 23:45:00 +00:00
|
|
|
|
/* Forward declaration only; no definition is visible in this file.  It is
   used by const reference in State::markSucceededBuild(). */
struct BuildOutput;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
|
|
|
|
|
|
2015-08-10 23:30:24 +00:00
|
|
|
|
class Jobset
|
|
|
|
|
{
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
|
|
typedef std::shared_ptr<Jobset> ptr;
|
|
|
|
|
typedef std::weak_ptr<Jobset> wptr;
|
|
|
|
|
|
|
|
|
|
static const time_t schedulingWindow = 24 * 60 * 60;
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
|
|
std::atomic<time_t> seconds{0};
|
2015-08-12 11:17:56 +00:00
|
|
|
|
std::atomic<unsigned int> shares{1};
|
2015-08-10 23:30:24 +00:00
|
|
|
|
|
|
|
|
|
/* The start time and duration of the most recent build steps. */
|
|
|
|
|
Sync<std::map<time_t, time_t>> steps;
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
|
|
double shareUsed()
|
|
|
|
|
{
|
|
|
|
|
return (double) seconds / shares;
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-12 11:17:56 +00:00
|
|
|
|
void setShares(int shares_)
|
|
|
|
|
{
|
|
|
|
|
assert(shares_ > 0);
|
|
|
|
|
shares = shares_;
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-10 23:30:24 +00:00
|
|
|
|
time_t getSeconds() { return seconds; }
|
|
|
|
|
|
|
|
|
|
void addStep(time_t startTime, time_t duration);
|
|
|
|
|
|
|
|
|
|
void pruneSteps();
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2015-07-07 08:17:21 +00:00
|
|
|
|
struct Build
|
|
|
|
|
{
|
|
|
|
|
typedef std::shared_ptr<Build> ptr;
|
|
|
|
|
typedef std::weak_ptr<Build> wptr;
|
|
|
|
|
|
|
|
|
|
BuildID id;
|
2015-07-07 08:29:43 +00:00
|
|
|
|
nix::Path drvPath;
|
|
|
|
|
std::map<std::string, nix::Path> outputs;
|
2015-07-30 22:57:30 +00:00
|
|
|
|
std::string projectName, jobsetName, jobName;
|
|
|
|
|
time_t timestamp;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
unsigned int maxSilentTime, buildTimeout;
|
2015-08-12 10:05:43 +00:00
|
|
|
|
int localPriority, globalPriority;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
|
|
|
|
|
std::shared_ptr<Step> toplevel;
|
|
|
|
|
|
2015-08-10 23:30:24 +00:00
|
|
|
|
Jobset::ptr jobset;
|
|
|
|
|
|
2015-07-07 08:17:21 +00:00
|
|
|
|
std::atomic_bool finishedInDB{false};
|
2015-07-30 22:57:30 +00:00
|
|
|
|
|
|
|
|
|
std::string fullJobName()
|
|
|
|
|
{
|
|
|
|
|
return projectName + ":" + jobsetName + ":" + jobName;
|
|
|
|
|
}
|
2015-08-10 14:18:06 +00:00
|
|
|
|
|
|
|
|
|
void propagatePriorities();
|
2015-07-07 08:17:21 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct Step
|
|
|
|
|
{
|
|
|
|
|
typedef std::shared_ptr<Step> ptr;
|
|
|
|
|
typedef std::weak_ptr<Step> wptr;
|
|
|
|
|
|
2015-07-07 08:29:43 +00:00
|
|
|
|
nix::Path drvPath;
|
|
|
|
|
nix::Derivation drv;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
std::set<std::string> requiredSystemFeatures;
|
|
|
|
|
bool preferLocalBuild;
|
|
|
|
|
|
|
|
|
|
struct State
|
|
|
|
|
{
|
|
|
|
|
/* Whether the step has finished initialisation. */
|
|
|
|
|
bool created = false;
|
|
|
|
|
|
|
|
|
|
/* The build steps on which this step depends. */
|
|
|
|
|
std::set<Step::ptr> deps;
|
|
|
|
|
|
|
|
|
|
/* The build steps that depend on this step. */
|
|
|
|
|
std::vector<Step::wptr> rdeps;
|
|
|
|
|
|
|
|
|
|
/* Builds that have this step as the top-level derivation. */
|
|
|
|
|
std::vector<Build::wptr> builds;
|
|
|
|
|
|
2015-08-10 23:30:24 +00:00
|
|
|
|
/* Jobsets to which this step belongs. Used for determining
|
|
|
|
|
scheduling priority. */
|
|
|
|
|
std::set<Jobset::ptr> jobsets;
|
|
|
|
|
|
2015-07-07 08:17:21 +00:00
|
|
|
|
/* Number of times we've tried this step. */
|
|
|
|
|
unsigned int tries = 0;
|
|
|
|
|
|
|
|
|
|
/* Point in time after which the step can be retried. */
|
|
|
|
|
system_time after;
|
2015-08-10 12:50:22 +00:00
|
|
|
|
|
2015-08-10 14:18:06 +00:00
|
|
|
|
/* The highest global priority of any build depending on this
|
|
|
|
|
step. */
|
|
|
|
|
int highestGlobalPriority{0};
|
|
|
|
|
|
2015-08-10 23:30:24 +00:00
|
|
|
|
/* The lowest share used of any jobset depending on this
|
|
|
|
|
step. */
|
|
|
|
|
double lowestShareUsed;
|
|
|
|
|
|
2015-08-12 10:05:43 +00:00
|
|
|
|
/* The highest local priority of any build depending on this
|
|
|
|
|
step. */
|
|
|
|
|
int highestLocalPriority{0};
|
|
|
|
|
|
2015-08-10 14:18:06 +00:00
|
|
|
|
/* The lowest ID of any build depending on this step. */
|
2015-08-10 12:50:22 +00:00
|
|
|
|
BuildID lowestBuildID{std::numeric_limits<BuildID>::max()};
|
2015-07-07 08:17:21 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
std::atomic_bool finished{false}; // debugging
|
|
|
|
|
|
|
|
|
|
Sync<State> state;
|
|
|
|
|
|
|
|
|
|
~Step()
|
|
|
|
|
{
|
|
|
|
|
//printMsg(lvlError, format("destroying step %1%") % drvPath);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2015-07-21 13:14:17 +00:00
|
|
|
|
/* Defined out of line.  'builds' and 'steps' are taken by non-const
   reference, i.e. they are result (or in/out) sets.
   NOTE(review): presumably they receive the builds and steps that depend
   on 'step' -- confirm against the definition. */
void getDependents(Step::ptr step, std::set<Build::ptr> & builds, std::set<Step::ptr> & steps);
|
|
|
|
|
|
2015-08-10 12:50:22 +00:00
|
|
|
|
/* Call ‘visitor’ for a step and all its dependencies. */
|
|
|
|
|
/* NOTE(review): the traversal order, and whether a step reachable through
   several paths is visited only once, cannot be told from this
   declaration -- check the definition before relying on either. */
void visitDependencies(std::function<void(Step::ptr)> visitor, Step::ptr step);
|
|
|
|
|
|
2015-07-21 13:14:17 +00:00
|
|
|
|
|
2015-07-07 08:17:21 +00:00
|
|
|
|
struct Machine
|
|
|
|
|
{
|
|
|
|
|
typedef std::shared_ptr<Machine> ptr;
|
|
|
|
|
|
|
|
|
|
std::string sshName, sshKey;
|
|
|
|
|
std::set<std::string> systemTypes, supportedFeatures, mandatoryFeatures;
|
|
|
|
|
unsigned int maxJobs = 1;
|
|
|
|
|
float speedFactor = 1.0;
|
|
|
|
|
|
|
|
|
|
struct State {
|
|
|
|
|
typedef std::shared_ptr<State> ptr;
|
|
|
|
|
counter currentJobs{0};
|
|
|
|
|
counter nrStepsDone{0};
|
|
|
|
|
counter totalStepTime{0}; // total time for steps, including closure copying
|
|
|
|
|
counter totalStepBuildTime{0}; // total build time for steps
|
2015-07-07 12:04:36 +00:00
|
|
|
|
|
2015-07-21 13:53:27 +00:00
|
|
|
|
struct ConnectInfo
|
|
|
|
|
{
|
|
|
|
|
system_time lastFailure, disabledUntil;
|
|
|
|
|
unsigned int consecutiveFailures;
|
|
|
|
|
};
|
|
|
|
|
Sync<ConnectInfo> connectInfo;
|
|
|
|
|
|
2015-07-07 12:04:36 +00:00
|
|
|
|
/* Mutex to prevent multiple threads from sending data to the
|
|
|
|
|
same machine (which would be inefficient). */
|
|
|
|
|
std::mutex sendLock;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
State::ptr state;
|
|
|
|
|
|
|
|
|
|
bool supportsStep(Step::ptr step)
|
|
|
|
|
{
|
|
|
|
|
if (systemTypes.find(step->drv.platform) == systemTypes.end()) return false;
|
|
|
|
|
for (auto & f : mandatoryFeatures)
|
|
|
|
|
if (step->requiredSystemFeatures.find(f) == step->requiredSystemFeatures.end()
|
|
|
|
|
&& !(step->preferLocalBuild && f == "local"))
|
|
|
|
|
return false;
|
|
|
|
|
for (auto & f : step->requiredSystemFeatures)
|
|
|
|
|
if (supportedFeatures.find(f) == supportedFeatures.end()) return false;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class State
|
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
|
2015-07-21 13:14:17 +00:00
|
|
|
|
// FIXME: Make configurable.
|
|
|
|
|
const unsigned int maxTries = 5;
|
|
|
|
|
const unsigned int retryInterval = 60; // seconds
|
|
|
|
|
const float retryBackoff = 3.0;
|
|
|
|
|
const unsigned int maxParallelCopyClosure = 4;
|
|
|
|
|
|
2015-07-07 08:29:43 +00:00
|
|
|
|
nix::Path hydraData, logDir;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
|
2015-07-07 08:29:43 +00:00
|
|
|
|
nix::StringSet localPlatforms;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
|
|
|
|
|
/* The queued builds. */
|
|
|
|
|
typedef std::map<BuildID, Build::ptr> Builds;
|
|
|
|
|
Sync<Builds> builds;
|
|
|
|
|
|
2015-08-10 23:30:24 +00:00
|
|
|
|
/* The jobsets. */
|
|
|
|
|
typedef std::map<std::pair<std::string, std::string>, Jobset::ptr> Jobsets;
|
|
|
|
|
Sync<Jobsets> jobsets;
|
|
|
|
|
|
2015-07-07 08:17:21 +00:00
|
|
|
|
/* All active or pending build steps (i.e. dependencies of the
|
|
|
|
|
queued builds). Note that these are weak pointers. Steps are
|
|
|
|
|
kept alive by being reachable from Builds or by being in
|
|
|
|
|
progress. */
|
2015-07-07 08:29:43 +00:00
|
|
|
|
typedef std::map<nix::Path, Step::wptr> Steps;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
Sync<Steps> steps;
|
|
|
|
|
|
|
|
|
|
/* Build steps that have no unbuilt dependencies. */
|
|
|
|
|
typedef std::list<Step::wptr> Runnable;
|
|
|
|
|
Sync<Runnable> runnable;
|
|
|
|
|
|
|
|
|
|
/* CV for waking up the dispatcher. */
|
2015-08-10 09:58:33 +00:00
|
|
|
|
Sync<bool> dispatcherWakeup;
|
|
|
|
|
std::condition_variable_any dispatcherWakeupCV;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
|
|
|
|
|
/* PostgreSQL connection pool. */
|
|
|
|
|
Pool<Connection> dbPool;
|
|
|
|
|
|
|
|
|
|
/* The build machines. */
|
2015-07-07 08:29:43 +00:00
|
|
|
|
typedef std::map<std::string, Machine::ptr> Machines;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
Sync<Machines> machines; // FIXME: use atomic_shared_ptr
|
|
|
|
|
|
2015-07-07 08:29:43 +00:00
|
|
|
|
nix::Path machinesFile;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
struct stat machinesFileStat;
|
|
|
|
|
|
|
|
|
|
/* Various stats. */
|
|
|
|
|
time_t startedAt;
|
|
|
|
|
counter nrBuildsRead{0};
|
|
|
|
|
counter nrBuildsDone{0};
|
|
|
|
|
counter nrStepsDone{0};
|
|
|
|
|
counter nrActiveSteps{0};
|
|
|
|
|
counter nrStepsBuilding{0};
|
|
|
|
|
counter nrStepsCopyingTo{0};
|
|
|
|
|
counter nrStepsCopyingFrom{0};
|
2015-07-10 17:10:14 +00:00
|
|
|
|
counter nrStepsWaiting{0};
|
2015-07-07 08:17:21 +00:00
|
|
|
|
counter nrRetries{0};
|
|
|
|
|
counter maxNrRetries{0};
|
|
|
|
|
counter totalStepTime{0}; // total time for steps, including closure copying
|
|
|
|
|
counter totalStepBuildTime{0}; // total build time for steps
|
|
|
|
|
counter nrQueueWakeups{0};
|
|
|
|
|
counter nrDispatcherWakeups{0};
|
|
|
|
|
counter bytesSent{0};
|
|
|
|
|
counter bytesReceived{0};
|
|
|
|
|
|
|
|
|
|
/* Log compressor work queue. */
|
2015-07-07 08:29:43 +00:00
|
|
|
|
Sync<std::queue<nix::Path>> logCompressorQueue;
|
2015-07-07 08:17:21 +00:00
|
|
|
|
std::condition_variable_any logCompressorWakeup;
|
|
|
|
|
|
|
|
|
|
/* Notification sender work queue. FIXME: if hydra-queue-runner is
|
|
|
|
|
killed before it has finished sending notifications about a
|
|
|
|
|
build, then the notifications may be lost. It would be better
|
|
|
|
|
to mark builds with pending notification in the database. */
|
|
|
|
|
typedef std::pair<BuildID, std::vector<BuildID>> NotificationItem;
|
|
|
|
|
Sync<std::queue<NotificationItem>> notificationSenderQueue;
|
|
|
|
|
std::condition_variable_any notificationSenderWakeup;
|
|
|
|
|
|
|
|
|
|
/* Specific build to do for --build-one (testing only). */
|
|
|
|
|
BuildID buildOne;
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
State();
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
|
|
void clearBusy(Connection & conn, time_t stopTime);
|
|
|
|
|
|
|
|
|
|
/* (Re)load /etc/nix/machines. */
|
|
|
|
|
void loadMachinesFile();
|
|
|
|
|
|
|
|
|
|
/* Thread to reload /etc/nix/machines periodically. */
|
|
|
|
|
void monitorMachinesFile();
|
|
|
|
|
|
|
|
|
|
int createBuildStep(pqxx::work & txn, time_t startTime, Build::ptr build, Step::ptr step,
|
|
|
|
|
const std::string & machine, BuildStepStatus status, const std::string & errorMsg = "",
|
|
|
|
|
BuildID propagatedFrom = 0);
|
|
|
|
|
|
|
|
|
|
void finishBuildStep(pqxx::work & txn, time_t startTime, time_t stopTime, BuildID buildId, int stepNr,
|
2015-07-07 08:29:43 +00:00
|
|
|
|
const std::string & machine, BuildStepStatus status, const std::string & errorMsg = "",
|
2015-07-07 08:17:21 +00:00
|
|
|
|
BuildID propagatedFrom = 0);
|
|
|
|
|
|
|
|
|
|
void updateBuild(pqxx::work & txn, Build::ptr build, BuildStatus status);
|
|
|
|
|
|
|
|
|
|
void queueMonitor();
|
|
|
|
|
|
|
|
|
|
void queueMonitorLoop();
|
|
|
|
|
|
2015-08-10 14:18:06 +00:00
|
|
|
|
/* Check the queue for new builds. */
|
2015-07-07 08:29:43 +00:00
|
|
|
|
void getQueuedBuilds(Connection & conn, std::shared_ptr<nix::StoreAPI> store, unsigned int & lastBuildId);
|
2015-07-07 08:17:21 +00:00
|
|
|
|
|
2015-08-10 14:18:06 +00:00
|
|
|
|
/* Handle cancellation, deletion and priority bumps. */
|
|
|
|
|
void processQueueChange(Connection & conn);
|
2015-07-07 08:17:21 +00:00
|
|
|
|
|
2015-07-07 08:29:43 +00:00
|
|
|
|
Step::ptr createStep(std::shared_ptr<nix::StoreAPI> store, const nix::Path & drvPath,
|
|
|
|
|
Build::ptr referringBuild, Step::ptr referringStep, std::set<nix::Path> & finishedDrvs,
|
2015-07-07 08:17:21 +00:00
|
|
|
|
std::set<Step::ptr> & newSteps, std::set<Step::ptr> & newRunnable);
|
|
|
|
|
|
2015-08-10 23:30:24 +00:00
|
|
|
|
Jobset::ptr createJobset(pqxx::work & txn,
|
|
|
|
|
const std::string & projectName, const std::string & jobsetName);
|
|
|
|
|
|
2015-08-12 11:17:56 +00:00
|
|
|
|
void processJobsetSharesChange(Connection & conn);
|
|
|
|
|
|
2015-07-07 08:17:21 +00:00
|
|
|
|
void makeRunnable(Step::ptr step);
|
|
|
|
|
|
|
|
|
|
/* The thread that selects and starts runnable builds. */
|
|
|
|
|
void dispatcher();
|
|
|
|
|
|
2015-08-10 09:26:30 +00:00
|
|
|
|
system_time doDispatch();
|
|
|
|
|
|
2015-07-07 08:17:21 +00:00
|
|
|
|
void wakeDispatcher();
|
|
|
|
|
|
|
|
|
|
void builder(Step::ptr step, Machine::ptr machine, std::shared_ptr<MaintainCount> reservation);
|
|
|
|
|
|
|
|
|
|
/* Perform the given build step. Return true if the step is to be
|
|
|
|
|
retried. */
|
2015-07-07 08:29:43 +00:00
|
|
|
|
bool doBuildStep(std::shared_ptr<nix::StoreAPI> store, Step::ptr step,
|
2015-07-07 08:17:21 +00:00
|
|
|
|
Machine::ptr machine);
|
|
|
|
|
|
2015-07-07 08:25:33 +00:00
|
|
|
|
void buildRemote(std::shared_ptr<nix::StoreAPI> store,
|
|
|
|
|
Machine::ptr machine, Step::ptr step,
|
|
|
|
|
unsigned int maxSilentTime, unsigned int buildTimeout,
|
|
|
|
|
RemoteResult & result);
|
|
|
|
|
|
2015-07-07 08:17:21 +00:00
|
|
|
|
void markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
2015-07-20 23:45:00 +00:00
|
|
|
|
const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime);
|
2015-07-07 08:17:21 +00:00
|
|
|
|
|
|
|
|
|
bool checkCachedFailure(Step::ptr step, Connection & conn);
|
|
|
|
|
|
|
|
|
|
/* Thread that asynchronously bzips logs of finished steps. */
|
|
|
|
|
void logCompressor();
|
|
|
|
|
|
|
|
|
|
/* Thread that asynchronously invokes hydra-notify to send build
|
|
|
|
|
notifications. */
|
|
|
|
|
void notificationSender();
|
|
|
|
|
|
|
|
|
|
/* Acquire the global queue runner lock, or null if somebody else
|
|
|
|
|
has it. */
|
2015-07-07 08:29:43 +00:00
|
|
|
|
std::shared_ptr<nix::PathLocks> acquireGlobalLock();
|
2015-07-07 08:17:21 +00:00
|
|
|
|
|
|
|
|
|
void dumpStatus(Connection & conn, bool log);
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
|
|
void showStatus();
|
|
|
|
|
|
|
|
|
|
void unlock();
|
|
|
|
|
|
|
|
|
|
void run(BuildID buildOne = 0);
|
|
|
|
|
};
|
2015-07-21 13:14:17 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Membership test for any container 'c' offering find() and end():
   true when c.find(v) locates an element, false when it yields c.end(). */
template <class C, class V>
bool has(const C & c, const V & v)
{
    const auto pos = c.find(v);
    const bool missing = pos == c.end();
    return !missing;
}
|