Heuristically detect if a build may have failed due to a full disk
This lets Hydra detect that such a build should not be marked as "permanently failed", so that it can be retried later.
This commit is contained in:
parent
e81d38c02b
commit
00d30496ca
|
@ -33,7 +33,6 @@
|
||||||
|
|
||||||
#include <bzlib.h>
|
#include <bzlib.h>
|
||||||
|
|
||||||
|
|
||||||
/* Includes required for chroot support. */
|
/* Includes required for chroot support. */
|
||||||
#if HAVE_SYS_PARAM_H
|
#if HAVE_SYS_PARAM_H
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
|
@ -60,12 +59,15 @@
|
||||||
#include <netinet/ip.h>
|
#include <netinet/ip.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if HAVE_SYS_PERSONALITY_H
|
#if HAVE_SYS_PERSONALITY_H
|
||||||
#include <sys/personality.h>
|
#include <sys/personality.h>
|
||||||
#define CAN_DO_LINUX32_BUILDS
|
#define CAN_DO_LINUX32_BUILDS
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_STATVFS
|
||||||
|
#include <sys/statvfs.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
namespace nix {
|
namespace nix {
|
||||||
|
|
||||||
|
@ -1383,6 +1385,25 @@ void DerivationGoal::buildDone()
|
||||||
root. */
|
root. */
|
||||||
if (buildUser.enabled()) buildUser.kill();
|
if (buildUser.enabled()) buildUser.kill();
|
||||||
|
|
||||||
|
/* If the build failed, heuristically check whether this may have
|
||||||
|
been caused by a disk full condition. We have no way of
|
||||||
|
knowing whether the build actually got an ENOSPC. So instead,
|
||||||
|
check if the disk is (nearly) full now. If so, we don't mark
|
||||||
|
this build as a permanent failure. */
|
||||||
|
bool diskFull = false;
|
||||||
|
#if HAVE_STATVFS
|
||||||
|
if (!statusOk(status)) {
|
||||||
|
unsigned long long required = 8ULL * 1024 * 1024; // FIXME: make configurable
|
||||||
|
struct statvfs st;
|
||||||
|
if (statvfs(settings.nixStore.c_str(), &st) == 0 &&
|
||||||
|
(unsigned long long) st.f_bavail * st.f_bsize < required)
|
||||||
|
diskFull = true;
|
||||||
|
if (statvfs(tmpDir.c_str(), &st) == 0 &&
|
||||||
|
(unsigned long long) st.f_bavail * st.f_bsize < required)
|
||||||
|
diskFull = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
/* Some cleanup per path. We do this here and not in
|
/* Some cleanup per path. We do this here and not in
|
||||||
|
@ -1449,6 +1470,8 @@ void DerivationGoal::buildDone()
|
||||||
deleteTmpDir(false);
|
deleteTmpDir(false);
|
||||||
if (WIFEXITED(status) && WEXITSTATUS(status) == childSetupFailed)
|
if (WIFEXITED(status) && WEXITSTATUS(status) == childSetupFailed)
|
||||||
throw Error(format("failed to set up the build environment for `%1%'") % drvPath);
|
throw Error(format("failed to set up the build environment for `%1%'") % drvPath);
|
||||||
|
if (diskFull)
|
||||||
|
printMsg(lvlError, "note: build failure may have been caused by lack of free disk space");
|
||||||
throw BuildError(format("builder for `%1%' %2%")
|
throw BuildError(format("builder for `%1%' %2%")
|
||||||
% drvPath % statusToString(status));
|
% drvPath % statusToString(status));
|
||||||
}
|
}
|
||||||
|
@ -1504,7 +1527,7 @@ void DerivationGoal::buildDone()
|
||||||
foreach (DerivationOutputs::iterator, i, drv.outputs)
|
foreach (DerivationOutputs::iterator, i, drv.outputs)
|
||||||
worker.store.registerFailedPath(i->second.path);
|
worker.store.registerFailedPath(i->second.path);
|
||||||
|
|
||||||
worker.permanentFailure = !hookError && !fixedOutput;
|
worker.permanentFailure = !hookError && !fixedOutput && !diskFull;
|
||||||
amDone(ecFailed);
|
amDone(ecFailed);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -456,8 +456,8 @@ void LocalStore::makeStoreWritable()
|
||||||
if (getuid() != 0) return;
|
if (getuid() != 0) return;
|
||||||
/* Check if /nix/store is on a read-only mount. */
|
/* Check if /nix/store is on a read-only mount. */
|
||||||
struct statvfs stat;
|
struct statvfs stat;
|
||||||
if (statvfs(settings.nixStore.c_str(), &stat) !=0)
|
if (statvfs(settings.nixStore.c_str(), &stat) != 0)
|
||||||
throw SysError("Getting info of nix store mountpoint");
|
throw SysError("getting info about the Nix store mount point");
|
||||||
|
|
||||||
if (stat.f_flag & ST_RDONLY) {
|
if (stat.f_flag & ST_RDONLY) {
|
||||||
if (unshare(CLONE_NEWNS) == -1)
|
if (unshare(CLONE_NEWNS) == -1)
|
||||||
|
|
Loading…
Reference in a new issue