Heuristically detect if a build may have failed due to a full disk

This will allow Hydra to detect that a build should not be marked as
"permanently failed", allowing it to be retried later.
This commit is contained in:
Eelco Dolstra 2014-02-17 14:15:56 +01:00
parent e81d38c02b
commit 00d30496ca
2 changed files with 28 additions and 5 deletions

View file

@ -33,7 +33,6 @@
#include <bzlib.h> #include <bzlib.h>
/* Includes required for chroot support. */ /* Includes required for chroot support. */
#if HAVE_SYS_PARAM_H #if HAVE_SYS_PARAM_H
#include <sys/param.h> #include <sys/param.h>
@ -60,12 +59,15 @@
#include <netinet/ip.h> #include <netinet/ip.h>
#endif #endif
#if HAVE_SYS_PERSONALITY_H #if HAVE_SYS_PERSONALITY_H
#include <sys/personality.h> #include <sys/personality.h>
#define CAN_DO_LINUX32_BUILDS #define CAN_DO_LINUX32_BUILDS
#endif #endif
#if HAVE_STATVFS
#include <sys/statvfs.h>
#endif
namespace nix { namespace nix {
@ -1383,6 +1385,25 @@ void DerivationGoal::buildDone()
root. */ root. */
if (buildUser.enabled()) buildUser.kill(); if (buildUser.enabled()) buildUser.kill();
/* If the build failed, heuristically check whether this may have
   been caused by a disk full condition.  We have no way of
   knowing whether the build actually got an ENOSPC.  So instead,
   check if the disk is (nearly) full now.  If so, we don't mark
   this build as a permanent failure.  Without HAVE_STATVFS the
   check is compiled out and `diskFull' stays false. */
bool diskFull = false;
#if HAVE_STATVFS
if (!statusOk(status)) {
    /* Free-space threshold in bytes; a file system with less than
       this available to unprivileged users counts as full. */
    const unsigned long long required = 8ULL * 1024 * 1024; // FIXME: make configurable

    /* Check both the Nix store and the temporary build directory. */
    const char * const paths[] = { settings.nixStore.c_str(), tmpDir.c_str() };

    for (size_t n = 0; n < sizeof(paths) / sizeof(paths[0]); ++n) {
        struct statvfs st;

        /* This is only a heuristic, so a path that cannot be
           queried is skipped rather than treated as an error. */
        if (statvfs(paths[n], &st) != 0) continue;

        /* POSIX counts f_bavail in units of f_frsize (the fragment
           size), not f_bsize (the preferred I/O block size).  Fall
           back to f_bsize if the file system reports no fragment
           size. */
        const unsigned long long unit = st.f_frsize ? st.f_frsize : st.f_bsize;

        if (static_cast<unsigned long long>(st.f_bavail) * unit < required)
            diskFull = true;
    }
}
#endif
try { try {
/* Some cleanup per path. We do this here and not in /* Some cleanup per path. We do this here and not in
@ -1449,6 +1470,8 @@ void DerivationGoal::buildDone()
deleteTmpDir(false); deleteTmpDir(false);
if (WIFEXITED(status) && WEXITSTATUS(status) == childSetupFailed) if (WIFEXITED(status) && WEXITSTATUS(status) == childSetupFailed)
throw Error(format("failed to set up the build environment for `%1%'") % drvPath); throw Error(format("failed to set up the build environment for `%1%'") % drvPath);
if (diskFull)
printMsg(lvlError, "note: build failure may have been caused by lack of free disk space");
throw BuildError(format("builder for `%1%' %2%") throw BuildError(format("builder for `%1%' %2%")
% drvPath % statusToString(status)); % drvPath % statusToString(status));
} }
@ -1504,7 +1527,7 @@ void DerivationGoal::buildDone()
foreach (DerivationOutputs::iterator, i, drv.outputs) foreach (DerivationOutputs::iterator, i, drv.outputs)
worker.store.registerFailedPath(i->second.path); worker.store.registerFailedPath(i->second.path);
worker.permanentFailure = !hookError && !fixedOutput; worker.permanentFailure = !hookError && !fixedOutput && !diskFull;
amDone(ecFailed); amDone(ecFailed);
return; return;
} }

View file

@ -457,7 +457,7 @@ void LocalStore::makeStoreWritable()
/* Check if /nix/store is on a read-only mount. */ /* Check if /nix/store is on a read-only mount. */
struct statvfs stat; struct statvfs stat;
if (statvfs(settings.nixStore.c_str(), &stat) != 0) if (statvfs(settings.nixStore.c_str(), &stat) != 0)
throw SysError("Getting info of nix store mountpoint"); throw SysError("getting info about the Nix store mount point");
if (stat.f_flag & ST_RDONLY) { if (stat.f_flag & ST_RDONLY) {
if (unshare(CLONE_NEWNS) == -1) if (unshare(CLONE_NEWNS) == -1)