Yet another rewrite of the garbage collector

But this time it's *obviously* correct!  No more segfaults due to
infinite recursions for sure, etc.

Also, move directories to /nix/store/trash instead of renaming them to
/nix/store/bla-gc-<pid>.  Then we can just delete /nix/store/trash at
the end.
This commit is contained in:
Eelco Dolstra 2012-12-20 17:32:15 +01:00
parent 9c29a2ed35
commit 06f62defe6
2 changed files with 145 additions and 138 deletions

View file

@ -396,24 +396,17 @@ struct LocalStore::GCState
GCResults & results; GCResults & results;
PathSet roots; PathSet roots;
PathSet tempRoots; PathSet tempRoots;
PathSet deleted; PathSet dead;
PathSet live; PathSet alive;
PathSet busy;
PathSet invalidated;
bool gcKeepOutputs; bool gcKeepOutputs;
bool gcKeepDerivations; bool gcKeepDerivations;
unsigned long long bytesInvalidated; unsigned long long bytesInvalidated;
Path trashDir;
bool shouldDelete;
GCState(GCResults & results_) : results(results_), bytesInvalidated(0) { } GCState(GCResults & results_) : results(results_), bytesInvalidated(0) { }
}; };
/* Tell whether the given GC action is one of the two deleting
   actions (gcDeleteDead or gcDeleteSpecific). */
static bool shouldDelete(GCOptions::GCAction action)
{
    if (action == GCOptions::gcDeleteDead) return true;
    return action == GCOptions::gcDeleteSpecific;
}
bool LocalStore::isActiveTempFile(const GCState & state, bool LocalStore::isActiveTempFile(const GCState & state,
const Path & path, const string & suffix) const Path & path, const string & suffix)
{ {
@ -424,152 +417,148 @@ bool LocalStore::isActiveTempFile(const GCState & state,
void LocalStore::deleteGarbage(GCState & state, const Path & path) void LocalStore::deleteGarbage(GCState & state, const Path & path)
{ {
printMsg(lvlInfo, format("deleting `%1%'") % path);
unsigned long long bytesFreed; unsigned long long bytesFreed;
deletePathWrapped(path, bytesFreed); deletePathWrapped(path, bytesFreed);
state.results.bytesFreed += bytesFreed; state.results.bytesFreed += bytesFreed;
} }
bool LocalStore::tryToDelete(GCState & state, const Path & path) void LocalStore::deletePathRecursive(GCState & state, const Path & path)
{ {
checkInterrupt(); checkInterrupt();
if (path == linksDir) return true; unsigned long long size = 0;
if (isValidPath(path)) {
PathSet referrers;
queryReferrers(path, referrers);
foreach (PathSet::iterator, i, referrers)
if (*i != path) deletePathRecursive(state, *i);
size = queryPathInfo(path).narSize;
invalidatePathChecked(path);
}
struct stat st; struct stat st;
if (lstat(path.c_str(), &st)) { if (lstat(path.c_str(), &st)) {
if (errno == ENOENT) return true; if (errno == ENOENT) return;
throw SysError(format("getting status of %1%") % path); throw SysError(format("getting status of %1%") % path);
} }
if (state.deleted.find(path) != state.deleted.end()) return true; printMsg(lvlInfo, format("deleting `%1%'") % path);
if (state.live.find(path) != state.live.end()) return false;
/* If the path is not a regular file or symlink, move it to the
trash directory. The move is to ensure that later (when we're
not holding the global GC lock) we can delete the path without
being afraid that the path has become alive again. Otherwise
delete it right away. */
if (S_ISDIR(st.st_mode)) {
// Estimate the amount freed using the narSize field. FIXME:
// if the path was not valid, need to determine the actual
// size.
state.bytesInvalidated += size;
makeMutable(path.c_str());
// Mac OS X cannot rename directories if they are read-only.
if (chmod(path.c_str(), st.st_mode | S_IWUSR) == -1)
throw SysError(format("making `%1%' writable") % path);
Path tmp = state.trashDir + "/" + baseNameOf(path);
if (rename(path.c_str(), tmp.c_str()))
throw SysError(format("unable to rename `%1%' to `%2%'") % path % tmp);
} else
deleteGarbage(state, path);
if (state.results.bytesFreed + state.bytesInvalidated > state.options.maxFreed) {
printMsg(lvlInfo, format("deleted or invalidated more than %1% bytes; stopping") % state.options.maxFreed);
throw GCLimitReached();
}
}
/* Determine whether `path' is kept alive by a root.  The search
   follows the referrers of `path' and, depending on the gc-keep-*
   flags in `state', also its derivation outputs and valid derivers.
   `visited' records every path entered during this traversal so that
   no path is walked twice.  Paths found to be alive are cached in
   state.alive; state.dead is consulted but not modified here. */
bool LocalStore::canReachRoot(GCState & state, PathSet & visited, const Path & path)
{
    /* Already entered during this traversal: don't walk it again. */
    if (visited.find(path) != visited.end()) return false;

    /* Reuse verdicts that were recorded earlier. */
    if (state.alive.find(path) != state.alive.end()) return true;
    if (state.dead.find(path) != state.dead.end()) return false;

    bool isRoot = state.roots.find(path) != state.roots.end();
    if (isRoot) {
        printMsg(lvlDebug, format("cannot delete `%1%' because it's a root") % path);
        state.alive.insert(path);
        return true;
    }

    visited.insert(path);

    /* Nothing is looked up for a path that is not valid. */
    if (!isValidPath(path)) return false;

    /* Collect every path whose liveness would keep this one alive,
       starting with its referrers. */
    PathSet keepers;
    queryReferrers(path, keepers);

    /* With gc-keep-derivations, a derivation is also kept by each of
       its valid outputs that names it as deriver. */
    if (state.gcKeepDerivations && isDerivation(path)) {
        PathSet outs = queryDerivationOutputs(path);
        foreach (PathSet::iterator, out, outs) {
            if (!isValidPath(*out)) continue;
            if (queryDeriver(*out) == path) keepers.insert(*out);
        }
    }

    /* With gc-keep-outputs, a path is also kept by its valid
       derivers. */
    if (state.gcKeepOutputs) {
        PathSet drvs = queryValidDerivers(path);
        keepers.insert(drvs.begin(), drvs.end());
    }

    /* The first keeper (other than the path itself) that reaches a
       root makes this path alive as well. */
    foreach (PathSet::iterator, k, keepers) {
        if (*k == path) continue;
        if (!canReachRoot(state, visited, *k)) continue;
        state.alive.insert(path);
        return true;
    }

    return false;
}
void LocalStore::tryToDelete(GCState & state, const Path & path)
{
checkInterrupt();
if (path == linksDir || path == state.trashDir) return;
startNest(nest, lvlDebug, format("considering whether to delete `%1%'") % path); startNest(nest, lvlDebug, format("considering whether to delete `%1%'") % path);
/* If gc-keep-outputs and gc-keep-derivations are both set, we can if (!isValidPath(path)) {
have cycles in the liveness graph, so we need to treat such
strongly connected components as a single unit (paths). That
is, we can delete the elements of paths only if all referrers
of paths are garbage. */
PathSet paths, referrers;
Paths pathsSorted;
if (isValidPath(path)) {
/* Add derivers and outputs of path to paths. */
PathSet todo;
todo.insert(path);
while (!todo.empty()) {
Path p = *todo.begin();
assertStorePath(p);
todo.erase(p);
if (paths.find(p) != paths.end()) continue;
paths.insert(p);
/* If gc-keep-derivations is set and this is a derivation,
then don't delete the derivation if any of the outputs
are live. */
if (state.gcKeepDerivations && isDerivation(p)) {
PathSet outputs = queryDerivationOutputs(p);
foreach (PathSet::iterator, i, outputs)
if (isValidPath(*i) && queryDeriver(*i) == p) todo.insert(*i);
}
/* If gc-keep-outputs is set, then don't delete this path
if there are derivers of this path that are not
garbage. */
if (state.gcKeepOutputs) {
PathSet derivers = queryValidDerivers(p);
foreach (PathSet::iterator, i, derivers) todo.insert(*i);
}
}
}
else {
/* A lock file belonging to a path that we're building right /* A lock file belonging to a path that we're building right
now isn't garbage. */ now isn't garbage. */
if (isActiveTempFile(state, path, ".lock")) return false; if (isActiveTempFile(state, path, ".lock")) return;
/* Don't delete .chroot directories for derivations that are /* Don't delete .chroot directories for derivations that are
currently being built. */ currently being built. */
if (isActiveTempFile(state, path, ".chroot")) return false; if (isActiveTempFile(state, path, ".chroot")) return;
paths.insert(path);
} }
/* Check if any path in paths is a root. */ PathSet visited;
foreach (PathSet::iterator, i, paths)
if (state.roots.find(*i) != state.roots.end()) {
printMsg(lvlDebug, format("cannot delete `%1%' because it's a root") % *i);
goto isLive;
}
/* Recursively try to delete the referrers of this strongly if (canReachRoot(state, visited, path)) {
connected component. If any referrer can't be deleted, then printMsg(lvlDebug, format("cannot delete `%1%' because it's still reachable") % path);
these paths can't be deleted either. */ } else {
foreach (PathSet::iterator, i, paths) /* No path we visited was a root, so everything is garbage.
if (isValidPath(*i)) queryReferrers(*i, referrers); But we only delete path and its referrers here so that
nix-store --delete doesn't have the unexpected effect of
foreach (PathSet::iterator, i, referrers) recursing into derivations and outputs. */
if (paths.find(*i) == paths.end() && !tryToDelete(state, *i)) { state.dead.insert(visited.begin(), visited.end());
printMsg(lvlDebug, format("cannot delete `%1%' because it has live referrers") % *i); if (state.shouldDelete)
goto isLive; deletePathRecursive(state, path);
}
/* The paths are garbage, so delete them. */
pathsSorted = topoSortPaths(*this, paths);
foreach (Paths::iterator, i, pathsSorted) {
if (shouldDelete(state.options.action)) {
/* If it's a valid path that's not a regular file or
symlink, invalidate it, rename it, and schedule it for
deletion. The renaming is to ensure that later (when
we're not holding the global GC lock) we can delete the
path without being afraid that the path has become
alive again. Otherwise delete it right away. */
if (isValidPath(*i)) {
if (S_ISDIR(st.st_mode)) {
printMsg(lvlInfo, format("invalidating `%1%'") % *i);
// Estimate the amount freed using the narSize field.
state.bytesInvalidated += queryPathInfo(*i).narSize;
invalidatePathChecked(*i);
makeMutable(i->c_str());
// Mac OS X cannot rename directories if they are read-only.
if (chmod(i->c_str(), st.st_mode | S_IWUSR) == -1)
throw SysError(format("making `%1%' writable") % *i);
Path tmp = (format("%1%-gc-%2%") % *i % getpid()).str();
if (rename(i->c_str(), tmp.c_str()))
throw SysError(format("unable to rename `%1%' to `%2%'") % *i % tmp);
state.invalidated.insert(tmp);
} else {
invalidatePathChecked(*i);
deleteGarbage(state, *i);
}
} else
deleteGarbage(state, *i);
if (state.results.bytesFreed + state.bytesInvalidated > state.options.maxFreed) {
printMsg(lvlInfo, format("deleted or invalidated more than %1% bytes; stopping") % state.options.maxFreed);
throw GCLimitReached();
}
} else
printMsg(lvlTalkative, format("would delete `%1%'") % *i);
state.deleted.insert(*i);
if (state.options.action != GCOptions::gcReturnLive)
state.results.paths.insert(*i);
} }
return true;
isLive:
foreach (PathSet::iterator, i, paths) {
state.live.insert(*i);
if (state.options.action == GCOptions::gcReturnLive)
state.results.paths.insert(*i);
}
return false;
} }
@ -625,7 +614,7 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
{ {
GCState state(results); GCState state(results);
state.options = options; state.options = options;
state.trashDir = settings.nixStore + "/trash";
state.gcKeepOutputs = settings.gcKeepOutputs; state.gcKeepOutputs = settings.gcKeepOutputs;
state.gcKeepDerivations = settings.gcKeepDerivations; state.gcKeepDerivations = settings.gcKeepDerivations;
@ -638,6 +627,8 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
state.gcKeepDerivations = false; state.gcKeepDerivations = false;
} }
state.shouldDelete = options.action == GCOptions::gcDeleteDead || options.action == GCOptions::gcDeleteSpecific;
/* Acquire the global GC root. This prevents /* Acquire the global GC root. This prevents
a) New roots from being added. a) New roots from being added.
b) Processes from creating new temporary root files. */ b) Processes from creating new temporary root files. */
@ -668,6 +659,8 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
increase, since we hold locks on everything. So everything increase, since we hold locks on everything. So everything
that is not reachable from `roots'. */ that is not reachable from `roots'. */
if (state.shouldDelete) createDirs(state.trashDir);
/* Now either delete all garbage paths, or just the specified /* Now either delete all garbage paths, or just the specified
paths (for gcDeleteSpecific). */ paths (for gcDeleteSpecific). */
@ -675,13 +668,14 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
foreach (PathSet::iterator, i, options.pathsToDelete) { foreach (PathSet::iterator, i, options.pathsToDelete) {
assertStorePath(*i); assertStorePath(*i);
if (!tryToDelete(state, *i)) tryToDelete(state, *i);
if (state.dead.find(*i) == state.dead.end())
throw Error(format("cannot delete path `%1%' since it is still alive") % *i); throw Error(format("cannot delete path `%1%' since it is still alive") % *i);
} }
} else if (options.maxFreed > 0) { } else if (options.maxFreed > 0) {
if (shouldDelete(state.options.action)) if (state.shouldDelete)
printMsg(lvlError, format("deleting garbage...")); printMsg(lvlError, format("deleting garbage..."));
else else
printMsg(lvlError, format("determining live/dead paths...")); printMsg(lvlError, format("determining live/dead paths..."));
@ -727,13 +721,22 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
} }
} }
if (state.options.action == GCOptions::gcReturnLive) {
state.results.paths = state.alive;
return;
}
if (state.options.action == GCOptions::gcReturnDead) {
state.results.paths = state.dead;
return;
}
/* Allow other processes to add to the store from here on. */ /* Allow other processes to add to the store from here on. */
fdGCLock.close(); fdGCLock.close();
/* Delete the invalidated paths now that the lock has been /* Delete the trash directory. */
released. */ printMsg(lvlInfo, format("deleting `%1%'") % state.trashDir);
foreach (PathSet::iterator, i, state.invalidated) deleteGarbage(state, state.trashDir);
deleteGarbage(state, *i);
/* Clean up the links directory. */ /* Clean up the links directory. */
if (options.action == GCOptions::gcDeleteDead || options.action == GCOptions::gcDeleteSpecific) { if (options.action == GCOptions::gcDeleteDead || options.action == GCOptions::gcDeleteSpecific) {

View file

@ -276,7 +276,11 @@ private:
void deleteGarbage(GCState & state, const Path & path); void deleteGarbage(GCState & state, const Path & path);
bool tryToDelete(GCState & state, const Path & path); void tryToDelete(GCState & state, const Path & path);
bool canReachRoot(GCState & state, PathSet & visited, const Path & path);
void deletePathRecursive(GCState & state, const Path & path);
bool isActiveTempFile(const GCState & state, bool isActiveTempFile(const GCState & state,
const Path & path, const string & suffix); const Path & path, const string & suffix);