* Remove the --use-atime / --max-atime garbage collector flags.  Many
  (Linux) machines no longer maintain the atime because it's too
  expensive, and on the machines where --use-atime is useful (like the
  buildfarm), reading the atimes on the entire Nix store takes way too
  much time to make it practical.
This commit is contained in:
Eelco Dolstra 2009-11-20 17:12:38 +00:00
parent 997db91e07
commit 8824d60fe5
7 changed files with 11 additions and 203 deletions

View file

@ -213,8 +213,6 @@ linkend="sec-nix-build"><command>nix-build</command></link> does.</para>
</group>
<arg><option>--max-freed</option> <replaceable>bytes</replaceable></arg>
<arg><option>--max-links</option> <replaceable>nrlinks</replaceable></arg>
<arg><option>--max-atime</option> <replaceable>atime</replaceable></arg>
<arg><option>--use-atime</option></arg>
</cmdsynopsis>
</refsection>
@ -292,42 +290,6 @@ options control what gets deleted and in what order:
</varlistentry>
<varlistentry><term><option>--max-atime</option> <replaceable>atime</replaceable></term>
<listitem><para>Only delete a store path if its last-accessed time
is less than <replaceable>atime</replaceable>. This allows you to
garbage-collect only packages that haven't been used recently.
The time is expressed as the number of seconds in the Unix epoch,
i.e., since 1970-01-01 00:00:00 UTC. An easy way to convert to
this format is <literal>date +%s -d "<replaceable>date
specification</replaceable>"</literal>.</para>
<para>For directories, the last-accessed time is the highest
last-accessed time of any regular file in the directory (or in any
of its subdirectories). That is, the <literal>atime</literal>
field maintained by the filesystem is ignored for directories.
This is because operations such as rebuilding the
<command>locate</command> database tend to update the
<literal>atime</literal> values of all directories, so they're not
a useful indicator of whether a package was recently used.</para>
<para>Note that <command>nix-store --optimise</command> reads all
regular files in the Nix store, and so causes all last-accessed
times to be set to the present time. This makes
<option>--max-atime</option> ineffective (for a while at
least).</para></listitem>
</varlistentry>
<varlistentry><term><option>--use-atime</option></term>
<listitem><para>Delete store paths in order of ascending
last-accessed time. This is useful in conjunction with the other
options to delete only the least recently used
packages.</para></listitem>
</varlistentry>
</variablelist>
</para>
@ -358,13 +320,6 @@ deleting `/nix/store/kq82idx6g0nyzsp2s14gfsc38npai7lf-cairo-1.0.4.tar.gz.drv'
</para>
<para>To delete unreachable paths not accessed in the last two months:
<screen>
$ nix-store --gc -v --max-atime $(date +%s -d "2 months ago")</screen>
</para>
<para>To delete at least 100 MiBs of unreachable paths:
<screen>

View file

@ -439,30 +439,6 @@ Paths topoSortPaths(const PathSet & paths)
}
static time_t lastFileAccessTime(const Path & path)
{
checkInterrupt();
struct stat st;
if (lstat(path.c_str(), &st) == -1)
throw SysError(format("statting `%1%'") % path);
if (S_ISDIR(st.st_mode)) {
time_t last = 0;
Strings names = readDirectory(path);
foreach (Strings::iterator, i, names) {
time_t t = lastFileAccessTime(path + "/" + *i);
if (t > last) last = t;
}
return last;
}
else if (S_ISLNK(st.st_mode)) return 0;
else return st.st_atime;
}
/* Empty tag type.  collectGarbage() catches it around the deletion
   loop and discards it; presumably it is thrown once a GC stop
   condition is hit -- confirm at the throw site. */
struct GCLimitReached { };
@ -522,35 +498,6 @@ void LocalStore::gcPathRecursive(const GCOptions & options,
}
struct CachingAtimeComparator : public std::binary_function<Path, Path, bool>
{
std::map<Path, time_t> cache;
time_t lookup(const Path & p)
{
std::map<Path, time_t>::iterator i = cache.find(p);
if (i != cache.end()) return i->second;
debug(format("computing atime of `%1%'") % p);
cache[p] = lastFileAccessTime(p);
assert(cache.find(p) != cache.end());
return cache[p];
}
bool operator () (const Path & p1, const Path & p2)
{
return lookup(p2) < lookup(p1);
}
};
/* Render the timestamp `t', converted with localtime(), using the
   strftime() pattern `format'.  Returns an empty string if `format'
   is empty, if `t' cannot be converted to local time, or if the
   expansion does not fit in the largest buffer tried (64 KiB). */
static string showTime(const string & format, time_t t)
{
    if (format.empty()) return string();

    /* localtime() returns a null pointer on failure; passing that on
       to strftime() would be undefined behaviour. */
    const struct tm * tm = localtime(&t);
    if (!tm) return string();

    /* strftime() returns 0 and leaves the buffer contents unspecified
       when the result does not fit, so retry with a larger buffer
       instead of reading whatever happens to be in it.  The upper
       bound keeps the loop finite for patterns that legitimately
       expand to nothing. */
    for (size_t size = 128; size <= 65536; size *= 2) {
        string buf(size, '\0');
        size_t n = strftime(&buf[0], buf.size(), format.c_str(), tm);
        if (n > 0) {
            buf.resize(n);
            return buf;
        }
    }

    return string();
}
static bool isLive(const Path & path, const PathSet & livePaths,
const PathSet & tempRoots, const PathSet & tempRootsClosed)
{
@ -699,87 +646,14 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results)
}
/* Delete all dead store paths (or until one of the stop
conditions is reached). */
conditions is reached), respecting the partial ordering
determined by the references graph. */
PathSet done;
try {
if (!options.useAtime) {
/* Delete the paths, respecting the partial ordering
determined by the references graph. */
printMsg(lvlError, format("deleting garbage..."));
foreach (PathSet::iterator, i, storePaths)
gcPathRecursive(options, results, done, *i);
}
else {
/* Delete in order of ascending last access time, still
maintaining the partial ordering of the reference
graph. Note that we can't use a topological sort for
this because that takes time O(V+E), and in this case
E=O(V^2) (i.e. the graph is dense because of the edges
due to the atime ordering). So instead we put all
deletable paths in a priority queue (ordered by atime),
and after deleting a path, add additional paths that
have become deletable to the priority queue. */
CachingAtimeComparator atimeComp;
/* Create a priority queue that orders paths by ascending
atime. This is why C++ needs type inferencing... */
std::priority_queue<Path, vector<Path>, binary_function_ref_adapter<CachingAtimeComparator> > prioQueue =
std::priority_queue<Path, vector<Path>, binary_function_ref_adapter<CachingAtimeComparator> >(binary_function_ref_adapter<CachingAtimeComparator>(&atimeComp));
/* Initially put the paths that are invalid or have no
referrers into the priority queue. */
printMsg(lvlError, format("finding deletable paths..."));
foreach (PathSet::iterator, i, storePaths) {
checkInterrupt();
/* We can safely delete a path if it's invalid or
it has no referrers. Note that all the invalid
paths will be deleted in the first round. */
if (isValidPath(*i)) {
if (queryReferrersNoSelf(*i).empty()) prioQueue.push(*i);
} else prioQueue.push(*i);
}
debug(format("%1% initially deletable paths") % prioQueue.size());
/* Now delete everything in the order of the priority
queue until nothing is left. */
printMsg(lvlError, format("deleting garbage..."));
while (!prioQueue.empty()) {
checkInterrupt();
Path path = prioQueue.top(); prioQueue.pop();
if (options.maxAtime != (time_t) -1 &&
atimeComp.lookup(path) > options.maxAtime)
continue;
printMsg(lvlInfo, format("deleting `%1%' (last accessed %2%)") % path % showTime("%F %H:%M:%S", atimeComp.lookup(path)));
PathSet references;
if (isValidPath(path)) references = queryReferencesNoSelf(path);
gcPath(options, results, path);
/* For each reference of the current path, see if the
reference has now become deletable (i.e. is in the
set of dead paths and has no referrers left). If
so add it to the priority queue. */
foreach (PathSet::iterator, i, references) {
if (storePaths.find(*i) != storePaths.end() &&
queryReferrersNoSelf(*i).empty())
{
debug(format("path `%1%' has become deletable") % *i);
prioQueue.push(*i);
}
}
}
}
printMsg(lvlError, format("deleting garbage..."));
foreach (PathSet::iterator, i, storePaths)
gcPathRecursive(options, results, done, *i);
} catch (GCLimitReached & e) {
}
}

View file

@ -426,8 +426,9 @@ void RemoteStore::collectGarbage(const GCOptions & options, GCResults & results)
writeLongLong(options.maxFreed, to);
writeInt(options.maxLinks, to);
if (GET_PROTOCOL_MINOR(daemonVersion) >= 5) {
writeInt(options.useAtime, to);
writeInt(options.maxAtime, to);
/* removed options */
writeInt(0, to);
writeInt(0, to);
}
processStderr();

View file

@ -14,8 +14,6 @@ GCOptions::GCOptions()
ignoreLiveness = false;
maxFreed = 0;
maxLinks = 0;
useAtime = false;
maxAtime = (time_t) -1;
}

View file

@ -64,22 +64,6 @@ struct GCOptions
has dropped below `maxLinks'. */
unsigned int maxLinks;
/* Delete paths in order of ascending last access time. I.e.,
prefer deleting less recently used paths. Useful in conjunction
with `maxFreed' and `maxLinks' (or manual interruption). The
access time of a path is defined as the highest atime of any
non-directory, non-symlink file under that path. Directories
and symlinks are ignored because their atimes are frequently
mass-updated, e.g. by `locate'. Note that optimiseStore()
somewhat reduces the usefulness of this option: it hard-links
regular files and symlinks together, giving them a "shared"
atime. */
bool useAtime;
/* Do not delete paths newer than `maxAtime'. -1 means no age
limit. */
time_t maxAtime;
GCOptions();
};

View file

@ -536,11 +536,6 @@ static void opGC(Strings opFlags, Strings opArgs)
if (options.maxFreed == 0) options.maxFreed = 1;
}
else if (*i == "--max-links") options.maxLinks = getIntArg(*i, i, opFlags.end());
else if (*i == "--use-atime") options.useAtime = true;
else if (*i == "--max-atime") {
options.useAtime = true;
options.maxAtime = getIntArg(*i, i, opFlags.end());
}
else throw UsageError(format("bad sub-operation `%1%' in GC") % *i);
if (!opArgs.empty()) throw UsageError("no arguments expected");

View file

@ -464,8 +464,9 @@ static void performOp(unsigned int clientVersion,
options.maxFreed = readLongLong(from);
options.maxLinks = readInt(from);
if (GET_PROTOCOL_MINOR(clientVersion) >= 5) {
options.useAtime = readInt(from);
options.maxAtime = readInt(from);
/* removed options */
readInt(from);
readInt(from);
}
GCResults results;