Preload linked hashes to speed up lookups

By preloading all inodes in the /nix/store/.links directory, we can
quickly determine if a hardlinked file was already linked into the
hashed links directory.
This is tolerant of removing the .links directory; in that case it will
simply recalculate all hashes in the store.
This commit is contained in:
Wout Mertens 2014-05-13 23:10:06 +02:00
parent a84f503d86
commit e974f20c98
2 changed files with 41 additions and 10 deletions

View file

@ -6,6 +6,11 @@
#include "util.hh" #include "util.hh"
#include "pathlocks.hh" #include "pathlocks.hh"
#if HAVE_TR1_UNORDERED_SET
#include <tr1/unordered_set>
#endif
class sqlite3; class sqlite3;
class sqlite3_stmt; class sqlite3_stmt;
@ -303,7 +308,14 @@ private:
void checkDerivationOutputs(const Path & drvPath, const Derivation & drv); void checkDerivationOutputs(const Path & drvPath, const Derivation & drv);
void optimisePath_(OptimiseStats & stats, const Path & path); #if HAVE_TR1_UNORDERED_SET
typedef std::tr1::unordered_set<ino_t> Hashes;
#else
typedef std::set<ino_t> Hashes;
#endif
void loadHashes(Hashes & hashes);
void optimisePath_(OptimiseStats & stats, const Path & path, Hashes & hashes);
// Internal versions that are not wrapped in retry_sqlite. // Internal versions that are not wrapped in retry_sqlite.
bool isValidPath_(const Path & path); bool isValidPath_(const Path & path);

View file

@ -39,8 +39,22 @@ struct MakeReadOnly
} }
}; };
// TODO Make this a map and keep count and size stats, for giggles
/* Fill `hashes' with the inode number of every entry listed in
   linksDir.  Each entry is inspected with lstat(); if that call fails,
   a SysError naming the offending path is thrown.  Progress is
   reported at debug verbosity before and after the scan. */
void LocalStore::loadHashes(Hashes & hashes)
{
printMsg(lvlDebug, "loading hash inodes in memory");

Strings entries = readDirectory(linksDir);
for (Strings::iterator entry = entries.begin(); entry != entries.end(); ++entry) {
const string linkPath = linksDir + "/" + *entry;
struct stat info;
/* lstat() returns non-zero on failure. */
if (lstat(linkPath.c_str(), &info) != 0)
throw SysError(format("getting attributes of path `%1%'") % linkPath);
hashes.insert(info.st_ino);
}

printMsg(lvlDebug, format("loaded %1% hashes") % hashes.size());
}
void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path) void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path, Hashes & hashes)
{ {
checkInterrupt(); checkInterrupt();
@ -51,7 +65,7 @@ void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path)
if (S_ISDIR(st.st_mode)) { if (S_ISDIR(st.st_mode)) {
Strings names = readDirectory(path); Strings names = readDirectory(path);
foreach (Strings::iterator, i, names) foreach (Strings::iterator, i, names)
optimisePath_(stats, path + "/" + *i); optimisePath_(stats, path + "/" + *i, hashes);
return; return;
} }
@ -73,10 +87,7 @@ void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path)
stats.totalFiles++; stats.totalFiles++;
/* If a store inode has 2 or more links we presume that it was if (st.st_nlink > 1 && hashes.count(st.st_ino)) {
already linked by us */
/* TODO: allow overriding this behavior */
if (st.st_nlink > 1) {
printMsg(lvlDebug, format("`%1%' is already linked, with %2% other file(s).") % path % (st.st_nlink - 2)); printMsg(lvlDebug, format("`%1%' is already linked, with %2% other file(s).") % path % (st.st_nlink - 2));
return; return;
} }
@ -98,7 +109,10 @@ void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path)
if (!pathExists(linkPath)) { if (!pathExists(linkPath)) {
/* Nope, create a hard link in the links directory. */ /* Nope, create a hard link in the links directory. */
if (link(path.c_str(), linkPath.c_str()) == 0) return; if (link(path.c_str(), linkPath.c_str()) == 0) {
hashes.insert(st.st_ino);
return;
}
if (errno != EEXIST) if (errno != EEXIST)
throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path); throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path);
/* Fall through if another process created linkPath before /* Fall through if another process created linkPath before
@ -169,12 +183,15 @@ void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path)
void LocalStore::optimiseStore(OptimiseStats & stats) void LocalStore::optimiseStore(OptimiseStats & stats)
{ {
PathSet paths = queryAllValidPaths(); PathSet paths = queryAllValidPaths();
Hashes hashes;
loadHashes(hashes);
foreach (PathSet::iterator, i, paths) { foreach (PathSet::iterator, i, paths) {
addTempRoot(*i); addTempRoot(*i);
if (!isValidPath(*i)) continue; /* path was GC'ed, probably */ if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i); startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
optimisePath_(stats, *i); optimisePath_(stats, *i, hashes);
} }
} }
@ -182,7 +199,9 @@ void LocalStore::optimiseStore(OptimiseStats & stats)
void LocalStore::optimisePath(const Path & path) void LocalStore::optimisePath(const Path & path)
{ {
OptimiseStats stats; OptimiseStats stats;
if (settings.autoOptimiseStore) optimisePath_(stats, path); Hashes hashes;
if (settings.autoOptimiseStore) optimisePath_(stats, path, hashes);
} }