optimiseStore(): Use a content-addressed file store in /nix/store/.links

optimiseStore() now creates persistent, content-addressed hard links
in /nix/store/.links.  For instance, if it encounters a file P with
hash H, it will create a hard link

  P' = /nix/store/.links/<H>

to P if P' doesn't already exist; if P' exists, then P is replaced by a
hard link to P'.  This is better than the previous in-memory map,
because the map had the tendency to unnecessarily replace hard links with a
hard link to whatever happened to be the first file with a given hash
it encountered.  It also allows on-the-fly, incremental optimisation.
This commit is contained in:
Eelco Dolstra 2012-07-23 12:08:34 -04:00
parent ed59bf7a18
commit 564fb7d9fa
3 changed files with 109 additions and 114 deletions

View file

@ -167,7 +167,7 @@ public:
/* Optimise the disk space usage of the Nix store by hard-linking /* Optimise the disk space usage of the Nix store by hard-linking
files with the same contents. */ files with the same contents. */
void optimiseStore(bool dryRun, OptimiseStats & stats); void optimiseStore(OptimiseStats & stats);
/* Check the integrity of the Nix store. */ /* Check the integrity of the Nix store. */
void verifyStore(bool checkContents); void verifyStore(bool checkContents);

View file

@ -1,6 +1,7 @@
#include "util.hh" #include "util.hh"
#include "local-store.hh" #include "local-store.hh"
#include "immutable.hh" #include "immutable.hh"
#include "globals.hh"
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
@ -12,9 +13,6 @@
namespace nix { namespace nix {
typedef std::map<Hash, std::pair<Path, ino_t> > HashToPath;
static void makeWritable(const Path & path) static void makeWritable(const Path & path)
{ {
struct stat st; struct stat st;
@ -51,132 +49,135 @@ struct MakeImmutable
}; };
static void hashAndLink(bool dryRun, HashToPath & hashToPath, const string linksDir = ".links";
OptimiseStats & stats, const Path & path)
static void hashAndLink(OptimiseStats & stats, const Path & path)
{ {
struct stat st; struct stat st;
if (lstat(path.c_str(), &st)) if (lstat(path.c_str(), &st))
throw SysError(format("getting attributes of path `%1%'") % path); throw SysError(format("getting attributes of path `%1%'") % path);
if (S_ISDIR(st.st_mode)) {
Strings names = readDirectory(path);
foreach (Strings::iterator, i, names)
hashAndLink(stats, path + "/" + *i);
return;
}
/* We can hard link regular files and symlinks. */
if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) return;
/* Sometimes SNAFUs can cause files in the Nix store to be /* Sometimes SNAFUs can cause files in the Nix store to be
modified, in particular when running programs as root under modified, in particular when running programs as root under
NixOS (example: $fontconfig/var/cache being modified). Skip NixOS (example: $fontconfig/var/cache being modified). Skip
those files. */ those files. FIXME: check the modification time. */
if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) { if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path); printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path);
return; return;
} }
/* We can hard link regular files and symlinks. */ /* Hash the file. Note that hashPath() returns the hash over the
if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) { NAR serialisation, which includes the execute bit on the file.
Thus, executable and non-executable files with the same
contents *won't* be linked (which is good because otherwise the
permissions would be screwed up).
/* Hash the file. Note that hashPath() returns the hash over Also note that if `path' is a symlink, then we're hashing the
the NAR serialisation, which includes the execute bit on contents of the symlink (i.e. the result of readlink()), not
the file. Thus, executable and non-executable files with the contents of the target (which may not even exist). */
the same contents *won't* be linked (which is good because
otherwise the permissions would be screwed up).
Also note that if `path' is a symlink, then we're hashing
the contents of the symlink (i.e. the result of
readlink()), not the contents of the target (which may not
even exist). */
Hash hash = hashPath(htSHA256, path).first; Hash hash = hashPath(htSHA256, path).first;
stats.totalFiles++; stats.totalFiles++;
printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash)); printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
std::pair<Path, ino_t> prevPath = hashToPath[hash]; /* Check if this is a known hash. */
Path linkPath = nixStore + "/" + linksDir + "/" + printHash32(hash);
if (!pathExists(linkPath)) {
/* Nope, create a hard link in the links directory. */
makeMutable(path);
MakeImmutable mk1(path);
if (link(path.c_str(), linkPath.c_str()) == -1)
throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path);
if (prevPath.first == "") {
hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
return; return;
} }
/* Yes! We've seen a file with the same contents. Replace /* Yes! We've seen a file with the same contents. Replace the
the current file with a hard link to that file. */ current file with a hard link to that file. */
struct stat stLink;
if (lstat(linkPath.c_str(), &stLink))
throw SysError(format("getting attributes of path `%1%'") % linkPath);
stats.sameContents++; stats.sameContents++;
if (prevPath.second == st.st_ino) { if (st.st_ino == stLink.st_ino) {
printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % prevPath.first); printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % linkPath);
return; return;
} }
if (!dryRun) { printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % linkPath);
printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % prevPath.first); Path tempLink = (format("%1%/.tmp-link-%2%-%3%")
% nixStore % getpid() % rand()).str();
Path tempLink = (format("%1%.tmp-%2%-%3%") /* Make the containing directory writable, but only if it's not
% path % getpid() % rand()).str(); the store itself (we don't want or need to mess with its
permissions). */
/* Make the containing directory writable, but only if
it's not the store itself (we don't want or need to
mess with its permissions). */
bool mustToggle = !isStorePath(path); bool mustToggle = !isStorePath(path);
if (mustToggle) makeWritable(dirOf(path)); if (mustToggle) makeWritable(dirOf(path));
/* When we're done, make the directory read-only again and /* When we're done, make the directory read-only again and reset
reset its timestamp back to 0. */ its timestamp back to 0. */
MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : ""); MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
/* If prevPath is immutable, we can't create hard links /* If linkPath is immutable, we can't create hard links to it,
to it, so make it mutable first (and make it immutable so make it mutable first (and make it immutable again when
again when we're done). We also have to make path we're done). We also have to make path mutable, otherwise
mutable, otherwise rename() will fail to delete it. */ rename() will fail to delete it. */
makeMutable(prevPath.first); makeMutable(linkPath);
MakeImmutable mk1(prevPath.first); MakeImmutable mk1(linkPath);
makeMutable(path); makeMutable(path);
MakeImmutable mk2(path); MakeImmutable mk2(path);
if (link(prevPath.first.c_str(), tempLink.c_str()) == -1) { if (link(linkPath.c_str(), tempLink.c_str()) == -1) {
if (errno == EMLINK) { if (errno == EMLINK) {
/* Too many links to the same file (>= 32000 on /* Too many links to the same file (>= 32000 on most file
most file systems). This is likely to happen systems). This is likely to happen with empty files.
with empty files. Just start over, creating Just shrug and ignore. */
links to the current file. */ printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first);
hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
return; return;
} }
throw SysError(format("cannot link `%1%' to `%2%'") throw SysError(format("cannot link `%1%' to `%2%'") % tempLink % linkPath);
% tempLink % prevPath.first);
} }
/* Atomically replace the old file with the new hard link. */ /* Atomically replace the old file with the new hard link. */
if (rename(tempLink.c_str(), path.c_str()) == -1) { if (rename(tempLink.c_str(), path.c_str()) == -1) {
if (errno == EMLINK) { if (errno == EMLINK) {
/* Some filesystems generate too many links on the /* Some filesystems generate too many links on the rename,
rename, rather than on the original link. rather than on the original link. (Probably it
(Probably it temporarily increases the st_nlink temporarily increases the st_nlink field before
field before decreasing it again.) */ decreasing it again.) */
printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first); printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
/* Unlink the temp link. */ /* Unlink the temp link. */
if (unlink(tempLink.c_str()) == -1) if (unlink(linkPath.c_str()) == -1)
printMsg(lvlError, format("unable to unlink `%1%'") % tempLink); printMsg(lvlError, format("unable to unlink `%1%'") % linkPath);
return; return;
} }
throw SysError(format("cannot rename `%1%' to `%2%'") throw SysError(format("cannot rename `%1%' to `%2%'") % tempLink % path);
% tempLink % path);
} }
} else
printMsg(lvlTalkative, format("would link `%1%' to `%2%'") % path % prevPath.first);
stats.filesLinked++; stats.filesLinked++;
stats.bytesFreed += st.st_size; stats.bytesFreed += st.st_size;
stats.blocksFreed += st.st_blocks; stats.blocksFreed += st.st_blocks;
}
if (S_ISDIR(st.st_mode)) {
Strings names = readDirectory(path);
foreach (Strings::iterator, i, names)
hashAndLink(dryRun, hashToPath, stats, path + "/" + *i);
}
} }
void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats) void LocalStore::optimiseStore(OptimiseStats & stats)
{ {
HashToPath hashToPath; createDirs(nixStore + "/" + linksDir);
PathSet paths = queryValidPaths(); PathSet paths = queryValidPaths();
@ -184,7 +185,7 @@ void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
addTempRoot(*i); addTempRoot(*i);
if (!isValidPath(*i)) continue; /* path was GC'ed, probably */ if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i); startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
hashAndLink(dryRun, hashToPath, stats, *i); hashAndLink(stats, *i);
} }
} }

View file

@ -746,18 +746,12 @@ static void showOptimiseStats(OptimiseStats & stats)
files with the same contents. */ files with the same contents. */
static void opOptimise(Strings opFlags, Strings opArgs) static void opOptimise(Strings opFlags, Strings opArgs)
{ {
if (!opArgs.empty()) if (!opArgs.empty() || !opFlags.empty())
throw UsageError("no arguments expected"); throw UsageError("no arguments expected");
bool dryRun = false;
foreach (Strings::iterator, i, opFlags)
if (*i == "--dry-run") dryRun = true;
else throw UsageError(format("unknown flag `%1%'") % *i);
OptimiseStats stats; OptimiseStats stats;
try { try {
ensureLocalStore().optimiseStore(dryRun, stats); ensureLocalStore().optimiseStore(stats);
} catch (...) { } catch (...) {
showOptimiseStats(stats); showOptimiseStats(stats);
throw; throw;