optimiseStore(): Use a content-addressed file store in /nix/store/.links

optimiseStore() now creates persistent, content-addressed hard links
in /nix/store/.links.  For instance, if it encounters a file P with
hash H, it will create a hard link

  P' = /nix/store/.links/<H>

to P if P' doesn't already exist; if P' exists, then P is replaced by a
hard link to P'.  This is better than the previous in-memory map,
because the map had a tendency to unnecessarily replace hard links with
a hard link to whatever happened to be the first file with a given hash
it encountered.  The new scheme also allows on-the-fly, incremental
optimisation.
This commit is contained in:
Eelco Dolstra 2012-07-23 12:08:34 -04:00
parent ed59bf7a18
commit 564fb7d9fa
3 changed files with 109 additions and 114 deletions

View file

@ -167,7 +167,7 @@ public:
/* Optimise the disk space usage of the Nix store by hard-linking /* Optimise the disk space usage of the Nix store by hard-linking
files with the same contents. */ files with the same contents. */
void optimiseStore(bool dryRun, OptimiseStats & stats); void optimiseStore(OptimiseStats & stats);
/* Check the integrity of the Nix store. */ /* Check the integrity of the Nix store. */
void verifyStore(bool checkContents); void verifyStore(bool checkContents);

View file

@ -1,6 +1,7 @@
#include "util.hh" #include "util.hh"
#include "local-store.hh" #include "local-store.hh"
#include "immutable.hh" #include "immutable.hh"
#include "globals.hh"
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
@ -12,9 +13,6 @@
namespace nix { namespace nix {
typedef std::map<Hash, std::pair<Path, ino_t> > HashToPath;
static void makeWritable(const Path & path) static void makeWritable(const Path & path)
{ {
struct stat st; struct stat st;
@ -51,132 +49,135 @@ struct MakeImmutable
}; };
static void hashAndLink(bool dryRun, HashToPath & hashToPath, const string linksDir = ".links";
OptimiseStats & stats, const Path & path)
static void hashAndLink(OptimiseStats & stats, const Path & path)
{ {
struct stat st; struct stat st;
if (lstat(path.c_str(), &st)) if (lstat(path.c_str(), &st))
throw SysError(format("getting attributes of path `%1%'") % path); throw SysError(format("getting attributes of path `%1%'") % path);
if (S_ISDIR(st.st_mode)) {
Strings names = readDirectory(path);
foreach (Strings::iterator, i, names)
hashAndLink(stats, path + "/" + *i);
return;
}
/* We can hard link regular files and symlinks. */
if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) return;
/* Sometimes SNAFUs can cause files in the Nix store to be /* Sometimes SNAFUs can cause files in the Nix store to be
modified, in particular when running programs as root under modified, in particular when running programs as root under
NixOS (example: $fontconfig/var/cache being modified). Skip NixOS (example: $fontconfig/var/cache being modified). Skip
those files. */ those files. FIXME: check the modification time. */
if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) { if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path); printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path);
return; return;
} }
/* We can hard link regular files and symlinks. */ /* Hash the file. Note that hashPath() returns the hash over the
if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) { NAR serialisation, which includes the execute bit on the file.
Thus, executable and non-executable files with the same
contents *won't* be linked (which is good because otherwise the
permissions would be screwed up).
/* Hash the file. Note that hashPath() returns the hash over Also note that if `path' is a symlink, then we're hashing the
the NAR serialisation, which includes the execute bit on contents of the symlink (i.e. the result of readlink()), not
the file. Thus, executable and non-executable files with the contents of the target (which may not even exist). */
the same contents *won't* be linked (which is good because Hash hash = hashPath(htSHA256, path).first;
otherwise the permissions would be screwed up). stats.totalFiles++;
printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
Also note that if `path' is a symlink, then we're hashing /* Check if this is a known hash. */
the contents of the symlink (i.e. the result of Path linkPath = nixStore + "/" + linksDir + "/" + printHash32(hash);
readlink()), not the contents of the target (which may not
even exist). */
Hash hash = hashPath(htSHA256, path).first;
stats.totalFiles++;
printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
std::pair<Path, ino_t> prevPath = hashToPath[hash]; if (!pathExists(linkPath)) {
/* Nope, create a hard link in the links directory. */
if (prevPath.first == "") { makeMutable(path);
hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino); MakeImmutable mk1(path);
return;
}
/* Yes! We've seen a file with the same contents. Replace
the current file with a hard link to that file. */
stats.sameContents++;
if (prevPath.second == st.st_ino) {
printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % prevPath.first);
return;
}
if (!dryRun) {
printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % prevPath.first);
Path tempLink = (format("%1%.tmp-%2%-%3%") if (link(path.c_str(), linkPath.c_str()) == -1)
% path % getpid() % rand()).str(); throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path);
/* Make the containing directory writable, but only if return;
it's not the store itself (we don't want or need to
mess with its permissions). */
bool mustToggle = !isStorePath(path);
if (mustToggle) makeWritable(dirOf(path));
/* When we're done, make the directory read-only again and
reset its timestamp back to 0. */
MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
/* If prevPath is immutable, we can't create hard links
to it, so make it mutable first (and make it immutable
again when we're done). We also have to make path
mutable, otherwise rename() will fail to delete it. */
makeMutable(prevPath.first);
MakeImmutable mk1(prevPath.first);
makeMutable(path);
MakeImmutable mk2(path);
if (link(prevPath.first.c_str(), tempLink.c_str()) == -1) {
if (errno == EMLINK) {
/* Too many links to the same file (>= 32000 on
most file systems). This is likely to happen
with empty files. Just start over, creating
links to the current file. */
printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first);
hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
return;
}
throw SysError(format("cannot link `%1%' to `%2%'")
% tempLink % prevPath.first);
}
/* Atomically replace the old file with the new hard link. */
if (rename(tempLink.c_str(), path.c_str()) == -1) {
if (errno == EMLINK) {
/* Some filesystems generate too many links on the
rename, rather than on the original link.
(Probably it temporarily increases the st_nlink
field before decreasing it again.) */
printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first);
hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
/* Unlink the temp link. */
if (unlink(tempLink.c_str()) == -1)
printMsg(lvlError, format("unable to unlink `%1%'") % tempLink);
return;
}
throw SysError(format("cannot rename `%1%' to `%2%'")
% tempLink % path);
}
} else
printMsg(lvlTalkative, format("would link `%1%' to `%2%'") % path % prevPath.first);
stats.filesLinked++;
stats.bytesFreed += st.st_size;
stats.blocksFreed += st.st_blocks;
} }
if (S_ISDIR(st.st_mode)) { /* Yes! We've seen a file with the same contents. Replace the
Strings names = readDirectory(path); current file with a hard link to that file. */
foreach (Strings::iterator, i, names) struct stat stLink;
hashAndLink(dryRun, hashToPath, stats, path + "/" + *i); if (lstat(linkPath.c_str(), &stLink))
throw SysError(format("getting attributes of path `%1%'") % linkPath);
stats.sameContents++;
if (st.st_ino == stLink.st_ino) {
printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % linkPath);
return;
} }
printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % linkPath);
Path tempLink = (format("%1%/.tmp-link-%2%-%3%")
% nixStore % getpid() % rand()).str();
/* Make the containing directory writable, but only if it's not
the store itself (we don't want or need to mess with its
permissions). */
bool mustToggle = !isStorePath(path);
if (mustToggle) makeWritable(dirOf(path));
/* When we're done, make the directory read-only again and reset
its timestamp back to 0. */
MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
/* If linkPath is immutable, we can't create hard links to it,
so make it mutable first (and make it immutable again when
we're done). We also have to make path mutable, otherwise
rename() will fail to delete it. */
makeMutable(linkPath);
MakeImmutable mk1(linkPath);
makeMutable(path);
MakeImmutable mk2(path);
if (link(linkPath.c_str(), tempLink.c_str()) == -1) {
if (errno == EMLINK) {
/* Too many links to the same file (>= 32000 on most file
systems). This is likely to happen with empty files.
Just shrug and ignore. */
printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
return;
}
throw SysError(format("cannot link `%1%' to `%2%'") % tempLink % linkPath);
}
/* Atomically replace the old file with the new hard link. */
if (rename(tempLink.c_str(), path.c_str()) == -1) {
if (errno == EMLINK) {
/* Some filesystems generate too many links on the rename,
rather than on the original link. (Probably it
temporarily increases the st_nlink field before
decreasing it again.) */
printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
/* Unlink the temp link. */
if (unlink(linkPath.c_str()) == -1)
printMsg(lvlError, format("unable to unlink `%1%'") % linkPath);
return;
}
throw SysError(format("cannot rename `%1%' to `%2%'") % tempLink % path);
}
stats.filesLinked++;
stats.bytesFreed += st.st_size;
stats.blocksFreed += st.st_blocks;
} }
void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats) void LocalStore::optimiseStore(OptimiseStats & stats)
{ {
HashToPath hashToPath; createDirs(nixStore + "/" + linksDir);
PathSet paths = queryValidPaths(); PathSet paths = queryValidPaths();
@ -184,7 +185,7 @@ void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
addTempRoot(*i); addTempRoot(*i);
if (!isValidPath(*i)) continue; /* path was GC'ed, probably */ if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i); startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
hashAndLink(dryRun, hashToPath, stats, *i); hashAndLink(stats, *i);
} }
} }

View file

@ -746,18 +746,12 @@ static void showOptimiseStats(OptimiseStats & stats)
files with the same contents. */ files with the same contents. */
static void opOptimise(Strings opFlags, Strings opArgs) static void opOptimise(Strings opFlags, Strings opArgs)
{ {
if (!opArgs.empty()) if (!opArgs.empty() || !opFlags.empty())
throw UsageError("no arguments expected"); throw UsageError("no arguments expected");
bool dryRun = false;
foreach (Strings::iterator, i, opFlags)
if (*i == "--dry-run") dryRun = true;
else throw UsageError(format("unknown flag `%1%'") % *i);
OptimiseStats stats; OptimiseStats stats;
try { try {
ensureLocalStore().optimiseStore(dryRun, stats); ensureLocalStore().optimiseStore(stats);
} catch (...) { } catch (...) {
showOptimiseStats(stats); showOptimiseStats(stats);
throw; throw;