forked from lix-project/lix
optimiseStore(): Use a content-addressed file store in /nix/store/.links
optimiseStore() now creates persistent, content-addressed hard links in /nix/store/.links. For instance, if it encounters a file P with hash H, it will create a hard link P' = /nix/store/.links/<H> to P if P' doesn't already exist; if P' exists, then P is replaced by a hard link to P'. This is better than the previous in-memory map, because the map had a tendency to unnecessarily replace hard links with a hard link to whatever happened to be the first file with a given hash it encountered. It also allows on-the-fly, incremental optimisation.
This commit is contained in:
parent
ed59bf7a18
commit
564fb7d9fa
|
@ -167,7 +167,7 @@ public:
|
||||||
|
|
||||||
/* Optimise the disk space usage of the Nix store by hard-linking
|
/* Optimise the disk space usage of the Nix store by hard-linking
|
||||||
files with the same contents. */
|
files with the same contents. */
|
||||||
void optimiseStore(bool dryRun, OptimiseStats & stats);
|
void optimiseStore(OptimiseStats & stats);
|
||||||
|
|
||||||
/* Check the integrity of the Nix store. */
|
/* Check the integrity of the Nix store. */
|
||||||
void verifyStore(bool checkContents);
|
void verifyStore(bool checkContents);
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#include "util.hh"
|
#include "util.hh"
|
||||||
#include "local-store.hh"
|
#include "local-store.hh"
|
||||||
#include "immutable.hh"
|
#include "immutable.hh"
|
||||||
|
#include "globals.hh"
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
@ -12,9 +13,6 @@
|
||||||
namespace nix {
|
namespace nix {
|
||||||
|
|
||||||
|
|
||||||
typedef std::map<Hash, std::pair<Path, ino_t> > HashToPath;
|
|
||||||
|
|
||||||
|
|
||||||
static void makeWritable(const Path & path)
|
static void makeWritable(const Path & path)
|
||||||
{
|
{
|
||||||
struct stat st;
|
struct stat st;
|
||||||
|
@ -51,132 +49,135 @@ struct MakeImmutable
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static void hashAndLink(bool dryRun, HashToPath & hashToPath,
|
const string linksDir = ".links";
|
||||||
OptimiseStats & stats, const Path & path)
|
|
||||||
|
|
||||||
|
static void hashAndLink(OptimiseStats & stats, const Path & path)
|
||||||
{
|
{
|
||||||
struct stat st;
|
struct stat st;
|
||||||
if (lstat(path.c_str(), &st))
|
if (lstat(path.c_str(), &st))
|
||||||
throw SysError(format("getting attributes of path `%1%'") % path);
|
throw SysError(format("getting attributes of path `%1%'") % path);
|
||||||
|
|
||||||
|
if (S_ISDIR(st.st_mode)) {
|
||||||
|
Strings names = readDirectory(path);
|
||||||
|
foreach (Strings::iterator, i, names)
|
||||||
|
hashAndLink(stats, path + "/" + *i);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We can hard link regular files and symlinks. */
|
||||||
|
if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) return;
|
||||||
|
|
||||||
/* Sometimes SNAFUs can cause files in the Nix store to be
|
/* Sometimes SNAFUs can cause files in the Nix store to be
|
||||||
modified, in particular when running programs as root under
|
modified, in particular when running programs as root under
|
||||||
NixOS (example: $fontconfig/var/cache being modified). Skip
|
NixOS (example: $fontconfig/var/cache being modified). Skip
|
||||||
those files. */
|
those files. FIXME: check the modification time. */
|
||||||
if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
|
if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
|
||||||
printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path);
|
printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We can hard link regular files and symlinks. */
|
/* Hash the file. Note that hashPath() returns the hash over the
|
||||||
if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
|
NAR serialisation, which includes the execute bit on the file.
|
||||||
|
Thus, executable and non-executable files with the same
|
||||||
|
contents *won't* be linked (which is good because otherwise the
|
||||||
|
permissions would be screwed up).
|
||||||
|
|
||||||
/* Hash the file. Note that hashPath() returns the hash over
|
Also note that if `path' is a symlink, then we're hashing the
|
||||||
the NAR serialisation, which includes the execute bit on
|
contents of the symlink (i.e. the result of readlink()), not
|
||||||
the file. Thus, executable and non-executable files with
|
the contents of the target (which may not even exist). */
|
||||||
the same contents *won't* be linked (which is good because
|
Hash hash = hashPath(htSHA256, path).first;
|
||||||
otherwise the permissions would be screwed up).
|
stats.totalFiles++;
|
||||||
|
printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
|
||||||
|
|
||||||
Also note that if `path' is a symlink, then we're hashing
|
/* Check if this is a known hash. */
|
||||||
the contents of the symlink (i.e. the result of
|
Path linkPath = nixStore + "/" + linksDir + "/" + printHash32(hash);
|
||||||
readlink()), not the contents of the target (which may not
|
|
||||||
even exist). */
|
|
||||||
Hash hash = hashPath(htSHA256, path).first;
|
|
||||||
stats.totalFiles++;
|
|
||||||
printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
|
|
||||||
|
|
||||||
std::pair<Path, ino_t> prevPath = hashToPath[hash];
|
if (!pathExists(linkPath)) {
|
||||||
|
/* Nope, create a hard link in the links directory. */
|
||||||
|
makeMutable(path);
|
||||||
|
MakeImmutable mk1(path);
|
||||||
|
|
||||||
if (prevPath.first == "") {
|
if (link(path.c_str(), linkPath.c_str()) == -1)
|
||||||
hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
|
throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path);
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Yes! We've seen a file with the same contents. Replace
|
return;
|
||||||
the current file with a hard link to that file. */
|
|
||||||
stats.sameContents++;
|
|
||||||
if (prevPath.second == st.st_ino) {
|
|
||||||
printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % prevPath.first);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!dryRun) {
|
|
||||||
|
|
||||||
printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % prevPath.first);
|
|
||||||
|
|
||||||
Path tempLink = (format("%1%.tmp-%2%-%3%")
|
|
||||||
% path % getpid() % rand()).str();
|
|
||||||
|
|
||||||
/* Make the containing directory writable, but only if
|
|
||||||
it's not the store itself (we don't want or need to
|
|
||||||
mess with its permissions). */
|
|
||||||
bool mustToggle = !isStorePath(path);
|
|
||||||
if (mustToggle) makeWritable(dirOf(path));
|
|
||||||
|
|
||||||
/* When we're done, make the directory read-only again and
|
|
||||||
reset its timestamp back to 0. */
|
|
||||||
MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
|
|
||||||
|
|
||||||
/* If ‘prevPath’ is immutable, we can't create hard links
|
|
||||||
to it, so make it mutable first (and make it immutable
|
|
||||||
again when we're done). We also have to make ‘path’
|
|
||||||
mutable, otherwise rename() will fail to delete it. */
|
|
||||||
makeMutable(prevPath.first);
|
|
||||||
MakeImmutable mk1(prevPath.first);
|
|
||||||
|
|
||||||
makeMutable(path);
|
|
||||||
MakeImmutable mk2(path);
|
|
||||||
|
|
||||||
if (link(prevPath.first.c_str(), tempLink.c_str()) == -1) {
|
|
||||||
if (errno == EMLINK) {
|
|
||||||
/* Too many links to the same file (>= 32000 on
|
|
||||||
most file systems). This is likely to happen
|
|
||||||
with empty files. Just start over, creating
|
|
||||||
links to the current file. */
|
|
||||||
printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first);
|
|
||||||
hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
throw SysError(format("cannot link `%1%' to `%2%'")
|
|
||||||
% tempLink % prevPath.first);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Atomically replace the old file with the new hard link. */
|
|
||||||
if (rename(tempLink.c_str(), path.c_str()) == -1) {
|
|
||||||
if (errno == EMLINK) {
|
|
||||||
/* Some filesystems generate too many links on the
|
|
||||||
rename, rather than on the original link.
|
|
||||||
(Probably it temporarily increases the st_nlink
|
|
||||||
field before decreasing it again.) */
|
|
||||||
printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first);
|
|
||||||
hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
|
|
||||||
|
|
||||||
/* Unlink the temp link. */
|
|
||||||
if (unlink(tempLink.c_str()) == -1)
|
|
||||||
printMsg(lvlError, format("unable to unlink `%1%'") % tempLink);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
throw SysError(format("cannot rename `%1%' to `%2%'")
|
|
||||||
% tempLink % path);
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
printMsg(lvlTalkative, format("would link `%1%' to `%2%'") % path % prevPath.first);
|
|
||||||
|
|
||||||
stats.filesLinked++;
|
|
||||||
stats.bytesFreed += st.st_size;
|
|
||||||
stats.blocksFreed += st.st_blocks;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (S_ISDIR(st.st_mode)) {
|
/* Yes! We've seen a file with the same contents. Replace the
|
||||||
Strings names = readDirectory(path);
|
current file with a hard link to that file. */
|
||||||
foreach (Strings::iterator, i, names)
|
struct stat stLink;
|
||||||
hashAndLink(dryRun, hashToPath, stats, path + "/" + *i);
|
if (lstat(linkPath.c_str(), &stLink))
|
||||||
|
throw SysError(format("getting attributes of path `%1%'") % linkPath);
|
||||||
|
|
||||||
|
stats.sameContents++;
|
||||||
|
if (st.st_ino == stLink.st_ino) {
|
||||||
|
printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % linkPath);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % linkPath);
|
||||||
|
|
||||||
|
Path tempLink = (format("%1%/.tmp-link-%2%-%3%")
|
||||||
|
% nixStore % getpid() % rand()).str();
|
||||||
|
|
||||||
|
/* Make the containing directory writable, but only if it's not
|
||||||
|
the store itself (we don't want or need to mess with its
|
||||||
|
permissions). */
|
||||||
|
bool mustToggle = !isStorePath(path);
|
||||||
|
if (mustToggle) makeWritable(dirOf(path));
|
||||||
|
|
||||||
|
/* When we're done, make the directory read-only again and reset
|
||||||
|
its timestamp back to 0. */
|
||||||
|
MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
|
||||||
|
|
||||||
|
/* If ‘linkPath’ is immutable, we can't create hard links to it,
|
||||||
|
so make it mutable first (and make it immutable again when
|
||||||
|
we're done). We also have to make ‘path’ mutable, otherwise
|
||||||
|
rename() will fail to delete it. */
|
||||||
|
makeMutable(linkPath);
|
||||||
|
MakeImmutable mk1(linkPath);
|
||||||
|
|
||||||
|
makeMutable(path);
|
||||||
|
MakeImmutable mk2(path);
|
||||||
|
|
||||||
|
if (link(linkPath.c_str(), tempLink.c_str()) == -1) {
|
||||||
|
if (errno == EMLINK) {
|
||||||
|
/* Too many links to the same file (>= 32000 on most file
|
||||||
|
systems). This is likely to happen with empty files.
|
||||||
|
Just shrug and ignore. */
|
||||||
|
printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
throw SysError(format("cannot link `%1%' to `%2%'") % tempLink % linkPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Atomically replace the old file with the new hard link. */
|
||||||
|
if (rename(tempLink.c_str(), path.c_str()) == -1) {
|
||||||
|
if (errno == EMLINK) {
|
||||||
|
/* Some filesystems generate too many links on the rename,
|
||||||
|
rather than on the original link. (Probably it
|
||||||
|
temporarily increases the st_nlink field before
|
||||||
|
decreasing it again.) */
|
||||||
|
printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
|
||||||
|
|
||||||
|
/* Unlink the temp link. */
|
||||||
|
if (unlink(linkPath.c_str()) == -1)
|
||||||
|
printMsg(lvlError, format("unable to unlink `%1%'") % linkPath);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
throw SysError(format("cannot rename `%1%' to `%2%'") % tempLink % path);
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.filesLinked++;
|
||||||
|
stats.bytesFreed += st.st_size;
|
||||||
|
stats.blocksFreed += st.st_blocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
|
void LocalStore::optimiseStore(OptimiseStats & stats)
|
||||||
{
|
{
|
||||||
HashToPath hashToPath;
|
createDirs(nixStore + "/" + linksDir);
|
||||||
|
|
||||||
PathSet paths = queryValidPaths();
|
PathSet paths = queryValidPaths();
|
||||||
|
|
||||||
|
@ -184,7 +185,7 @@ void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
|
||||||
addTempRoot(*i);
|
addTempRoot(*i);
|
||||||
if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
|
if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
|
||||||
startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
|
startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
|
||||||
hashAndLink(dryRun, hashToPath, stats, *i);
|
hashAndLink(stats, *i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -746,18 +746,12 @@ static void showOptimiseStats(OptimiseStats & stats)
|
||||||
files with the same contents. */
|
files with the same contents. */
|
||||||
static void opOptimise(Strings opFlags, Strings opArgs)
|
static void opOptimise(Strings opFlags, Strings opArgs)
|
||||||
{
|
{
|
||||||
if (!opArgs.empty())
|
if (!opArgs.empty() || !opFlags.empty())
|
||||||
throw UsageError("no arguments expected");
|
throw UsageError("no arguments expected");
|
||||||
|
|
||||||
bool dryRun = false;
|
|
||||||
|
|
||||||
foreach (Strings::iterator, i, opFlags)
|
|
||||||
if (*i == "--dry-run") dryRun = true;
|
|
||||||
else throw UsageError(format("unknown flag `%1%'") % *i);
|
|
||||||
|
|
||||||
OptimiseStats stats;
|
OptimiseStats stats;
|
||||||
try {
|
try {
|
||||||
ensureLocalStore().optimiseStore(dryRun, stats);
|
ensureLocalStore().optimiseStore(stats);
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
showOptimiseStats(stats);
|
showOptimiseStats(stats);
|
||||||
throw;
|
throw;
|
||||||
|
|
Loading…
Reference in a new issue