From 2f04e7102eaad3159073019af96e6e5c4f2c9bcf Mon Sep 17 00:00:00 2001
From: Eelco Dolstra
Date: Mon, 16 Jun 2003 15:59:23 +0000
Subject: [PATCH] * Path hashing.

---
 src/eval.cc   |   4 +-
 src/hash.cc   | 141 +++++++++++++++++++++++++++++++++++++++++++++++++-
 src/hash.hh   |  19 +++++--
 src/test.cc   |  16 ++++++
 src/values.cc |   6 +--
 5 files changed, 178 insertions(+), 8 deletions(-)

diff --git a/src/eval.cc b/src/eval.cc
index 14577c873..c96cf6467 100644
--- a/src/eval.cc
+++ b/src/eval.cc
@@ -148,10 +148,12 @@ static Hash computeDerived(Hash sourceHash, string targetName,
             throw Error("program " + progPath +
                 " failed to create a result in " + targetPath);
 
+#if 0
         /* Remove write permission from the value. */
         int res = system(("chmod -R -w " + targetPath).c_str()); // !!! escaping
         if (WEXITSTATUS(res) != 0)
             throw Error("cannot remove write permission from " + targetPath);
+#endif
 
     } catch (exception &) {
 //         system(("rm -rf " + targetPath).c_str());
@@ -159,7 +161,7 @@ static Hash computeDerived(Hash sourceHash, string targetName,
     }
 
     /* Hash the result. */
-    Hash targetHash = hashFile(targetPath);
+    Hash targetHash = hashPath(targetPath);
 
     /* Register targetHash -> targetPath.  !!! this should be in
        values.cc. */
diff --git a/src/hash.cc b/src/hash.cc
index 9451ac8d8..9558d3670 100644
--- a/src/hash.cc
+++ b/src/hash.cc
@@ -1,9 +1,16 @@
+#include <iostream>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+
 extern "C" {
 #include "md5.h"
 }
 
 #include "hash.hh"
-#include <iostream>
 
 
 Hash::Hash()
@@ -88,3 +95,135 @@ Hash hashFile(const string & fileName)
     if (err) throw SysError("cannot hash file " + fileName);
     return hash;
 }
+
+
+struct HashSink : DumpSink
+{
+    struct md5_ctx ctx;
+    virtual void operator ()
+        (const unsigned char * data, unsigned int len)
+    {
+        md5_process_bytes(data, len, &ctx);
+    }
+};
+
+
+Hash hashPath(const string & path)
+{
+    Hash hash;
+    HashSink sink;
+    md5_init_ctx(&sink.ctx);
+    dumpPath(path, sink);
+    md5_finish_ctx(&sink.ctx, hash.hash);
+    return hash;
+}
+
+
+static void pad(unsigned int len, DumpSink & sink)
+{
+    if (len % 8) {
+        unsigned char zero[8];
+        memset(zero, 0, sizeof(zero));
+        sink(zero, 8 - (len % 8));
+    }
+}
+
+
+static void writeInt(unsigned int n, DumpSink & sink)
+{
+    unsigned char buf[8];
+    memset(buf, 0, sizeof(buf));
+    buf[0] = n & 0xff;
+    buf[1] = (n >> 8) & 0xff;
+    buf[2] = (n >> 16) & 0xff;
+    buf[3] = (n >> 24) & 0xff;
+    sink(buf, sizeof(buf));
+}
+
+
+static void writeString(const string & s, DumpSink & sink)
+{
+    unsigned int len = s.length();
+    writeInt(len, sink);
+    sink((const unsigned char *) s.c_str(), len);
+    pad(len, sink);
+}
+
+
+static void dumpEntries(const string & path, DumpSink & sink)
+{
+    DIR * dir = opendir(path.c_str());
+    if (!dir) throw SysError("opening directory " + path);
+
+    struct dirent * dirent;
+
+    /* !!! sort entries */
+
+    while (errno = 0, dirent = readdir(dir)) {
+        string name = dirent->d_name;
+        if (name == "." || name == "..") continue;
+        writeString("entry", sink);
+        writeString("(", sink);
+        writeString("name", sink);
+        writeString(name, sink);
+        writeString("file", sink);
+        dumpPath(path + "/" + name, sink);
+        writeString(")", sink);
+    }
+
+    if (errno) throw SysError("reading directory " + path);
+
+    closedir(dir); /* !!! close on exception */
+}
+
+
+static void dumpContents(const string & path, unsigned int size,
+    DumpSink & sink)
+{
+    writeString("contents", sink);
+    writeInt(size, sink);
+
+    int fd = open(path.c_str(), O_RDONLY);
+    if (!fd) throw SysError("opening file " + path);
+
+    unsigned char buf[16384];
+
+    unsigned int total = 0;
+    ssize_t n;
+    while ((n = read(fd, buf, sizeof(buf)))) {
+        if (n == -1) throw SysError("reading file " + path);
+        total += n;
+        sink(buf, n);
+    }
+
+    if (total != size)
+        throw SysError("file changed while reading it: " + path);
+
+    pad(size, sink);
+
+    close(fd); /* !!! close on exception */
+}
+
+
+void dumpPath(const string & path, DumpSink & sink)
+{
+    cerr << path << endl;
+
+    struct stat st;
+    if (lstat(path.c_str(), &st))
+        throw SysError("getting attributes of path " + path);
+
+    writeString("(", sink);
+
+    if (S_ISREG(st.st_mode)) {
+        writeString("type", sink);
+        writeString("regular", sink);
+        dumpContents(path, st.st_size, sink);
+    } else if (S_ISDIR(st.st_mode)) {
+        writeString("type", sink);
+        writeString("directory", sink);
+        dumpEntries(path, sink);
+    } else throw Error("unknown file type: " + path);
+
+    writeString(")", sink);
+}
diff --git a/src/hash.hh b/src/hash.hh
index 9d72e66db..13c5275b4 100644
--- a/src/hash.hh
+++ b/src/hash.hh
@@ -50,6 +50,11 @@ Hash hashFile(const string & fileName);
    follows:
 
      hash(path) = md5(dump(path))
+*/
+Hash hashPath(const string & path);
+
+
+/* Dump a path as follows:
 
      IF path points to a REGULAR FILE:
        dump(path) = attrs(
@@ -60,7 +65,7 @@ Hash hashFile(const string & fileName);
      IF path points to a DIRECTORY:
        dump(path) = attrs(
          [ ("type", "directory")
-         , ("entries", concat(map(f, entries(path))))
+         , ("entries", concat(map(f, sort(entries(path)))))
          ])
          where f(fn) = attrs(
            [ ("name", fn)
@@ -72,17 +77,25 @@ Hash hashFile(const string & fileName);
        attrs(as) = concat(map(attr, as)) + encN(0)
        attrs((a, b)) = encS(a) + encS(b)
 
-       encS(s) = encN(len(s)) + s
+       encS(s) = encN(len(s)) + s + (padding until next 64-bit boundary)
 
        encN(n) = 64-bit little-endian encoding of n.
 
        contents(path) = the contents of a regular file.
 
+       sort(strings) = lexicographic sort by 8-bit value (strcmp).
+
        entries(path) = the entries of a directory, without `.' and
          `..'.
 
        `+' denotes string concatenation. */
-Hash hashPath(const string & path);
+
+struct DumpSink
+{
+    virtual void operator () (const unsigned char * data, unsigned int len) = 0;
+};
+
+void dumpPath(const string & path, DumpSink & sink);
 
 
 #endif /* !__HASH_H */
diff --git a/src/test.cc b/src/test.cc
index 79468182e..b37a16a1f 100644
--- a/src/test.cc
+++ b/src/test.cc
@@ -19,6 +19,15 @@ void evalTest(Expr e)
 }
 
 
+struct MySink : DumpSink
+{
+    virtual void operator () (const unsigned char * data, unsigned int len)
+    {
+        cout.write((char *) data, len);
+    }
+};
+
+
 void runTests()
 {
     /* Hashing. */
@@ -36,6 +45,13 @@ void runTests()
         abort();
     } catch (BadRefError err) { };
 
+    /* Dumping. */
+
+#if 0
+    MySink sink;
+    dumpPath("scratch", sink);
+    cout << (string) hashPath("scratch") << endl;
+#endif
 
     /* Set up the test environment. */
 
diff --git a/src/values.cc b/src/values.cc
index 064203ae2..77a6f928e 100644
--- a/src/values.cc
+++ b/src/values.cc
@@ -19,7 +19,7 @@ static string absValuePath(string s)
 
 Hash addValue(string path)
 {
-    Hash hash = hashFile(path);
+    Hash hash = hashPath(path);
 
     string name;
     if (queryDB(nixDB, dbRefs, hash, name)) {
@@ -72,8 +72,8 @@ string queryValuePath(Hash hash)
     if (queryDB(nixDB, dbRefs, hash, name)) {
         string fn = absValuePath(name);
 
-        /* Verify that the file hasn't changed. !!! race */
-        if (hashFile(fn) != hash)
+        /* Verify that the file hasn't changed. !!! race !!! slow */
+        if (hashPath(fn) != hash)
             throw Error("file " + fn + " is stale");
 
         return fn;