* Did something useful while waiting at IAD: reference scanning is now much faster.
This commit is contained in:
Eelco Dolstra 2005-11-16 08:27:06 +00:00
parent 9311ab76a5
commit b7f008fc35
5 changed files with 52 additions and 31 deletions

View file

@ -1390,10 +1390,7 @@ void DerivationGoal::computeClosure()
in it. */ in it. */
PathSet references; PathSet references;
if (!pathExists(path + "/nix-support/no-scan")) { if (!pathExists(path + "/nix-support/no-scan")) {
Paths references2; references = scanForReferences(path, allPaths);
references2 = filterReferences(path,
Paths(allPaths.begin(), allPaths.end()));
references = PathSet(references2.begin(), references2.end());
/* For debugging, print out the referenced and /* For debugging, print out the referenced and
unreferenced paths. */ unreferenced paths. */

View file

@ -11,26 +11,45 @@
#include "hash.hh" #include "hash.hh"
static unsigned int refLength = 32; /* characters */
static void search(const string & s, static void search(const string & s,
Strings & ids, Strings & seen) StringSet & ids, StringSet & seen)
{ {
for (Strings::iterator i = ids.begin(); static bool initialised = false;
i != ids.end(); ) static bool isBase32[256];
{ if (!initialised) {
checkInterrupt(); for (unsigned int i = 0; i < 256; ++i) isBase32[i] = false;
if (s.find(*i) == string::npos) for (unsigned int i = 0; i < base32Chars.size(); ++i)
i++; isBase32[(unsigned char) base32Chars[i]] = true;
else { initialised = true;
debug(format("found reference to `%1%'") % *i); }
seen.push_back(*i);
i = ids.erase(i); for (unsigned int i = 0; i + refLength <= s.size(); ) {
int j;
bool match = true;
for (j = refLength - 1; j >= 0; --j)
if (!isBase32[(unsigned char) s[i + j]]) {
i += j + 1;
match = false;
break;
}
if (!match) continue;
string ref(s, i, refLength);
if (ids.find(ref) != ids.end()) {
debug(format("found reference to `%1%' at offset `%2%'")
% ref % i);
seen.insert(ref);
ids.erase(ref);
} }
++i;
} }
} }
void checkPath(const string & path, void checkPath(const string & path,
Strings & ids, Strings & seen) StringSet & ids, StringSet & seen)
{ {
checkInterrupt(); checkInterrupt();
@ -69,36 +88,35 @@ void checkPath(const string & path,
} }
Strings filterReferences(const string & path, const Strings & paths) PathSet scanForReferences(const string & path, const PathSet & paths)
{ {
map<string, string> backMap; map<string, Path> backMap;
Strings ids; StringSet ids;
Strings seen; StringSet seen;
/* For efficiency (and a higher hit rate), just search for the /* For efficiency (and a higher hit rate), just search for the
hash part of the file name. (This assumes that all references hash part of the file name. (This assumes that all references
have the form `HASH-bla'). */ have the form `HASH-bla'). */
for (Strings::const_iterator i = paths.begin(); for (PathSet::const_iterator i = paths.begin(); i != paths.end(); i++) {
i != paths.end(); i++)
{
string baseName = baseNameOf(*i); string baseName = baseNameOf(*i);
unsigned int pos = baseName.find('-'); unsigned int pos = baseName.find('-');
if (pos == string::npos) if (pos == string::npos)
throw Error(format("bad reference `%1%'") % *i); throw Error(format("bad reference `%1%'") % *i);
string s = string(baseName, 0, pos); string s = string(baseName, 0, pos);
assert(s.size() == refLength);
assert(backMap.find(s) == backMap.end());
// parseHash(htSHA256, s); // parseHash(htSHA256, s);
ids.push_back(s); ids.insert(s);
backMap[s] = *i; backMap[s] = *i;
} }
checkPath(path, ids, seen); checkPath(path, ids, seen);
Strings found; PathSet found;
for (Strings::iterator i = seen.begin(); i != seen.end(); i++) for (StringSet::iterator i = seen.begin(); i != seen.end(); i++) {
{ map<string, Path>::iterator j;
map<string, string>::iterator j;
if ((j = backMap.find(*i)) == backMap.end()) abort(); if ((j = backMap.find(*i)) == backMap.end()) abort();
found.push_back(j->second); found.insert(j->second);
} }
return found; return found;

View file

@ -4,7 +4,7 @@
#include "util.hh" #include "util.hh"
Strings filterReferences(const Path & path, const Strings & refs); PathSet scanForReferences(const Path & path, const PathSet & refs);
#endif /* !__REFERENCES_H */ #endif /* !__REFERENCES_H */

View file

@ -109,7 +109,7 @@ static unsigned char divMod(unsigned char * bytes, unsigned char y)
// omitted: E O U T // omitted: E O U T
char chars[] = "0123456789abcdfghijklmnpqrsvwxyz"; const string base32Chars = "0123456789abcdfghijklmnpqrsvwxyz";
string printHash32(const Hash & hash) string printHash32(const Hash & hash)
@ -117,6 +117,8 @@ string printHash32(const Hash & hash)
Hash hash2(hash); Hash hash2(hash);
unsigned int len = (hash.hashSize * 8 - 1) / 5 + 1; unsigned int len = (hash.hashSize * 8 - 1) / 5 + 1;
const char * chars = base32Chars.c_str();
string s(len, '0'); string s(len, '0');
int pos = len - 1; int pos = len - 1;
@ -165,6 +167,8 @@ Hash parseHash32(HashType ht, const string & s)
{ {
Hash hash(ht); Hash hash(ht);
const char * chars = base32Chars.c_str();
for (unsigned int i = 0; i < s.length(); ++i) { for (unsigned int i = 0; i < s.length(); ++i) {
char c = s[i]; char c = s[i];
unsigned char digit; unsigned char digit;

View file

@ -15,6 +15,8 @@ const int md5HashSize = 16;
const int sha1HashSize = 20; const int sha1HashSize = 20;
const int sha256HashSize = 32; const int sha256HashSize = 32;
extern const string base32Chars;
struct Hash struct Hash
{ {