From db0d865ec4fb646856731000d540d968011f4daf Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 3 May 2006 23:07:38 +0000 Subject: [PATCH] * New ATermMap, seems more-or-less finished. --- table/aterm-map.cc | 193 +++++++++++++++++++++++++++++++++------------ table/timings.txt | 11 +++ 2 files changed, 154 insertions(+), 50 deletions(-) diff --git a/table/aterm-map.cc b/table/aterm-map.cc index 53623d088..465cabf34 100644 --- a/table/aterm-map.cc +++ b/table/aterm-map.cc @@ -48,32 +48,53 @@ public: ATerm get(ATerm key) const; void remove(ATerm key); - void remove(const string & key); private: void init(unsigned int expectedCount); + void free(); + void resizeTable(unsigned int expectedCount); - unsigned int hash1(ATerm key) const; - unsigned int hash2(ATerm key) const; + void copy(KeyValue * elements, unsigned int size); + + inline unsigned int hash1(ATerm key) const; + inline unsigned int hash2(ATerm key) const; }; ATermMap::ATermMap(unsigned int expectedCount) { - init(expectedCount); + init(expectedCount * 10 / 9); /* slight adjustment */ +} + + +ATermMap::ATermMap(const ATermMap & map) +{ + init(map.maxCount); + copy(map.hashTable, map.size); +} + + +ATermMap & ATermMap::operator = (const ATermMap & map) +{ + if (this == &map) return *this; + free(); + init(map.maxCount); + copy(map.hashTable, map.size); + return *this; } ATermMap::~ATermMap() { - if (hashTable) free(hashTable); + free(); } void ATermMap::init(unsigned int expectedCount) { + assert(sizeof(ATerm) * 2 == sizeof(KeyValue)); size = 0; count = 0; maxCount = 0; @@ -82,88 +103,127 @@ void ATermMap::init(unsigned int expectedCount) } +void ATermMap::free() +{ + if (hashTable) { + ATunprotectArray((ATerm *) hashTable); + ::free(hashTable); + hashTable = 0; + } +} + + +static unsigned int roundToPowerOf2(unsigned int x) +{ + x--; + x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; + x++; + return x; +} + + +static const unsigned int maxLoadFactor = /* 1 / */ 3; +static unsigned int nrResizes = 0; + + void ATermMap::resizeTable(unsigned int expectedCount) { - assert(size == 0); + if (expectedCount == 0) expectedCount = 1; +// cout << maxCount << " -> " << expectedCount << endl; +// cout << maxCount << " " << size << endl; +// cout << (double) size / maxCount << endl; - this->maxCount = expectedCount; + unsigned int oldSize = size; + KeyValue * oldHashTable = hashTable; - unsigned int newSize = 128; + maxCount = expectedCount; + size = roundToPowerOf2(maxCount * maxLoadFactor); + hashTable = (KeyValue *) calloc(sizeof(KeyValue), size); + ATprotectArray((ATerm *) hashTable, size * 2); + +// cout << size << endl; - hashTable = (KeyValue *) calloc(sizeof(KeyValue), newSize); - - size = newSize; + /* Re-hash the elements in the old table. */ + if (oldSize != 0) { + count = 0; + copy(oldHashTable, oldSize); + ATunprotectArray((ATerm *) oldHashTable); + ::free(oldHashTable); + nrResizes++; + } } +void ATermMap::copy(KeyValue * elements, unsigned int size) +{ + for (unsigned int i = 0; i < size; ++i) + if (elements[i].value) /* i.e., non-empty, non-deleted element */ + set(elements[i].key, elements[i].value); +} + + +static const unsigned int shift = 16; +static const unsigned int knuth = (unsigned int) (0.6180339887 * (1 << shift)); + + unsigned int ATermMap::hash1(ATerm key) const { /* Don't care about the least significant bits of the ATerm pointer since they're always 0. */ unsigned int key2 = ((unsigned int) key) >> 2; -#if 0 - double d1 = key2 * 0.6180339887; - unsigned int h1 = (int) (size * (d1 - floor(d1))); -#endif + /* Approximately equal to: + double d = key2 * 0.6180339887; + unsigned int h = (int) (size * (d - floor(d))); + */ + + unsigned int h = (size * ((key2 * knuth) & ((1 << shift) - 1))) >> shift; -#if 0 - unsigned int h1 = size * (key2 * 61803 % 100000); -#endif - - unsigned int h1 = (size * ((key2 * 40503) & 0xffff)) >> 16; - -// cout << key2 << " " << h1 << endl; - -// unsigned int h1 = (key2 * 134217689) & (size - 1); - - return h1 % size; + return h; } unsigned int ATermMap::hash2(ATerm key) const { unsigned int key2 = ((unsigned int) key) >> 2; - -#if 0 - double d2 = key2 * 0.6180339887; - unsigned int h2 = 1 | (int) (size * (d2 - floor(d2))); -#endif - - unsigned int h3 = ((key2 * 134217689) & (size - 1)) | 1; - return h3; + /* Note: the result must be relatively prime to `size' (which is a + power of 2), so we make sure that the result is always odd. */ + unsigned int h = ((key2 * 134217689) & (size - 1)) | 1; + return h; } -unsigned int nrItemsSet = 0; -unsigned int nrSetProbes = 0; -unsigned int nrMaxProbes = 0; +static unsigned int nrItemsSet = 0; +static unsigned int nrSetProbes = 0; void ATermMap::set(ATerm key, ATerm value) { - unsigned int probes = 0; + if (count == maxCount) resizeTable(size * 2 / maxLoadFactor); + nrItemsSet++; for (unsigned int i = 0, h = hash1(key); i < size; ++i, h = (h + hash2(key)) & (size - 1)) { - assert(h < size); - probes++; + // assert(h < size); nrSetProbes++; - if (hashTable[h].key == 0) { - if (probes > nrMaxProbes) nrMaxProbes = probes; + /* Note: to see whether a slot is free, we check + hashTable[h].value, not hashTable[h].key, since we use + value == 0 to mark deleted slots. */ + if (hashTable[h].value == 0 || hashTable[h].key == key) { hashTable[h].key = key; hashTable[h].value = value; count++; return; } } + abort(); } -unsigned int nrItemsGet = 0; -unsigned int nrGetProbes = 0; +static unsigned int nrItemsGet = 0; +static unsigned int nrGetProbes = 0; ATerm ATermMap::get(ATerm key) const @@ -180,6 +240,20 @@ ATerm ATermMap::get(ATerm key) const } +void ATermMap::remove(ATerm key) +{ + for (unsigned int i = 0, h = hash1(key); i < size; + ++i, h = (h + hash2(key)) & (size - 1)) + { + if (hashTable[h].key == 0) return; + if (hashTable[h].key == key) { + hashTable[h].value = 0; + return; + } + } +} + + int main(int argc, char * * argv) { ATerm bottomOfStack; @@ -211,18 +285,37 @@ int main(int argc, char * * argv) for (int test = 0; test < 100000; ++test) { - ATermMap map(100); - for (int i = 0; i < 30; ++i) - map.set(someTerm(), someTerm()); - for (int i = 0; i < 100; ++i) + // cerr << test << endl; + unsigned int n = 300; + ATermMap map(300); + ATerm keys[n], values[n]; + for (unsigned int i = 0; i < n; ++i) { + keys[i] = someTerm(); + values[i] = someTerm(); + map.set(keys[i], values[i]); + // cerr << "INSERT: " << keys[i] << " " << values[i] << endl; + } + values[n - 1] = 0; + map.remove(keys[n - 1]); + for (unsigned int i = 0; i < n; ++i) { + if (map.get(keys[i]) != values[i]) { + for (unsigned int j = i + 1; j < n; ++j) + if (keys[i] == keys[j]) goto x; + cerr << "MISMATCH: " << keys[i] << " " << values[i] << " " << map.get(keys[i]) << " " << i << endl; + abort(); + x: ; + } + } + for (unsigned int i = 0; i < 100; ++i) map.get(someTerm()); } + cout << "RESIZES: " << nrResizes << endl; + cout << "SET: " << nrItemsSet << " " << nrSetProbes << " " - << (double) nrSetProbes / nrItemsSet << " " - << nrMaxProbes << endl; + << (double) nrSetProbes / nrItemsSet << endl; cout << "GET: " << nrItemsGet << " " diff --git a/table/timings.txt b/table/timings.txt index 26b428ade..cb68b9143 100644 --- a/table/timings.txt +++ b/table/timings.txt @@ -17,3 +17,14 @@ GET: 10000000 13032311 1.30323 real 0m0.956s user 0m0.928s sys 0m0.024s + + +[eelco@tyros:~/Dev/nix/table]$ time ./a.out +testing... +RESIZES: 0 +SET: 30000000 35420570 1.18069 +GET: 40000000 49507966 1.2377 + +real 0m3.796s +user 0m3.700s +sys 0m0.016s