ResidualVM logo ResidualVM website - Forums - Contact us BuildBot - Doxygen - Wiki curved edge

hashmap.h

Go to the documentation of this file.
00001 /* ScummVM - Graphic Adventure Engine
00002  *
00003  * ScummVM is the legal property of its developers, whose names
00004  * are too numerous to list here. Please refer to the COPYRIGHT
00005  * file distributed with this source distribution.
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License
00009  * as published by the Free Software Foundation; either version 2
00010  * of the License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00020  *
00021  */
00022 
00023 // The hash map (associative array) implementation in this file is
00024 // based on the PyDict implementation of CPython.
00025 
00026 #ifndef COMMON_HASHMAP_H
00027 #define COMMON_HASHMAP_H
00028 
00035 //#define DEBUG_HASH_COLLISIONS
00036 
00043 #define USE_HASHMAP_MEMORY_POOL
00044 
00045 
00046 #include "common/func.h"
00047 
00048 #ifdef DEBUG_HASH_COLLISIONS
00049 #include "common/debug.h"
00050 #endif
00051 
00052 #ifdef USE_HASHMAP_MEMORY_POOL
00053 #include "common/memorypool.h"
00054 #endif
00055 
00056 
00057 
00058 namespace Common {
00059 
00060 // The sgi IRIX MIPSpro Compiler has difficulties with nested templates.
00061 // This and the other __sgi conditionals below work around these problems.
00062 // The Intel C++ Compiler suffers from the same problems.
00063 #if (defined(__sgi) && !defined(__GNUC__)) || defined(__INTEL_COMPILER)
00064 template<class T> class IteratorImpl;
00065 #endif
00066 
00067 
00081 template<class Key, class Val, class HashFunc = Hash<Key>, class EqualFunc = EqualTo<Key> >
00082 class HashMap {
00083 public:
00084     typedef uint size_type;
00085 
00086 private:
00087 
00088     typedef HashMap<Key, Val, HashFunc, EqualFunc> HM_t;
00089 
00090     struct Node {
00091         const Key _key;
00092         Val _value;
00093         explicit Node(const Key &key) : _key(key), _value() {}
00094         Node() : _key(), _value() {}
00095     };
00096 
00097     enum {
00098         HASHMAP_PERTURB_SHIFT = 5,
00099         HASHMAP_MIN_CAPACITY = 16,
00100 
00101         // The quotient of the next two constants controls how much the
00102         // internal storage of the hashmap may fill up before being
00103         // increased automatically.
00104         // Note: the quotient of these two must be between and different
00105         // from 0 and 1.
00106         HASHMAP_LOADFACTOR_NUMERATOR = 2,
00107         HASHMAP_LOADFACTOR_DENOMINATOR = 3,
00108 
00109         HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR
00110     };
00111 
00112 #ifdef USE_HASHMAP_MEMORY_POOL
00113     ObjectPool<Node, HASHMAP_MEMORYPOOL_SIZE> _nodePool;
00114 #endif
00115 
00116     Node **_storage;    
00117     size_type _mask;        
00118     size_type _size;
00119     size_type _deleted; 
00120 
00121     HashFunc _hash;
00122     EqualFunc _equal;
00123 
00125     const Val _defaultVal;
00126 
00128     #define HASHMAP_DUMMY_NODE  ((Node *)1)
00129 
00130 #ifdef DEBUG_HASH_COLLISIONS
00131     mutable int _collisions, _lookups, _dummyHits;
00132 #endif
00133 
00134     Node *allocNode(const Key &key) {
00135 #ifdef USE_HASHMAP_MEMORY_POOL
00136         return new (_nodePool) Node(key);
00137 #else
00138         return new Node(key);
00139 #endif
00140     }
00141 
00142     void freeNode(Node *node) {
00143         if (node && node != HASHMAP_DUMMY_NODE)
00144 #ifdef USE_HASHMAP_MEMORY_POOL
00145             _nodePool.deleteChunk(node);
00146 #else
00147             delete node;
00148 #endif
00149     }
00150 
00151     void assign(const HM_t &map);
00152     size_type lookup(const Key &key) const;
00153     size_type lookupAndCreateIfMissing(const Key &key);
00154     void expandStorage(size_type newCapacity);
00155 
00156 #if !defined(__sgi) || defined(__GNUC__)
00157     template<class T> friend class IteratorImpl;
00158 #endif
00159 
00163     template<class NodeType>
00164     class IteratorImpl {
00165         friend class HashMap;
00166 #if (defined(__sgi) && !defined(__GNUC__)) || defined(__INTEL_COMPILER)
00167         template<class T> friend class Common::IteratorImpl;
00168 #else
00169         template<class T> friend class IteratorImpl;
00170 #endif
00171     protected:
00172         typedef const HashMap hashmap_t;
00173 
00174         size_type _idx;
00175         hashmap_t *_hashmap;
00176 
00177     protected:
00178         IteratorImpl(size_type idx, hashmap_t *hashmap) : _idx(idx), _hashmap(hashmap) {}
00179 
00180         NodeType *deref() const {
00181             assert(_hashmap != nullptr);
00182             assert(_idx <= _hashmap->_mask);
00183             Node *node = _hashmap->_storage[_idx];
00184             assert(node != nullptr);
00185             assert(node != HASHMAP_DUMMY_NODE);
00186             return node;
00187         }
00188 
00189     public:
00190         IteratorImpl() : _idx(0), _hashmap(nullptr) {}
00191         template<class T>
00192         IteratorImpl(const IteratorImpl<T> &c) : _idx(c._idx), _hashmap(c._hashmap) {}
00193 
00194         NodeType &operator*() const { return *deref(); }
00195         NodeType *operator->() const { return deref(); }
00196 
00197         bool operator==(const IteratorImpl &iter) const { return _idx == iter._idx && _hashmap == iter._hashmap; }
00198         bool operator!=(const IteratorImpl &iter) const { return !(*this == iter); }
00199 
00200         IteratorImpl &operator++() {
00201             assert(_hashmap);
00202             do {
00203                 _idx++;
00204             } while (_idx <= _hashmap->_mask && (_hashmap->_storage[_idx] == nullptr || _hashmap->_storage[_idx] == HASHMAP_DUMMY_NODE));
00205             if (_idx > _hashmap->_mask)
00206                 _idx = (size_type)-1;
00207 
00208             return *this;
00209         }
00210 
00211         IteratorImpl operator++(int) {
00212             IteratorImpl old = *this;
00213             operator ++();
00214             return old;
00215         }
00216     };
00217 
00218 public:
00219     typedef IteratorImpl<Node> iterator;
00220     typedef IteratorImpl<const Node> const_iterator;
00221 
00222     HashMap();
00223     HashMap(const HM_t &map);
00224     ~HashMap();
00225 
00226     HM_t &operator=(const HM_t &map) {
00227         if (this == &map)
00228             return *this;
00229 
00230         // Remove the previous content and ...
00231         clear();
00232         delete[] _storage;
00233         // ... copy the new stuff.
00234         assign(map);
00235         return *this;
00236     }
00237 
00238     bool contains(const Key &key) const;
00239 
00240     Val &operator[](const Key &key);
00241     const Val &operator[](const Key &key) const;
00242 
00243     Val &getVal(const Key &key);
00244     const Val &getVal(const Key &key) const;
00245     const Val &getVal(const Key &key, const Val &defaultVal) const;
00246     void setVal(const Key &key, const Val &val);
00247 
00248     void clear(bool shrinkArray = 0);
00249 
00250     void erase(iterator entry);
00251     void erase(const Key &key);
00252 
00253     size_type size() const { return _size; }
00254 
00255     iterator    begin() {
00256         // Find and return the first non-empty entry
00257         for (size_type ctr = 0; ctr <= _mask; ++ctr) {
00258             if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
00259                 return iterator(ctr, this);
00260         }
00261         return end();
00262     }
00263     iterator    end() {
00264         return iterator((size_type)-1, this);
00265     }
00266 
00267     const_iterator  begin() const {
00268         // Find and return the first non-empty entry
00269         for (size_type ctr = 0; ctr <= _mask; ++ctr) {
00270             if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
00271                 return const_iterator(ctr, this);
00272         }
00273         return end();
00274     }
00275     const_iterator  end() const {
00276         return const_iterator((size_type)-1, this);
00277     }
00278 
00279     iterator    find(const Key &key) {
00280         size_type ctr = lookup(key);
00281         if (_storage[ctr])
00282             return iterator(ctr, this);
00283         return end();
00284     }
00285 
00286     const_iterator  find(const Key &key) const {
00287         size_type ctr = lookup(key);
00288         if (_storage[ctr])
00289             return const_iterator(ctr, this);
00290         return end();
00291     }
00292 
00293     // TODO: insert() method?
00294 
00295     bool empty() const {
00296         return (_size == 0);
00297     }
00298 };
00299 
00300 //-------------------------------------------------------
00301 // HashMap functions
00302 
00306 template<class Key, class Val, class HashFunc, class EqualFunc>
00307 HashMap<Key, Val, HashFunc, EqualFunc>::HashMap()
00308 //
00309 // We have to skip _defaultVal() on PS2 to avoid gcc 3.2.2 ICE
00310 //
00311 #ifdef __PLAYSTATION2__
00312     {
00313 #else
00314     : _defaultVal() {
00315 #endif
00316     _mask = HASHMAP_MIN_CAPACITY - 1;
00317     _storage = new Node *[HASHMAP_MIN_CAPACITY];
00318     assert(_storage != nullptr);
00319     memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
00320 
00321     _size = 0;
00322     _deleted = 0;
00323 
00324 #ifdef DEBUG_HASH_COLLISIONS
00325     _collisions = 0;
00326     _lookups = 0;
00327     _dummyHits = 0;
00328 #endif
00329 }
00330 
00336 template<class Key, class Val, class HashFunc, class EqualFunc>
00337 HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) :
00338     _defaultVal() {
00339 #ifdef DEBUG_HASH_COLLISIONS
00340     _collisions = 0;
00341     _lookups = 0;
00342     _dummyHits = 0;
00343 #endif
00344     assign(map);
00345 }
00346 
00350 template<class Key, class Val, class HashFunc, class EqualFunc>
00351 HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() {
00352     for (size_type ctr = 0; ctr <= _mask; ++ctr)
00353       freeNode(_storage[ctr]);
00354 
00355     delete[] _storage;
00356 #ifdef DEBUG_HASH_COLLISIONS
00357     extern void updateHashCollisionStats(int, int, int, int, int);
00358     updateHashCollisionStats(_collisions, _dummyHits, _lookups, _mask + 1, _size);
00359 #endif
00360 }
00361 
00369 template<class Key, class Val, class HashFunc, class EqualFunc>
00370 void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) {
00371     _mask = map._mask;
00372     _storage = new Node *[_mask + 1];
00373     assert(_storage != nullptr);
00374     memset(_storage, 0, (_mask + 1) * sizeof(Node *));
00375 
00376     // Simply clone the map given to us, one by one.
00377     _size = 0;
00378     _deleted = 0;
00379     for (size_type ctr = 0; ctr <= _mask; ++ctr) {
00380         if (map._storage[ctr] == HASHMAP_DUMMY_NODE) {
00381             _storage[ctr] = HASHMAP_DUMMY_NODE;
00382             _deleted++;
00383         } else if (map._storage[ctr] != nullptr) {
00384             _storage[ctr] = allocNode(map._storage[ctr]->_key);
00385             _storage[ctr]->_value = map._storage[ctr]->_value;
00386             _size++;
00387         }
00388     }
00389     // Perform a sanity check (to help track down hashmap corruption)
00390     assert(_size == map._size);
00391     assert(_deleted == map._deleted);
00392 }
00393 
00394 
00395 template<class Key, class Val, class HashFunc, class EqualFunc>
00396 void HashMap<Key, Val, HashFunc, EqualFunc>::clear(bool shrinkArray) {
00397     for (size_type ctr = 0; ctr <= _mask; ++ctr) {
00398         freeNode(_storage[ctr]);
00399         _storage[ctr] = nullptr;
00400     }
00401 
00402 #ifdef USE_HASHMAP_MEMORY_POOL
00403     _nodePool.freeUnusedPages();
00404 #endif
00405 
00406     if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) {
00407         delete[] _storage;
00408 
00409         _mask = HASHMAP_MIN_CAPACITY - 1;
00410         _storage = new Node *[HASHMAP_MIN_CAPACITY];
00411         assert(_storage != nullptr);
00412         memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
00413     }
00414 
00415     _size = 0;
00416     _deleted = 0;
00417 }
00418 
00419 template<class Key, class Val, class HashFunc, class EqualFunc>
00420 void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(size_type newCapacity) {
00421     assert(newCapacity > _mask + 1);
00422 
00423 #ifndef NDEBUG
00424     const size_type old_size = _size;
00425 #endif
00426     const size_type old_mask = _mask;
00427     Node **old_storage = _storage;
00428 
00429     // allocate a new array
00430     _size = 0;
00431     _deleted = 0;
00432     _mask = newCapacity - 1;
00433     _storage = new Node *[newCapacity];
00434     assert(_storage != nullptr);
00435     memset(_storage, 0, newCapacity * sizeof(Node *));
00436 
00437     // rehash all the old elements
00438     for (size_type ctr = 0; ctr <= old_mask; ++ctr) {
00439         if (old_storage[ctr] == nullptr || old_storage[ctr] == HASHMAP_DUMMY_NODE)
00440             continue;
00441 
00442         // Insert the element from the old table into the new table.
00443         // Since we know that no key exists twice in the old table, we
00444         // can do this slightly better than by calling lookup, since we
00445         // don't have to call _equal().
00446         const size_type hash = _hash(old_storage[ctr]->_key);
00447         size_type idx = hash & _mask;
00448         for (size_type perturb = hash; _storage[idx] != nullptr && _storage[idx] != HASHMAP_DUMMY_NODE; perturb >>= HASHMAP_PERTURB_SHIFT) {
00449             idx = (5 * idx + perturb + 1) & _mask;
00450         }
00451 
00452         _storage[idx] = old_storage[ctr];
00453         _size++;
00454     }
00455 
00456     // Perform a sanity check: Old number of elements should match the new one!
00457     // This check will fail if some previous operation corrupted this hashmap.
00458     assert(_size == old_size);
00459 
00460     delete[] old_storage;
00461 
00462     return;
00463 }
00464 
00465 template<class Key, class Val, class HashFunc, class EqualFunc>
00466 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const {
00467     const size_type hash = _hash(key);
00468     size_type ctr = hash & _mask;
00469     for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
00470         if (_storage[ctr] == nullptr)
00471             break;
00472         if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
00473 #ifdef DEBUG_HASH_COLLISIONS
00474             _dummyHits++;
00475 #endif
00476         } else if (_equal(_storage[ctr]->_key, key))
00477             break;
00478 
00479         ctr = (5 * ctr + perturb + 1) & _mask;
00480 
00481 #ifdef DEBUG_HASH_COLLISIONS
00482         _collisions++;
00483 #endif
00484     }
00485 
00486 #ifdef DEBUG_HASH_COLLISIONS
00487     _lookups++;
00488     debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
00489         _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
00490         (const void *)this, _mask + 1, _size);
00491 #endif
00492 
00493     return ctr;
00494 }
00495 
00496 template<class Key, class Val, class HashFunc, class EqualFunc>
00497 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookupAndCreateIfMissing(const Key &key) {
00498     const size_type hash = _hash(key);
00499     size_type ctr = hash & _mask;
00500     const size_type NONE_FOUND = _mask + 1;
00501     size_type first_free = NONE_FOUND;
00502     bool found = false;
00503     for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
00504         if (_storage[ctr] == nullptr)
00505             break;
00506         if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
00507 #ifdef DEBUG_HASH_COLLISIONS
00508             _dummyHits++;
00509 #endif
00510             if (first_free == NONE_FOUND)
00511                 first_free = ctr;
00512         } else if (_equal(_storage[ctr]->_key, key)) {
00513             found = true;
00514             break;
00515         }
00516 
00517         ctr = (5 * ctr + perturb + 1) & _mask;
00518 
00519 #ifdef DEBUG_HASH_COLLISIONS
00520         _collisions++;
00521 #endif
00522     }
00523 
00524 #ifdef DEBUG_HASH_COLLISIONS
00525     _lookups++;
00526     debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
00527         _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
00528         (const void *)this, _mask + 1, _size);
00529 #endif
00530 
00531     if (!found && first_free != NONE_FOUND)
00532         ctr = first_free;
00533 
00534     if (!found) {
00535         if (_storage[ctr])
00536             _deleted--;
00537         _storage[ctr] = allocNode(key);
00538         assert(_storage[ctr] != nullptr);
00539         _size++;
00540 
00541         // Keep the load factor below a certain threshold.
00542         // Deleted nodes are also counted
00543         size_type capacity = _mask + 1;
00544         if ((_size + _deleted) * HASHMAP_LOADFACTOR_DENOMINATOR >
00545                 capacity * HASHMAP_LOADFACTOR_NUMERATOR) {
00546             capacity = capacity < 500 ? (capacity * 4) : (capacity * 2);
00547             expandStorage(capacity);
00548             ctr = lookup(key);
00549             assert(_storage[ctr] != nullptr);
00550         }
00551     }
00552 
00553     return ctr;
00554 }
00555 
00556 
00557 template<class Key, class Val, class HashFunc, class EqualFunc>
00558 bool HashMap<Key, Val, HashFunc, EqualFunc>::contains(const Key &key) const {
00559     size_type ctr = lookup(key);
00560     return (_storage[ctr] != nullptr);
00561 }
00562 
00563 template<class Key, class Val, class HashFunc, class EqualFunc>
00564 Val &HashMap<Key, Val, HashFunc, EqualFunc>::operator[](const Key &key) {
00565     return getVal(key);
00566 }
00567 
00568 template<class Key, class Val, class HashFunc, class EqualFunc>
00569 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::operator[](const Key &key) const {
00570     return getVal(key);
00571 }
00572 
00573 template<class Key, class Val, class HashFunc, class EqualFunc>
00574 Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key) {
00575     size_type ctr = lookupAndCreateIfMissing(key);
00576     assert(_storage[ctr] != nullptr);
00577     return _storage[ctr]->_value;
00578 }
00579 
00580 template<class Key, class Val, class HashFunc, class EqualFunc>
00581 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key) const {
00582     return getVal(key, _defaultVal);
00583 }
00584 
00585 template<class Key, class Val, class HashFunc, class EqualFunc>
00586 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key, const Val &defaultVal) const {
00587     size_type ctr = lookup(key);
00588     if (_storage[ctr] != nullptr)
00589         return _storage[ctr]->_value;
00590     else
00591         return defaultVal;
00592 }
00593 
00594 template<class Key, class Val, class HashFunc, class EqualFunc>
00595 void HashMap<Key, Val, HashFunc, EqualFunc>::setVal(const Key &key, const Val &val) {
00596     size_type ctr = lookupAndCreateIfMissing(key);
00597     assert(_storage[ctr] != nullptr);
00598     _storage[ctr]->_value = val;
00599 }
00600 
00601 template<class Key, class Val, class HashFunc, class EqualFunc>
00602 void HashMap<Key, Val, HashFunc, EqualFunc>::erase(iterator entry) {
00603     // Check whether we have a valid iterator
00604     assert(entry._hashmap == this);
00605     const size_type ctr = entry._idx;
00606     assert(ctr <= _mask);
00607     Node * const node = _storage[ctr];
00608     assert(node != NULL);
00609     assert(node != HASHMAP_DUMMY_NODE);
00610 
00611     // If we remove a key, we replace it with a dummy node.
00612     freeNode(node);
00613     _storage[ctr] = HASHMAP_DUMMY_NODE;
00614     _size--;
00615     _deleted++;
00616 }
00617 
00618 template<class Key, class Val, class HashFunc, class EqualFunc>
00619 void HashMap<Key, Val, HashFunc, EqualFunc>::erase(const Key &key) {
00620 
00621     size_type ctr = lookup(key);
00622     if (_storage[ctr] == nullptr)
00623         return;
00624 
00625     // If we remove a key, we replace it with a dummy node.
00626     freeNode(_storage[ctr]);
00627     _storage[ctr] = HASHMAP_DUMMY_NODE;
00628     _size--;
00629     _deleted++;
00630     return;
00631 }
00632 
00633 #undef HASHMAP_DUMMY_NODE
00634 
00635 } // End of namespace Common
00636 
00637 #endif


Generated on Sat May 25 2019 05:00:46 for ResidualVM by doxygen 1.7.1
curved edge   curved edge