ResidualVM logo ResidualVM website - Forums - Contact us BuildBot - Doxygen - Wiki curved edge

hashmap.h

Go to the documentation of this file.
00001 /* ScummVM - Graphic Adventure Engine
00002  *
00003  * ScummVM is the legal property of its developers, whose names
00004  * are too numerous to list here. Please refer to the COPYRIGHT
00005  * file distributed with this source distribution.
00006  *
00007  * This program is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU General Public License
00009  * as published by the Free Software Foundation; either version 2
00010  * of the License, or (at your option) any later version.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00020  *
00021  */
00022 
00023 // The hash map (associative array) implementation in this file is
00024 // based on the PyDict implementation of CPython.
00025 
00026 #ifndef COMMON_HASHMAP_H
00027 #define COMMON_HASHMAP_H
00028 
00035 //#define DEBUG_HASH_COLLISIONS
00036 
00043 #define USE_HASHMAP_MEMORY_POOL
00044 
00045 
00046 #include "common/func.h"
00047 
00048 #ifdef DEBUG_HASH_COLLISIONS
00049 #include "common/debug.h"
00050 #endif
00051 
00052 #ifdef USE_HASHMAP_MEMORY_POOL
00053 #include "common/memorypool.h"
00054 #endif
00055 
00056 
00057 
00058 namespace Common {
00059 
00060 // The sgi IRIX MIPSpro Compiler has difficulties with nested templates.
00061 // This and the other __sgi conditionals below work around these problems.
00062 // The Intel C++ Compiler suffers from the same problems.
00063 #if (defined(__sgi) && !defined(__GNUC__)) || defined(__INTEL_COMPILER)
00064 template<class T> class IteratorImpl;
00065 #endif
00066 
00067 
00081 template<class Key, class Val, class HashFunc = Hash<Key>, class EqualFunc = EqualTo<Key> >
00082 class HashMap {
00083 public:
00084     typedef uint size_type;
00085 
00086 private:
00087 
00088     typedef HashMap<Key, Val, HashFunc, EqualFunc> HM_t;
00089 
00090     struct Node {
00091         Val _value;
00092         const Key _key;
00093         explicit Node(const Key &key) : _key(key), _value() {}
00094         Node() : _key(), _value() {}
00095     };
00096 
00097     enum {
00098         HASHMAP_PERTURB_SHIFT = 5,
00099         HASHMAP_MIN_CAPACITY = 16,
00100 
00101         // The quotient of the next two constants controls how much the
00102         // internal storage of the hashmap may fill up before being
00103         // increased automatically.
00104         // Note: the quotient of these two must be between and different
00105         // from 0 and 1.
00106         HASHMAP_LOADFACTOR_NUMERATOR = 2,
00107         HASHMAP_LOADFACTOR_DENOMINATOR = 3,
00108 
00109         HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR
00110     };
00111 
00112 #ifdef USE_HASHMAP_MEMORY_POOL
00113     ObjectPool<Node, HASHMAP_MEMORYPOOL_SIZE> _nodePool;
00114 #endif
00115 
00117     Val _defaultVal;
00118 
00119     Node **_storage;    
00120     size_type _mask;        
00121     size_type _size;
00122     size_type _deleted; 
00123 
00124     HashFunc _hash;
00125     EqualFunc _equal;
00126 
00128     #define HASHMAP_DUMMY_NODE  ((Node *)1)
00129 
00130 #ifdef DEBUG_HASH_COLLISIONS
00131     mutable int _collisions, _lookups, _dummyHits;
00132 #endif
00133 
00134     Node *allocNode(const Key &key) {
00135 #ifdef USE_HASHMAP_MEMORY_POOL
00136         return new (_nodePool) Node(key);
00137 #else
00138         return new Node(key);
00139 #endif
00140     }
00141 
00142     void freeNode(Node *node) {
00143         if (node && node != HASHMAP_DUMMY_NODE)
00144 #ifdef USE_HASHMAP_MEMORY_POOL
00145             _nodePool.deleteChunk(node);
00146 #else
00147             delete node;
00148 #endif
00149     }
00150 
00151     void assign(const HM_t &map);
00152     size_type lookup(const Key &key) const;
00153     size_type lookupAndCreateIfMissing(const Key &key);
00154     void expandStorage(size_type newCapacity);
00155 
00156 #if !defined(__sgi) || defined(__GNUC__)
00157     template<class T> friend class IteratorImpl;
00158 #endif
00159 
00163     template<class NodeType>
00164     class IteratorImpl {
00165         friend class HashMap;
00166 #if (defined(__sgi) && !defined(__GNUC__)) || defined(__INTEL_COMPILER)
00167         template<class T> friend class Common::IteratorImpl;
00168 #else
00169         template<class T> friend class IteratorImpl;
00170 #endif
00171     protected:
00172         typedef const HashMap hashmap_t;
00173 
00174         size_type _idx;
00175         hashmap_t *_hashmap;
00176 
00177     protected:
00178         IteratorImpl(size_type idx, hashmap_t *hashmap) : _idx(idx), _hashmap(hashmap) {}
00179 
00180         NodeType *deref() const {
00181             assert(_hashmap != nullptr);
00182             assert(_idx <= _hashmap->_mask);
00183             Node *node = _hashmap->_storage[_idx];
00184             assert(node != nullptr);
00185             assert(node != HASHMAP_DUMMY_NODE);
00186             return node;
00187         }
00188 
00189     public:
00190         IteratorImpl() : _idx(0), _hashmap(nullptr) {}
00191         template<class T>
00192         IteratorImpl(const IteratorImpl<T> &c) : _idx(c._idx), _hashmap(c._hashmap) {}
00193 
00194         NodeType &operator*() const { return *deref(); }
00195         NodeType *operator->() const { return deref(); }
00196 
00197         bool operator==(const IteratorImpl &iter) const { return _idx == iter._idx && _hashmap == iter._hashmap; }
00198         bool operator!=(const IteratorImpl &iter) const { return !(*this == iter); }
00199 
00200         IteratorImpl &operator++() {
00201             assert(_hashmap);
00202             do {
00203                 _idx++;
00204             } while (_idx <= _hashmap->_mask && (_hashmap->_storage[_idx] == nullptr || _hashmap->_storage[_idx] == HASHMAP_DUMMY_NODE));
00205             if (_idx > _hashmap->_mask)
00206                 _idx = (size_type)-1;
00207 
00208             return *this;
00209         }
00210 
00211         IteratorImpl operator++(int) {
00212             IteratorImpl old = *this;
00213             operator ++();
00214             return old;
00215         }
00216     };
00217 
00218 public:
00219     typedef IteratorImpl<Node> iterator;
00220     typedef IteratorImpl<const Node> const_iterator;
00221 
00222     HashMap();
00223     HashMap(const HM_t &map);
00224     ~HashMap();
00225 
00226     HM_t &operator=(const HM_t &map) {
00227         if (this == &map)
00228             return *this;
00229 
00230         // Remove the previous content and ...
00231         clear();
00232         delete[] _storage;
00233         // ... copy the new stuff.
00234         assign(map);
00235         return *this;
00236     }
00237 
00238     bool contains(const Key &key) const;
00239 
00240     Val &operator[](const Key &key);
00241     const Val &operator[](const Key &key) const;
00242 
00243     Val &getVal(const Key &key);
00244     const Val &getVal(const Key &key) const;
00245     const Val &getVal(const Key &key, const Val &defaultVal) const;
00246     void setVal(const Key &key, const Val &val);
00247 
00248     void clear(bool shrinkArray = 0);
00249 
00250     void erase(iterator entry);
00251     void erase(const Key &key);
00252 
00253     size_type size() const { return _size; }
00254 
00255     iterator    begin() {
00256         // Find and return the first non-empty entry
00257         for (size_type ctr = 0; ctr <= _mask; ++ctr) {
00258             if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
00259                 return iterator(ctr, this);
00260         }
00261         return end();
00262     }
00263     iterator    end() {
00264         return iterator((size_type)-1, this);
00265     }
00266 
00267     const_iterator  begin() const {
00268         // Find and return the first non-empty entry
00269         for (size_type ctr = 0; ctr <= _mask; ++ctr) {
00270             if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
00271                 return const_iterator(ctr, this);
00272         }
00273         return end();
00274     }
00275     const_iterator  end() const {
00276         return const_iterator((size_type)-1, this);
00277     }
00278 
00279     iterator    find(const Key &key) {
00280         size_type ctr = lookup(key);
00281         if (_storage[ctr])
00282             return iterator(ctr, this);
00283         return end();
00284     }
00285 
00286     const_iterator  find(const Key &key) const {
00287         size_type ctr = lookup(key);
00288         if (_storage[ctr])
00289             return const_iterator(ctr, this);
00290         return end();
00291     }
00292 
00293     // TODO: insert() method?
00294 
00295     bool empty() const {
00296         return (_size == 0);
00297     }
00298 };
00299 
00300 //-------------------------------------------------------
00301 // HashMap functions
00302 
00306 template<class Key, class Val, class HashFunc, class EqualFunc>
00307 HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() : _defaultVal() {
00308     _mask = HASHMAP_MIN_CAPACITY - 1;
00309     _storage = new Node *[HASHMAP_MIN_CAPACITY];
00310     assert(_storage != nullptr);
00311     memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
00312 
00313     _size = 0;
00314     _deleted = 0;
00315 
00316 #ifdef DEBUG_HASH_COLLISIONS
00317     _collisions = 0;
00318     _lookups = 0;
00319     _dummyHits = 0;
00320 #endif
00321 }
00322 
00328 template<class Key, class Val, class HashFunc, class EqualFunc>
00329 HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) :
00330     _defaultVal() {
00331 #ifdef DEBUG_HASH_COLLISIONS
00332     _collisions = 0;
00333     _lookups = 0;
00334     _dummyHits = 0;
00335 #endif
00336     assign(map);
00337 }
00338 
00342 template<class Key, class Val, class HashFunc, class EqualFunc>
00343 HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() {
00344     for (size_type ctr = 0; ctr <= _mask; ++ctr)
00345       freeNode(_storage[ctr]);
00346 
00347     delete[] _storage;
00348 #ifdef DEBUG_HASH_COLLISIONS
00349     extern void updateHashCollisionStats(int, int, int, int, int);
00350     updateHashCollisionStats(_collisions, _dummyHits, _lookups, _mask + 1, _size);
00351 #endif
00352 }
00353 
00361 template<class Key, class Val, class HashFunc, class EqualFunc>
00362 void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) {
00363     _mask = map._mask;
00364     _storage = new Node *[_mask + 1];
00365     assert(_storage != nullptr);
00366     memset(_storage, 0, (_mask + 1) * sizeof(Node *));
00367 
00368     // Simply clone the map given to us, one by one.
00369     _size = 0;
00370     _deleted = 0;
00371     for (size_type ctr = 0; ctr <= _mask; ++ctr) {
00372         if (map._storage[ctr] == HASHMAP_DUMMY_NODE) {
00373             _storage[ctr] = HASHMAP_DUMMY_NODE;
00374             _deleted++;
00375         } else if (map._storage[ctr] != nullptr) {
00376             _storage[ctr] = allocNode(map._storage[ctr]->_key);
00377             _storage[ctr]->_value = map._storage[ctr]->_value;
00378             _size++;
00379         }
00380     }
00381     // Perform a sanity check (to help track down hashmap corruption)
00382     assert(_size == map._size);
00383     assert(_deleted == map._deleted);
00384 }
00385 
00386 
00387 template<class Key, class Val, class HashFunc, class EqualFunc>
00388 void HashMap<Key, Val, HashFunc, EqualFunc>::clear(bool shrinkArray) {
00389     for (size_type ctr = 0; ctr <= _mask; ++ctr) {
00390         freeNode(_storage[ctr]);
00391         _storage[ctr] = nullptr;
00392     }
00393 
00394 #ifdef USE_HASHMAP_MEMORY_POOL
00395     _nodePool.freeUnusedPages();
00396 #endif
00397 
00398     if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) {
00399         delete[] _storage;
00400 
00401         _mask = HASHMAP_MIN_CAPACITY - 1;
00402         _storage = new Node *[HASHMAP_MIN_CAPACITY];
00403         assert(_storage != nullptr);
00404         memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
00405     }
00406 
00407     _size = 0;
00408     _deleted = 0;
00409 }
00410 
00411 template<class Key, class Val, class HashFunc, class EqualFunc>
00412 void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(size_type newCapacity) {
00413     assert(newCapacity > _mask + 1);
00414 
00415 #ifndef NDEBUG
00416     const size_type old_size = _size;
00417 #endif
00418     const size_type old_mask = _mask;
00419     Node **old_storage = _storage;
00420 
00421     // allocate a new array
00422     _size = 0;
00423     _deleted = 0;
00424     _mask = newCapacity - 1;
00425     _storage = new Node *[newCapacity];
00426     assert(_storage != nullptr);
00427     memset(_storage, 0, newCapacity * sizeof(Node *));
00428 
00429     // rehash all the old elements
00430     for (size_type ctr = 0; ctr <= old_mask; ++ctr) {
00431         if (old_storage[ctr] == nullptr || old_storage[ctr] == HASHMAP_DUMMY_NODE)
00432             continue;
00433 
00434         // Insert the element from the old table into the new table.
00435         // Since we know that no key exists twice in the old table, we
00436         // can do this slightly better than by calling lookup, since we
00437         // don't have to call _equal().
00438         const size_type hash = _hash(old_storage[ctr]->_key);
00439         size_type idx = hash & _mask;
00440         for (size_type perturb = hash; _storage[idx] != nullptr && _storage[idx] != HASHMAP_DUMMY_NODE; perturb >>= HASHMAP_PERTURB_SHIFT) {
00441             idx = (5 * idx + perturb + 1) & _mask;
00442         }
00443 
00444         _storage[idx] = old_storage[ctr];
00445         _size++;
00446     }
00447 
00448     // Perform a sanity check: Old number of elements should match the new one!
00449     // This check will fail if some previous operation corrupted this hashmap.
00450     assert(_size == old_size);
00451 
00452     delete[] old_storage;
00453 
00454     return;
00455 }
00456 
00457 template<class Key, class Val, class HashFunc, class EqualFunc>
00458 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const {
00459     const size_type hash = _hash(key);
00460     size_type ctr = hash & _mask;
00461     for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
00462         if (_storage[ctr] == nullptr)
00463             break;
00464         if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
00465 #ifdef DEBUG_HASH_COLLISIONS
00466             _dummyHits++;
00467 #endif
00468         } else if (_equal(_storage[ctr]->_key, key))
00469             break;
00470 
00471         ctr = (5 * ctr + perturb + 1) & _mask;
00472 
00473 #ifdef DEBUG_HASH_COLLISIONS
00474         _collisions++;
00475 #endif
00476     }
00477 
00478 #ifdef DEBUG_HASH_COLLISIONS
00479     _lookups++;
00480     debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
00481         _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
00482         (const void *)this, _mask + 1, _size);
00483 #endif
00484 
00485     return ctr;
00486 }
00487 
00488 template<class Key, class Val, class HashFunc, class EqualFunc>
00489 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookupAndCreateIfMissing(const Key &key) {
00490     const size_type hash = _hash(key);
00491     size_type ctr = hash & _mask;
00492     const size_type NONE_FOUND = _mask + 1;
00493     size_type first_free = NONE_FOUND;
00494     bool found = false;
00495     for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
00496         if (_storage[ctr] == nullptr)
00497             break;
00498         if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
00499 #ifdef DEBUG_HASH_COLLISIONS
00500             _dummyHits++;
00501 #endif
00502             if (first_free == NONE_FOUND)
00503                 first_free = ctr;
00504         } else if (_equal(_storage[ctr]->_key, key)) {
00505             found = true;
00506             break;
00507         }
00508 
00509         ctr = (5 * ctr + perturb + 1) & _mask;
00510 
00511 #ifdef DEBUG_HASH_COLLISIONS
00512         _collisions++;
00513 #endif
00514     }
00515 
00516 #ifdef DEBUG_HASH_COLLISIONS
00517     _lookups++;
00518     debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
00519         _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
00520         (const void *)this, _mask + 1, _size);
00521 #endif
00522 
00523     if (!found && first_free != NONE_FOUND)
00524         ctr = first_free;
00525 
00526     if (!found) {
00527         if (_storage[ctr])
00528             _deleted--;
00529         _storage[ctr] = allocNode(key);
00530         assert(_storage[ctr] != nullptr);
00531         _size++;
00532 
00533         // Keep the load factor below a certain threshold.
00534         // Deleted nodes are also counted
00535         size_type capacity = _mask + 1;
00536         if ((_size + _deleted) * HASHMAP_LOADFACTOR_DENOMINATOR >
00537                 capacity * HASHMAP_LOADFACTOR_NUMERATOR) {
00538             capacity = capacity < 500 ? (capacity * 4) : (capacity * 2);
00539             expandStorage(capacity);
00540             ctr = lookup(key);
00541             assert(_storage[ctr] != nullptr);
00542         }
00543     }
00544 
00545     return ctr;
00546 }
00547 
00548 
00549 template<class Key, class Val, class HashFunc, class EqualFunc>
00550 bool HashMap<Key, Val, HashFunc, EqualFunc>::contains(const Key &key) const {
00551     size_type ctr = lookup(key);
00552     return (_storage[ctr] != nullptr);
00553 }
00554 
00555 template<class Key, class Val, class HashFunc, class EqualFunc>
00556 Val &HashMap<Key, Val, HashFunc, EqualFunc>::operator[](const Key &key) {
00557     return getVal(key);
00558 }
00559 
00560 template<class Key, class Val, class HashFunc, class EqualFunc>
00561 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::operator[](const Key &key) const {
00562     return getVal(key);
00563 }
00564 
00565 template<class Key, class Val, class HashFunc, class EqualFunc>
00566 Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key) {
00567     size_type ctr = lookupAndCreateIfMissing(key);
00568     assert(_storage[ctr] != nullptr);
00569     return _storage[ctr]->_value;
00570 }
00571 
00572 template<class Key, class Val, class HashFunc, class EqualFunc>
00573 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key) const {
00574     return getVal(key, _defaultVal);
00575 }
00576 
00577 template<class Key, class Val, class HashFunc, class EqualFunc>
00578 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key, const Val &defaultVal) const {
00579     size_type ctr = lookup(key);
00580     if (_storage[ctr] != nullptr)
00581         return _storage[ctr]->_value;
00582     else
00583         return defaultVal;
00584 }
00585 
00586 template<class Key, class Val, class HashFunc, class EqualFunc>
00587 void HashMap<Key, Val, HashFunc, EqualFunc>::setVal(const Key &key, const Val &val) {
00588     size_type ctr = lookupAndCreateIfMissing(key);
00589     assert(_storage[ctr] != nullptr);
00590     _storage[ctr]->_value = val;
00591 }
00592 
00593 template<class Key, class Val, class HashFunc, class EqualFunc>
00594 void HashMap<Key, Val, HashFunc, EqualFunc>::erase(iterator entry) {
00595     // Check whether we have a valid iterator
00596     assert(entry._hashmap == this);
00597     const size_type ctr = entry._idx;
00598     assert(ctr <= _mask);
00599     Node * const node = _storage[ctr];
00600     assert(node != NULL);
00601     assert(node != HASHMAP_DUMMY_NODE);
00602 
00603     // If we remove a key, we replace it with a dummy node.
00604     freeNode(node);
00605     _storage[ctr] = HASHMAP_DUMMY_NODE;
00606     _size--;
00607     _deleted++;
00608 }
00609 
00610 template<class Key, class Val, class HashFunc, class EqualFunc>
00611 void HashMap<Key, Val, HashFunc, EqualFunc>::erase(const Key &key) {
00612 
00613     size_type ctr = lookup(key);
00614     if (_storage[ctr] == nullptr)
00615         return;
00616 
00617     // If we remove a key, we replace it with a dummy node.
00618     freeNode(_storage[ctr]);
00619     _storage[ctr] = HASHMAP_DUMMY_NODE;
00620     _size--;
00621     _deleted++;
00622     return;
00623 }
00624 
00625 #undef HASHMAP_DUMMY_NODE
00626 
00627 } // End of namespace Common
00628 
00629 #endif


Generated on Sat Sep 19 2020 05:01:07 for ResidualVM by doxygen 1.7.1
curved edge   curved edge