]> Dogcows Code - chaz/yoink/blob - hash.tpp
bcb5bc598b7ffb18d9392b27055c228ca5135425
[chaz/yoink] / hash.tpp
1 ////////////////////////////////////////////////////////////////////////////////
2
3 // Author: Andy Rushton
4 // Copyright: (c) Southampton University 1999-2004
5 // (c) Andy Rushton 2004-2009
6 // License: BSD License, see ../docs/license.html
7
8 ////////////////////////////////////////////////////////////////////////////////
9
10 namespace stlplus
11 {
12
13 ////////////////////////////////////////////////////////////////////////////////
14 // the element stored in the hash
15
16 template<typename K, typename T, typename H, typename E>
17 class hash_element
18 {
19 public:
20 master_iterator<hash<K,T,H,E>, hash_element<K,T,H,E> > m_master;
21 std::pair<const K, T> m_value;
22 hash_element<K,T,H,E>* m_next;
23 unsigned m_hash;
24
25 hash_element(const hash<K,T,H,E>* owner, const K& key, const T& data, unsigned hash) :
26 m_master(owner,this), m_value(key,data), m_next(0), m_hash(hash)
27 {
28 }
29
30 hash_element(const hash<K,T,H,E>* owner, const std::pair<const K,T>& value, unsigned hash) :
31 m_master(owner,this), m_value(value), m_next(0), m_hash(hash)
32 {
33 }
34
35 ~hash_element(void)
36 {
37 m_next = 0;
38 m_hash = 0;
39 }
40
41 const hash<K,T,H,E>* owner(void) const
42 {
43 return m_master.owner();
44 }
45
46 // generate the bin number from the hash value and the owner's number of bins
47 unsigned bin(void) const
48 {
49 return m_hash % (owner()->m_bins);
50 }
51 };
52
53 ////////////////////////////////////////////////////////////////////////////////
54 // iterator
55
56 // null constructor
57 template<typename K, typename T, class H, class E, typename V>
58 hash_iterator<K,T,H,E,V>::hash_iterator(void)
59 {
60 }
61
62 // non-null constructor used from within the hash to construct a valid iterator
63 template<typename K, typename T, class H, class E, typename V>
64 hash_iterator<K,T,H,E,V>::hash_iterator(hash_element<K,T,H,E>* element) :
65 safe_iterator<hash<K,T,H,E>,hash_element<K,T,H,E> >(element->m_master)
66 {
67 }
68
69 // constructor used to create an end iterator
70 template<typename K, typename T, class H, class E, typename V>
71 hash_iterator<K,T,H,E,V>::hash_iterator(const hash<K,T,H,E>* owner) :
72 safe_iterator<hash<K,T,H,E>,hash_element<K,T,H,E> >(owner)
73 {
74 }
75
76 template<typename K, typename T, class H, class E, typename V>
77 hash_iterator<K,T,H,E,V>::hash_iterator(const safe_iterator<hash<K,T,H,E>, hash_element<K,T,H,E> >& iterator) :
78 safe_iterator<hash<K,T,H,E>,hash_element<K,T,H,E> >(iterator)
79 {
80 }
81
82 // destructor
83
84 template<typename K, typename T, class H, class E, typename V>
85 hash_iterator<K,T,H,E,V>::~hash_iterator(void)
86 {
87 }
88
89 // mode conversions
90
91 template<typename K, typename T, class H, class E, typename V>
92 TYPENAME hash_iterator<K,T,H,E,V>::const_iterator hash_iterator<K,T,H,E,V>::constify(void) const
93 {
94 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(*this);
95 }
96
97 template<typename K, typename T, class H, class E, typename V>
98 TYPENAME hash_iterator<K,T,H,E,V>::iterator hash_iterator<K,T,H,E,V>::deconstify(void) const
99 {
100 return hash_iterator<K,T,H,E,std::pair<const K,T> >(*this);
101 }
102
103 // increment operator looks for the next element in the table
104 // if there isn't one, then this becomes an end() iterator with m_bin = m_bins
105 template<typename K, typename T, class H, class E, typename V>
106 TYPENAME hash_iterator<K,T,H,E,V>::this_iterator& hash_iterator<K,T,H,E,V>::operator ++ (void)
107 throw(null_dereference,end_dereference)
108 {
109 this->assert_valid();
110 if (this->node()->m_next)
111 set(this->node()->m_next->m_master);
112 else
113 {
114 // failing that, subsequent hash values are tried until either an element is found or there are no more bins
115 // in which case it becomes an end() iterator
116 hash_element<K,T,H,E>* element = 0;
117 unsigned current_bin = this->node()->bin();
118 for(current_bin++; !element && (current_bin < this->owner()->m_bins); current_bin++)
119 element = this->owner()->m_values[current_bin];
120 if (element)
121 set(element->m_master);
122 else
123 this->set_end();
124 }
125 return *this;
126 }
127
128 // post-increment is defined in terms of pre-increment
129 template<typename K, typename T, class H, class E, typename V>
130 TYPENAME hash_iterator<K,T,H,E,V>::this_iterator hash_iterator<K,T,H,E,V>::operator ++ (int)
131 throw(null_dereference,end_dereference)
132 {
133 hash_iterator<K,T,H,E,V> old(*this);
134 ++(*this);
135 return old;
136 }
137
138 // two iterators are equal if they point to the same element
139 // both iterators must be non-null and belong to the same table
140 template<typename K, typename T, class H, class E, typename V>
141 bool hash_iterator<K,T,H,E,V>::operator == (const hash_iterator<K,T,H,E,V>& r) const
142 {
143 return equal(r);
144 }
145
146 template<typename K, typename T, class H, class E, typename V>
147 bool hash_iterator<K,T,H,E,V>::operator != (const hash_iterator<K,T,H,E,V>& r) const
148 {
149 return !operator==(r);
150 }
151
152 template<typename K, typename T, class H, class E, typename V>
153 bool hash_iterator<K,T,H,E,V>::operator < (const hash_iterator<K,T,H,E,V>& r) const
154 {
155 return compare(r) < 0;
156 }
157
158 // iterator dereferencing is only legal on a non-null iterator
159 template<typename K, typename T, class H, class E, typename V>
160 V& hash_iterator<K,T,H,E,V>::operator*(void) const
161 throw(null_dereference,end_dereference)
162 {
163 this->assert_valid();
164 return this->node()->m_value;
165 }
166
167 template<typename K, typename T, class H, class E, typename V>
168 V* hash_iterator<K,T,H,E,V>::operator->(void) const
169 throw(null_dereference,end_dereference)
170 {
171 return &(operator*());
172 }
173
174 ////////////////////////////////////////////////////////////////////////////////
175 // hash
176
177 // totally arbitrary initial size used for auto-rehashed tables
178 static unsigned hash_default_bins = 127;
179
180 // constructor
181 // tests whether the user wants auto-rehash
182 // sets the rehash point to be a loading of 1.0 by setting it to the number of bins
183 // uses the user's size unless this is zero, in which case implement the default
184
185 template<typename K, typename T, class H, class E>
186 hash<K,T,H,E>::hash(unsigned bins) :
187 m_rehash(bins), m_bins(bins > 0 ? bins : hash_default_bins), m_size(0), m_values(0)
188 {
189 m_values = new hash_element<K,T,H,E>*[m_bins];
190 for (unsigned i = 0; i < m_bins; i++)
191 m_values[i] = 0;
192 }
193
194 template<typename K, typename T, class H, class E>
195 hash<K,T,H,E>::~hash(void)
196 {
197 // delete all the elements
198 clear();
199 // and delete the data structure
200 delete[] m_values;
201 m_values = 0;
202 }
203
204 // as usual, implement the copy constructor i.t.o. the assignment operator
205
206 template<typename K, typename T, class H, class E>
207 hash<K,T,H,E>::hash(const hash<K,T,H,E>& right) :
208 m_rehash(right.m_rehash), m_bins(right.m_bins), m_size(0), m_values(0)
209 {
210 m_values = new hash_element<K,T,H,E>*[right.m_bins];
211 // copy the rehash behaviour as well as the size
212 for (unsigned i = 0; i < m_bins; i++)
213 m_values[i] = 0;
214 *this = right;
215 }
216
217 // assignment operator
218 // this is done by copying the elements
219 // the source and target hashes can be different sizes
220 // the hash is self-copy safe, i.e. it is legal to say x = x;
221
222 template<typename K, typename T, class H, class E>
223 hash<K,T,H,E>& hash<K,T,H,E>::operator = (const hash<K,T,H,E>& r)
224 {
225 // make self-copy safe
226 if (&r == this) return *this;
227 // remove all the existing elements
228 clear();
229 // copy the elements across - remember that this is rehashing because the two
230 // tables can be different sizes so there is no quick way of doing this by
231 // copying the lists
232 for (hash_iterator<K,T,H,E,const std::pair<const K,T> > i = r.begin(); i != r.end(); ++i)
233 insert(i->first, i->second);
234 return *this;
235 }
236
237 // number of values in the hash
238 template<typename K, typename T, class H, class E>
239 bool hash<K,T,H,E>::empty(void) const
240 {
241 return m_size == 0;
242 }
243
244 template<typename K, typename T, class H, class E>
245 unsigned hash<K,T,H,E>::size(void) const
246 {
247 return m_size;
248 }
249
250 // equality
251 template<typename K, typename T, class H, class E>
252 bool hash<K,T,H,E>::operator == (const hash<K,T,H,E>& right) const
253 {
254 // this table is the same as the right table if they are the same table!
255 if (&right == this) return true;
256 // they must be the same size to be equal
257 if (m_size != right.m_size) return false;
258 // now every key in this must be in right and have the same data
259 for (hash_iterator<K,T,H,E,const std::pair<const K,T> > i = begin(); i != end(); i++)
260 {
261 hash_iterator<K,T,H,E,const std::pair<const K,T> > found = right.find(i->first);
262 if (found == right.end()) return false;
263 if (!(i->second == found->second)) return false;
264 }
265 return true;
266 }
267
268 // set up the hash to auto-rehash at a specific size
269 // setting the rehash size to 0 forces manual rehashing
270 template<typename K, typename T, class H, class E>
271 void hash<K,T,H,E>::auto_rehash(void)
272 {
273 m_rehash = m_bins;
274 }
275
276 template<typename K, typename T, class H, class E>
277 void hash<K,T,H,E>::manual_rehash(void)
278 {
279 m_rehash = 0;
280 }
281
282 // the rehash function
283 // builds a new hash table and moves the elements (without copying) from the old to the new
284 // I store the un-modulused hash value in the element for more efficient rehashing
285 // passing 0 to the bins parameter does auto-rehashing
286 // passing any other value forces the number of bins
287
288 template<typename K, typename T, class H, class E>
289 void hash<K,T,H,E>::rehash(unsigned bins)
290 {
291 // user specified size: just take the user's value
292 // auto calculate: if the load is high, increase the size; else do nothing
293 unsigned new_bins = bins ? bins : m_bins;
294 if (bins == 0 && m_size > 0)
295 {
296 // these numbers are pretty arbitrary
297 // TODO - make them user-customisable?
298 float load = loading();
299 if (load > 2.0)
300 new_bins = (unsigned)(m_bins * load);
301 else if (load > 1.0)
302 new_bins = m_bins * 2;
303 }
304 if (new_bins == m_bins) return;
305 // set the new rehashing point if auto-rehashing is on
306 if (m_rehash) m_rehash = new_bins;
307 // move aside the old structure
308 hash_element<K,T,H,E>** old_values = m_values;
309 unsigned old_bins = m_bins;
310 // create a replacement structure
311 m_values = new hash_element<K,T,H,E>*[new_bins];
312 for (unsigned i = 0; i < new_bins; i++)
313 m_values[i] = 0;
314 m_bins = new_bins;
315 // move all the old elements across, rehashing each one
316 for (unsigned j = 0; j < old_bins; j++)
317 {
318 while(old_values[j])
319 {
320 // unhook from the old structure
321 hash_element<K,T,H,E>* current = old_values[j];
322 old_values[j] = current->m_next;
323 // rehash using the stored hash value
324 unsigned bin = current->bin();
325 // hook it into the new structure
326 current->m_next = m_values[bin];
327 m_values[bin] = current;
328 }
329 }
330 // now delete the old structure
331 delete[] old_values;
332 }
333
334 // the loading is the average number of elements per bin
335 // this simplifies to the total elements divided by the number of bins
336
337 template<typename K, typename T, class H, class E>
338 float hash<K,T,H,E>::loading(void) const
339 {
340 return (float)m_size / (float)m_bins;
341 }
342
343 // remove all elements from the table
344
345 template<typename K, typename T, class H, class E>
346 void hash<K,T,H,E>::erase(void)
347 {
348 // unhook the list elements and destroy them
349 for (unsigned i = 0; i < m_bins; i++)
350 {
351 hash_element<K,T,H,E>* current = m_values[i];
352 while(current)
353 {
354 hash_element<K,T,H,E>* next = current->m_next;
355 delete current;
356 current = next;
357 }
358 m_values[i] = 0;
359 }
360 m_size = 0;
361 }
362
363 // test for whether a key is present in the table
364
365 template<typename K, typename T, class H, class E>
366 bool hash<K,T,H,E>::present(const K& key) const
367 {
368 return find(key) != end();
369 }
370
371 template<typename K, typename T, class H, class E>
372 TYPENAME hash<K,T,H,E>::size_type hash<K,T,H,E>::count(const K& key) const
373 {
374 return present() ? 1 : 0;
375 }
376
377 // add a key and data element to the table - defined in terms of the general-purpose pair insert function
378
379 template<typename K, typename T, class H, class E>
380 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::insert(const K& key, const T& data)
381 {
382 return insert(std::pair<const K,T>(key,data)).first;
383 }
384
385 // insert a key/data pair into the table
386 // this removes any old value with the same key since there is no multihash functionality
387
388 template<typename K, typename T, class H, class E>
389 std::pair<TYPENAME hash<K,T,H,E>::iterator, bool> hash<K,T,H,E>::insert(const std::pair<const K,T>& value)
390 {
391 // if auto-rehash is enabled, implement the auto-rehash before inserting the new value
392 // the table is rehashed if this insertion makes the loading exceed 1.0
393 if (m_rehash && (m_size >= m_rehash)) rehash();
394 // calculate the new hash value
395 unsigned hash_value_full = H()(value.first);
396 unsigned bin = hash_value_full % m_bins;
397 bool inserted = true;
398 // unhook any previous value with this key
399 // this has been inlined from erase(key) so that the hash value is not calculated twice
400 hash_element<K,T,H,E>* previous = 0;
401 for (hash_element<K,T,H,E>* current = m_values[bin]; current; previous = current, current = current->m_next)
402 {
403 // first check the full stored hash value
404 if (current->m_hash != hash_value_full) continue;
405
406 // next try the equality operator
407 if (!E()(current->m_value.first, value.first)) continue;
408
409 // unhook this value and destroy it
410 if (previous)
411 previous->m_next = current->m_next;
412 else
413 m_values[bin] = current->m_next;
414 delete current;
415 m_size--;
416
417 // we've overwritten a previous value
418 inserted = false;
419
420 // assume there can only be one match so we can give up now
421 break;
422 }
423 // now hook in a new list element at the start of the list for this hash value
424 hash_element<K,T,H,E>* new_item = new hash_element<K,T,H,E>(this, value, hash_value_full);
425 new_item->m_next = m_values[bin];
426 m_values[bin] = new_item;
427 // increment the size count
428 m_size++;
429 // construct an iterator from the list node, and return whether inserted
430 return std::make_pair(hash_iterator<K,T,H,E,std::pair<const K,T> >(new_item), inserted);
431 }
432
433 // insert a key with an empty data field ready to be filled in later
434
435 template<typename K, typename T, class H, class E>
436 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::insert(const K& key)
437 {
438 return insert(key,T());
439 }
440
441 // remove a key from the table - return true if the key was found and removed, false if it wasn't present
442
443 template<typename K, typename T, class H, class E>
444 bool hash<K,T,H,E>::erase(const K& key)
445 {
446 unsigned hash_value_full = H()(key);
447 unsigned bin = hash_value_full % m_bins;
448 // scan the list for an element with this key
449 // need to keep a previous pointer because the lists are single-linked
450 hash_element<K,T,H,E>* previous = 0;
451 for (hash_element<K,T,H,E>* current = m_values[bin]; current; previous = current, current = current->m_next)
452 {
453 // first check the full stored hash value
454 if (current->m_hash != hash_value_full) continue;
455
456 // next try the equality operator
457 if (!E()(current->m_value.first, key)) continue;
458
459 // found this key, so unhook the element from the list
460 if (previous)
461 previous->m_next = current->m_next;
462 else
463 m_values[bin] = current->m_next;
464 // destroy it
465 delete current;
466 // remember to maintain the size count
467 m_size--;
468 return true;
469 }
470 return false;
471 }
472
473 template<typename K, typename T, class H, class E>
474 void hash<K,T,H,E>::clear(void)
475 {
476 erase();
477 }
478
479 // search for a key in the table and return an iterator to it
480 // if the search fails, returns an end() iterator
481
482 template<typename K, typename T, class H, class E>
483 TYPENAME hash<K,T,H,E>::const_iterator hash<K,T,H,E>::find(const K& key) const
484 {
485 // scan the list for this key's hash value for the element with a matching key
486 unsigned hash_value_full = H()(key);
487 unsigned bin = hash_value_full % m_bins;
488 for (hash_element<K,T,H,E>* current = m_values[bin]; current; current = current->m_next)
489 {
490 if (current->m_hash == hash_value_full && E()(current->m_value.first, key))
491 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(current);
492 }
493 return end();
494 }
495
496 template<typename K, typename T, class H, class E>
497 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::find(const K& key)
498 {
499 // scan the list for this key's hash value for the element with a matching key
500 unsigned hash_value_full = H()(key);
501 unsigned bin = hash_value_full % m_bins;
502 for (hash_element<K,T,H,E>* current = m_values[bin]; current; current = current->m_next)
503 {
504 if (current->m_hash == hash_value_full && E()(current->m_value.first, key))
505 return hash_iterator<K,T,H,E,std::pair<const K,T> >(current);
506 }
507 return end();
508 }
509
510 // table lookup by key using the index operator[], returning a reference to the data field, not an iterator
511 // this is rather like the std::map's [] operator
512 // the difference is that I have a const and non-const version
513 // the const version will not create the element if not present already, but the non-const version will
514 // the non-const version is compatible with the behaviour of the map
515
516 template<typename K, typename T, class H, class E>
517 const T& hash<K,T,H,E>::operator[] (const K& key) const throw(std::out_of_range)
518 {
519 // this const version cannot change the hash, so has to raise an exception if the key is missing
520 hash_iterator<K,T,H,E,const std::pair<const K,T> > found = find(key);
521 if (found == end())
522 throw std::out_of_range("key not found in stlplus::hash::operator[]");
523 return found->second;
524 }
525
526 template<typename K, typename T, class H, class E>
527 T& hash<K,T,H,E>::operator[] (const K& key)
528 {
529 // this non-const version can change the hash, so creates a new element if the key is missing
530 hash_iterator<K,T,H,E,std::pair<const K,T> > found = find(key);
531 if (found == end())
532 found = insert(key);
533 return found->second;
534 }
535
536 // iterators
537
538 template<typename K, typename T, class H, class E>
539 TYPENAME hash<K,T,H,E>::const_iterator hash<K,T,H,E>::begin(void) const
540 {
541 // find the first element
542 for (unsigned bin = 0; bin < m_bins; bin++)
543 if (m_values[bin])
544 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(m_values[bin]);
545 // if the hash is empty, return the end iterator
546 return end();
547 }
548
549 template<typename K, typename T, class H, class E>
550 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::begin(void)
551 {
552 // find the first element
553 for (unsigned bin = 0; bin < m_bins; bin++)
554 if (m_values[bin])
555 return hash_iterator<K,T,H,E,std::pair<const K,T> >(m_values[bin]);
556 // if the hash is empty, return the end iterator
557 return end();
558 }
559
560 template<typename K, typename T, class H, class E>
561 TYPENAME hash<K,T,H,E>::const_iterator hash<K,T,H,E>::end(void) const
562 {
563 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(this);
564 }
565
566 template<typename K, typename T, class H, class E>
567 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::end(void)
568 {
569 return hash_iterator<K,T,H,E,std::pair<const K,T> >(this);
570 }
571
572 ////////////////////////////////////////////////////////////////////////////////
573
574 } // end namespace stlplus
575
This page took 0.064734 seconds and 3 git commands to generate.