]> Dogcows Code - chaz/yoink/blob - src/stlplus/containers/hash.tpp
da2e39df5f5beebbb1d3908b95897aa20fc765c6
[chaz/yoink] / src / stlplus / containers / hash.tpp
1 ////////////////////////////////////////////////////////////////////////////////
2
3 // Author: Andy Rushton
4 // Copyright: (c) Southampton University 1999-2004
5 // (c) Andy Rushton 2004-2009
6 // License: BSD License, see ../docs/license.html
7
8 ////////////////////////////////////////////////////////////////////////////////
9 #include <iomanip>
10
11 namespace stlplus
12 {
13
14 ////////////////////////////////////////////////////////////////////////////////
15 // the element stored in the hash
16
17 template<typename K, typename T, typename H, typename E>
18 class hash_element
19 {
20 public:
21 master_iterator<hash<K,T,H,E>, hash_element<K,T,H,E> > m_master;
22 std::pair<const K, T> m_value;
23 hash_element<K,T,H,E>* m_next;
24 unsigned m_hash;
25
26 hash_element(const hash<K,T,H,E>* owner, const K& key, const T& data, unsigned hash) :
27 m_master(owner,this), m_value(key,data), m_next(0), m_hash(hash)
28 {
29 }
30
31 hash_element(const hash<K,T,H,E>* owner, const std::pair<const K,T>& value, unsigned hash) :
32 m_master(owner,this), m_value(value), m_next(0), m_hash(hash)
33 {
34 }
35
36 ~hash_element(void)
37 {
38 m_next = 0;
39 m_hash = 0;
40 }
41
42 const hash<K,T,H,E>* owner(void) const
43 {
44 return m_master.owner();
45 }
46
47 // generate the bin number from the hash value and the owner's number of bins
48 unsigned bin(void) const
49 {
50 return m_hash % (owner()->m_bins);
51 }
52 };
53
54 ////////////////////////////////////////////////////////////////////////////////
55 // iterator
56
57 // null constructor
58 template<typename K, typename T, class H, class E, typename V>
59 hash_iterator<K,T,H,E,V>::hash_iterator(void)
60 {
61 }
62
63 // non-null constructor used from within the hash to construct a valid iterator
64 template<typename K, typename T, class H, class E, typename V>
65 hash_iterator<K,T,H,E,V>::hash_iterator(hash_element<K,T,H,E>* element) :
66 safe_iterator<hash<K,T,H,E>,hash_element<K,T,H,E> >(element->m_master)
67 {
68 }
69
70 // constructor used to create an end iterator
71 template<typename K, typename T, class H, class E, typename V>
72 hash_iterator<K,T,H,E,V>::hash_iterator(const hash<K,T,H,E>* owner) :
73 safe_iterator<hash<K,T,H,E>,hash_element<K,T,H,E> >(owner)
74 {
75 }
76
77 template<typename K, typename T, class H, class E, typename V>
78 hash_iterator<K,T,H,E,V>::hash_iterator(const safe_iterator<hash<K,T,H,E>, hash_element<K,T,H,E> >& iterator) :
79 safe_iterator<hash<K,T,H,E>,hash_element<K,T,H,E> >(iterator)
80 {
81 }
82
83 // destructor
84
85 template<typename K, typename T, class H, class E, typename V>
86 hash_iterator<K,T,H,E,V>::~hash_iterator(void)
87 {
88 }
89
90 // mode conversions
91
92 template<typename K, typename T, class H, class E, typename V>
93 TYPENAME hash_iterator<K,T,H,E,V>::const_iterator hash_iterator<K,T,H,E,V>::constify(void) const
94 {
95 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(*this);
96 }
97
98 template<typename K, typename T, class H, class E, typename V>
99 TYPENAME hash_iterator<K,T,H,E,V>::iterator hash_iterator<K,T,H,E,V>::deconstify(void) const
100 {
101 return hash_iterator<K,T,H,E,std::pair<const K,T> >(*this);
102 }
103
104 // increment operator looks for the next element in the table
105 // if there isn't one, then this becomes an end() iterator with m_bin = m_bins
106 template<typename K, typename T, class H, class E, typename V>
107 TYPENAME hash_iterator<K,T,H,E,V>::this_iterator& hash_iterator<K,T,H,E,V>::operator ++ (void)
108 throw(null_dereference,end_dereference)
109 {
110 this->assert_valid();
111 if (this->node()->m_next)
112 set(this->node()->m_next->m_master);
113 else
114 {
115 // failing that, subsequent hash values are tried until either an element is found or there are no more bins
116 // in which case it becomes an end() iterator
117 hash_element<K,T,H,E>* element = 0;
118 unsigned current_bin = this->node()->bin();
119 for(current_bin++; !element && (current_bin < this->owner()->m_bins); current_bin++)
120 element = this->owner()->m_values[current_bin];
121 if (element)
122 set(element->m_master);
123 else
124 this->set_end();
125 }
126 return *this;
127 }
128
129 // post-increment is defined in terms of pre-increment
130 template<typename K, typename T, class H, class E, typename V>
131 TYPENAME hash_iterator<K,T,H,E,V>::this_iterator hash_iterator<K,T,H,E,V>::operator ++ (int)
132 throw(null_dereference,end_dereference)
133 {
134 hash_iterator<K,T,H,E,V> old(*this);
135 ++(*this);
136 return old;
137 }
138
139 // two iterators are equal if they point to the same element
140 // both iterators must be non-null and belong to the same table
141 template<typename K, typename T, class H, class E, typename V>
142 bool hash_iterator<K,T,H,E,V>::operator == (const hash_iterator<K,T,H,E,V>& r) const
143 {
144 return equal(r);
145 }
146
147 template<typename K, typename T, class H, class E, typename V>
148 bool hash_iterator<K,T,H,E,V>::operator != (const hash_iterator<K,T,H,E,V>& r) const
149 {
150 return !operator==(r);
151 }
152
153 template<typename K, typename T, class H, class E, typename V>
154 bool hash_iterator<K,T,H,E,V>::operator < (const hash_iterator<K,T,H,E,V>& r) const
155 {
156 return compare(r) < 0;
157 }
158
159 // iterator dereferencing is only legal on a non-null iterator
160 template<typename K, typename T, class H, class E, typename V>
161 V& hash_iterator<K,T,H,E,V>::operator*(void) const
162 throw(null_dereference,end_dereference)
163 {
164 this->assert_valid();
165 return this->node()->m_value;
166 }
167
168 template<typename K, typename T, class H, class E, typename V>
169 V* hash_iterator<K,T,H,E,V>::operator->(void) const
170 throw(null_dereference,end_dereference)
171 {
172 return &(operator*());
173 }
174
175 ////////////////////////////////////////////////////////////////////////////////
176 // hash
177
178 // totally arbitrary initial size used for auto-rehashed tables
179 static unsigned hash_default_bins = 127;
180
181 // constructor
182 // tests whether the user wants auto-rehash
183 // sets the rehash point to be a loading of 1.0 by setting it to the number of bins
184 // uses the user's size unless this is zero, in which case implement the default
185
186 template<typename K, typename T, class H, class E>
187 hash<K,T,H,E>::hash(unsigned bins) :
188 m_rehash(bins), m_bins(bins > 0 ? bins : hash_default_bins), m_size(0), m_values(0)
189 {
190 m_values = new hash_element<K,T,H,E>*[m_bins];
191 for (unsigned i = 0; i < m_bins; i++)
192 m_values[i] = 0;
193 }
194
195 template<typename K, typename T, class H, class E>
196 hash<K,T,H,E>::~hash(void)
197 {
198 // delete all the elements
199 clear();
200 // and delete the data structure
201 delete[] m_values;
202 m_values = 0;
203 }
204
205 // as usual, implement the copy constructor i.t.o. the assignment operator
206
207 template<typename K, typename T, class H, class E>
208 hash<K,T,H,E>::hash(const hash<K,T,H,E>& right) :
209 m_rehash(right.m_rehash), m_bins(right.m_bins), m_size(0), m_values(0)
210 {
211 m_values = new hash_element<K,T,H,E>*[right.m_bins];
212 // copy the rehash behaviour as well as the size
213 for (unsigned i = 0; i < m_bins; i++)
214 m_values[i] = 0;
215 *this = right;
216 }
217
218 // assignment operator
219 // this is done by copying the elements
220 // the source and target hashes can be different sizes
221 // the hash is self-copy safe, i.e. it is legal to say x = x;
222
223 template<typename K, typename T, class H, class E>
224 hash<K,T,H,E>& hash<K,T,H,E>::operator = (const hash<K,T,H,E>& r)
225 {
226 // make self-copy safe
227 if (&r == this) return *this;
228 // remove all the existing elements
229 clear();
230 // copy the elements across - remember that this is rehashing because the two
231 // tables can be different sizes so there is no quick way of doing this by
232 // copying the lists
233 for (hash_iterator<K,T,H,E,const std::pair<const K,T> > i = r.begin(); i != r.end(); ++i)
234 insert(i->first, i->second);
235 return *this;
236 }
237
238 // number of values in the hash
239 template<typename K, typename T, class H, class E>
240 bool hash<K,T,H,E>::empty(void) const
241 {
242 return m_size == 0;
243 }
244
245 template<typename K, typename T, class H, class E>
246 unsigned hash<K,T,H,E>::size(void) const
247 {
248 return m_size;
249 }
250
251 // equality
252 template<typename K, typename T, class H, class E>
253 bool hash<K,T,H,E>::operator == (const hash<K,T,H,E>& right) const
254 {
255 // this table is the same as the right table if they are the same table!
256 if (&right == this) return true;
257 // they must be the same size to be equal
258 if (m_size != right.m_size) return false;
259 // now every key in this must be in right and have the same data
260 for (hash_iterator<K,T,H,E,const std::pair<const K,T> > i = begin(); i != end(); i++)
261 {
262 hash_iterator<K,T,H,E,const std::pair<const K,T> > found = right.find(i->first);
263 if (found == right.end()) return false;
264 if (!(i->second == found->second)) return false;
265 }
266 return true;
267 }
268
269 // set up the hash to auto-rehash at a specific size
270 // setting the rehash size to 0 forces manual rehashing
271 template<typename K, typename T, class H, class E>
272 void hash<K,T,H,E>::auto_rehash(void)
273 {
274 m_rehash = m_bins;
275 }
276
277 template<typename K, typename T, class H, class E>
278 void hash<K,T,H,E>::manual_rehash(void)
279 {
280 m_rehash = 0;
281 }
282
283 // the rehash function
284 // builds a new hash table and moves the elements (without copying) from the old to the new
285 // I store the un-modulused hash value in the element for more efficient rehashing
286 // passing 0 to the bins parameter does auto-rehashing
287 // passing any other value forces the number of bins
288
289 template<typename K, typename T, class H, class E>
290 void hash<K,T,H,E>::rehash(unsigned bins)
291 {
292 // user specified size: just take the user's value
293 // auto calculate: if the load is high, increase the size; else do nothing
294 unsigned new_bins = bins ? bins : m_bins;
295 if (bins == 0 && m_size > 0)
296 {
297 // these numbers are pretty arbitrary
298 // TODO - make them user-customisable?
299 float load = loading();
300 if (load > 2.0)
301 new_bins = (unsigned)(m_bins * load);
302 else if (load > 1.0)
303 new_bins = m_bins * 2;
304 }
305 if (new_bins == m_bins) return;
306 // set the new rehashing point if auto-rehashing is on
307 if (m_rehash) m_rehash = new_bins;
308 // move aside the old structure
309 hash_element<K,T,H,E>** old_values = m_values;
310 unsigned old_bins = m_bins;
311 // create a replacement structure
312 m_values = new hash_element<K,T,H,E>*[new_bins];
313 for (unsigned i = 0; i < new_bins; i++)
314 m_values[i] = 0;
315 m_bins = new_bins;
316 // move all the old elements across, rehashing each one
317 for (unsigned j = 0; j < old_bins; j++)
318 {
319 while(old_values[j])
320 {
321 // unhook from the old structure
322 hash_element<K,T,H,E>* current = old_values[j];
323 old_values[j] = current->m_next;
324 // rehash using the stored hash value
325 unsigned bin = current->bin();
326 // hook it into the new structure
327 current->m_next = m_values[bin];
328 m_values[bin] = current;
329 }
330 }
331 // now delete the old structure
332 delete[] old_values;
333 }
334
335 // the loading is the average number of elements per bin
336 // this simplifies to the total elements divided by the number of bins
337
338 template<typename K, typename T, class H, class E>
339 float hash<K,T,H,E>::loading(void) const
340 {
341 return (float)m_size / (float)m_bins;
342 }
343
344 // remove all elements from the table
345
346 template<typename K, typename T, class H, class E>
347 void hash<K,T,H,E>::erase(void)
348 {
349 // unhook the list elements and destroy them
350 for (unsigned i = 0; i < m_bins; i++)
351 {
352 hash_element<K,T,H,E>* current = m_values[i];
353 while(current)
354 {
355 hash_element<K,T,H,E>* next = current->m_next;
356 delete current;
357 current = next;
358 }
359 m_values[i] = 0;
360 }
361 m_size = 0;
362 }
363
364 // test for whether a key is present in the table
365
366 template<typename K, typename T, class H, class E>
367 bool hash<K,T,H,E>::present(const K& key) const
368 {
369 return find(key) != end();
370 }
371
372 template<typename K, typename T, class H, class E>
373 TYPENAME hash<K,T,H,E>::size_type hash<K,T,H,E>::count(const K& key) const
374 {
375 return present() ? 1 : 0;
376 }
377
378 // add a key and data element to the table - defined in terms of the general-purpose pair insert function
379
380 template<typename K, typename T, class H, class E>
381 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::insert(const K& key, const T& data)
382 {
383 return insert(std::pair<const K,T>(key,data)).first;
384 }
385
386 // insert a key/data pair into the table
387 // this removes any old value with the same key since there is no multihash functionality
388
389 template<typename K, typename T, class H, class E>
390 std::pair<TYPENAME hash<K,T,H,E>::iterator, bool> hash<K,T,H,E>::insert(const std::pair<const K,T>& value)
391 {
392 // if auto-rehash is enabled, implement the auto-rehash before inserting the new value
393 // the table is rehashed if this insertion makes the loading exceed 1.0
394 if (m_rehash && (m_size >= m_rehash)) rehash();
395 // calculate the new hash value
396 unsigned hash_value_full = H()(value.first);
397 unsigned bin = hash_value_full % m_bins;
398 bool inserted = true;
399 // unhook any previous value with this key
400 // this has been inlined from erase(key) so that the hash value is not calculated twice
401 hash_element<K,T,H,E>* previous = 0;
402 for (hash_element<K,T,H,E>* current = m_values[bin]; current; previous = current, current = current->m_next)
403 {
404 // first check the full stored hash value
405 if (current->m_hash != hash_value_full) continue;
406
407 // next try the equality operator
408 if (!E()(current->m_value.first, value.first)) continue;
409
410 // unhook this value and destroy it
411 if (previous)
412 previous->m_next = current->m_next;
413 else
414 m_values[bin] = current->m_next;
415 delete current;
416 m_size--;
417
418 // we've overwritten a previous value
419 inserted = false;
420
421 // assume there can only be one match so we can give up now
422 break;
423 }
424 // now hook in a new list element at the start of the list for this hash value
425 hash_element<K,T,H,E>* new_item = new hash_element<K,T,H,E>(this, value, hash_value_full);
426 new_item->m_next = m_values[bin];
427 m_values[bin] = new_item;
428 // increment the size count
429 m_size++;
430 // construct an iterator from the list node, and return whether inserted
431 return std::make_pair(hash_iterator<K,T,H,E,std::pair<const K,T> >(new_item), inserted);
432 }
433
434 // insert a key with an empty data field ready to be filled in later
435
436 template<typename K, typename T, class H, class E>
437 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::insert(const K& key)
438 {
439 return insert(key,T());
440 }
441
442 // remove a key from the table - return true if the key was found and removed, false if it wasn't present
443
444 template<typename K, typename T, class H, class E>
445 unsigned hash<K,T,H,E>::erase(const K& key)
446 {
447 unsigned hash_value_full = H()(key);
448 unsigned bin = hash_value_full % m_bins;
449 // scan the list for an element with this key
450 // need to keep a previous pointer because the lists are single-linked
451 hash_element<K,T,H,E>* previous = 0;
452 for (hash_element<K,T,H,E>* current = m_values[bin]; current; previous = current, current = current->m_next)
453 {
454 // first check the full stored hash value
455 if (current->m_hash != hash_value_full) continue;
456
457 // next try the equality operator
458 if (!E()(current->m_value.first, key)) continue;
459
460 // found this key, so unhook the element from the list
461 if (previous)
462 previous->m_next = current->m_next;
463 else
464 m_values[bin] = current->m_next;
465 // destroy it
466 delete current;
467 // remember to maintain the size count
468 m_size--;
469 return 1;
470 }
471 return 0;
472 }
473
474 // remove an element from the hash table using an iterator (std::map equivalent)
475 template<typename K, typename T, class H, class E>
476 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::erase(TYPENAME hash<K,T,H,E>::iterator it)
477 {
478 // work out what the next iterator is in order to return it later
479 TYPENAME hash<K,T,H,E>::iterator next(it);
480 ++next;
481 // we now need to find where this item is - made difficult by the use of
482 // single-linked lists which means I have to search through the bin from
483 // the top in order to unlink from the list.
484 unsigned hash_value_full = it.node()->m_hash;
485 unsigned bin = hash_value_full % m_bins;
486 // scan the list for this element
487 // need to keep a previous pointer because the lists are single-linked
488 hash_element<K,T,H,E>* previous = 0;
489 for (hash_element<K,T,H,E>* current = m_values[bin]; current; previous = current, current = current->m_next)
490 {
491 // direct test on the address of the element
492 if (current != it.node()) continue;
493 // found this iterator, so unhook the element from the list
494 if (previous)
495 previous->m_next = current->m_next;
496 else
497 m_values[bin] = current->m_next;
498 // destroy it
499 delete current;
500 current = 0;
501 // remember to maintain the size count
502 m_size--;
503 break;
504 }
505 return next;
506 }
507
508 template<typename K, typename T, class H, class E>
509 void hash<K,T,H,E>::clear(void)
510 {
511 erase();
512 }
513
514 // search for a key in the table and return an iterator to it
515 // if the search fails, returns an end() iterator
516
517 template<typename K, typename T, class H, class E>
518 TYPENAME hash<K,T,H,E>::const_iterator hash<K,T,H,E>::find(const K& key) const
519 {
520 // scan the list for this key's hash value for the element with a matching key
521 unsigned hash_value_full = H()(key);
522 unsigned bin = hash_value_full % m_bins;
523 for (hash_element<K,T,H,E>* current = m_values[bin]; current; current = current->m_next)
524 {
525 if (current->m_hash == hash_value_full && E()(current->m_value.first, key))
526 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(current);
527 }
528 return end();
529 }
530
531 template<typename K, typename T, class H, class E>
532 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::find(const K& key)
533 {
534 // scan the list for this key's hash value for the element with a matching key
535 unsigned hash_value_full = H()(key);
536 unsigned bin = hash_value_full % m_bins;
537 for (hash_element<K,T,H,E>* current = m_values[bin]; current; current = current->m_next)
538 {
539 if (current->m_hash == hash_value_full && E()(current->m_value.first, key))
540 return hash_iterator<K,T,H,E,std::pair<const K,T> >(current);
541 }
542 return end();
543 }
544
545 // table lookup by key using the index operator[], returning a reference to the data field, not an iterator
546 // this is rather like the std::map's [] operator
547 // the difference is that I have a const and non-const version
548 // the const version will not create the element if not present already, but the non-const version will
549 // the non-const version is compatible with the behaviour of the map
550
551 template<typename K, typename T, class H, class E>
552 const T& hash<K,T,H,E>::operator[] (const K& key) const throw(std::out_of_range)
553 {
554 // this const version cannot change the hash, so has to raise an exception if the key is missing
555 hash_iterator<K,T,H,E,const std::pair<const K,T> > found = find(key);
556 if (found == end())
557 throw std::out_of_range("key not found in stlplus::hash::operator[]");
558 return found->second;
559 }
560
561 template<typename K, typename T, class H, class E>
562 T& hash<K,T,H,E>::operator[] (const K& key)
563 {
564 // this non-const version can change the hash, so creates a new element if the key is missing
565 hash_iterator<K,T,H,E,std::pair<const K,T> > found = find(key);
566 if (found == end())
567 found = insert(key);
568 return found->second;
569 }
570
571 // iterators
572
573 template<typename K, typename T, class H, class E>
574 TYPENAME hash<K,T,H,E>::const_iterator hash<K,T,H,E>::begin(void) const
575 {
576 // find the first element
577 for (unsigned bin = 0; bin < m_bins; bin++)
578 if (m_values[bin])
579 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(m_values[bin]);
580 // if the hash is empty, return the end iterator
581 return end();
582 }
583
584 template<typename K, typename T, class H, class E>
585 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::begin(void)
586 {
587 // find the first element
588 for (unsigned bin = 0; bin < m_bins; bin++)
589 if (m_values[bin])
590 return hash_iterator<K,T,H,E,std::pair<const K,T> >(m_values[bin]);
591 // if the hash is empty, return the end iterator
592 return end();
593 }
594
595 template<typename K, typename T, class H, class E>
596 TYPENAME hash<K,T,H,E>::const_iterator hash<K,T,H,E>::end(void) const
597 {
598 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(this);
599 }
600
601 template<typename K, typename T, class H, class E>
602 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::end(void)
603 {
604 return hash_iterator<K,T,H,E,std::pair<const K,T> >(this);
605 }
606
607 template<typename K, typename T, class H, class E>
608 void hash<K,T,H,E>::debug_report(std::ostream& str) const
609 {
610 // calculate some stats first
611 unsigned occupied = 0;
612 unsigned min_in_bin = m_size;
613 unsigned max_in_bin = 0;
614 for (unsigned i = 0; i < m_bins; i++)
615 {
616 if (m_values[i]) occupied++;
617 unsigned count = 0;
618 for (hash_element<K,T,H,E>* item = m_values[i]; item; item = item->m_next) count++;
619 if (count > max_in_bin) max_in_bin = count;
620 if (count < min_in_bin) min_in_bin = count;
621 }
622 // now print the table
623 str << "------------------------------------------------------------------------" << std::endl;
624 str << "| size: " << m_size << std::endl;
625 str << "| bins: " << m_bins << std::endl;
626 str << "| loading: " << loading() << " ";
627 if (m_rehash)
628 str << "auto-rehash at " << m_rehash << std::endl;
629 else
630 str << "manual rehash" << std::endl;
631 str << "| occupied: " << occupied
632 << std::fixed << " (" << (100.0*(float)occupied/(float)m_bins) << "%)" << std::scientific
633 << ", min = " << min_in_bin << ", max = " << max_in_bin << std::endl;
634 str << "|-----------------------------------------------------------------------" << std::endl;
635 str << "| bin 0 1 2 3 4 5 6 7 8 9" << std::endl;
636 str << "| ---------------------------------------------------------------";
637 for (unsigned j = 0; j < m_bins; j++)
638 {
639 if (j % 10 == 0)
640 {
641 str << std::endl;
642 str << "| " << std::setw(6) << std::right << (j/10*10) << std::left << " |";
643 }
644 unsigned count = 0;
645 for (hash_element<K,T,H,E>* item = m_values[j]; item; item = item->m_next) count++;
646 if (!count)
647 str << " .";
648 else
649 str << std::setw(6) << std::right << count << std::left;
650 }
651 str << std::endl;
652 str << "------------------------------------------------------------------------" << std::endl;
653 }
654
655 ////////////////////////////////////////////////////////////////////////////////
656
657 } // end namespace stlplus
658
This page took 0.059865 seconds and 3 git commands to generate.