Added <algorithm> include to hashlib.h
[yosys.git] / kernel / hashlib.h
1 // This is free and unencumbered software released into the public domain.
2 //
3 // Anyone is free to copy, modify, publish, use, compile, sell, or
4 // distribute this software, either in source code form or as a compiled
5 // binary, for any purpose, commercial or non-commercial, and by any
6 // means.
7
8 // -------------------------------------------------------
9 // Written by Clifford Wolf <clifford@clifford.at> in 2014
10 // -------------------------------------------------------
11
12 #ifndef HASHLIB_H
13
14 #include <stdexcept>
15 #include <algorithm>
16 #include <string>
17 #include <vector>
18
19 namespace hashlib {
20
21 const int hashtable_size_trigger = 2;
22 const int hashtable_size_factor = 3;
23
24 // The XOR version of DJB2
25 inline unsigned int mkhash(unsigned int a, unsigned int b) {
26 return ((a << 5) + a) ^ b;
27 }
28
29 // traditionally 5381 is used as starting value for the djb2 hash
30 const unsigned int mkhash_init = 5381;
31
32 // The ADD version of DJB2
33 // (usunsigned int mkhashe this version for cache locality in b)
34 inline unsigned int mkhash_add(unsigned int a, unsigned int b) {
35 return ((a << 5) + a) + b;
36 }
37
38 inline unsigned int mkhash_xorshift(unsigned int a) {
39 if (sizeof(a) == 4) {
40 a ^= a << 13;
41 a ^= a >> 17;
42 a ^= a << 5;
43 } else if (sizeof(a) == 8) {
44 a ^= a << 13;
45 a ^= a >> 7;
46 a ^= a << 17;
47 } else
48 throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints");
49 return a;
50 }
51
52 template<typename T> struct hash_ops {
53 static inline bool cmp(const T &a, const T &b) {
54 return a == b;
55 }
56 static inline unsigned int hash(const T &a) {
57 return a.hash();
58 }
59 };
60
61 template<> struct hash_ops<int> {
62 template<typename T>
63 static inline bool cmp(T a, T b) {
64 return a == b;
65 }
66 template<typename T>
67 static inline unsigned int hash(T a) {
68 return a;
69 }
70 };
71
72 template<> struct hash_ops<std::string> {
73 static inline bool cmp(const std::string &a, const std::string &b) {
74 return a == b;
75 }
76 static inline unsigned int hash(const std::string &a) {
77 unsigned int v = 0;
78 for (auto c : a)
79 v = mkhash(v, c);
80 return v;
81 }
82 };
83
84 template<typename P, typename Q> struct hash_ops<std::pair<P, Q>> {
85 static inline bool cmp(std::pair<P, Q> a, std::pair<P, Q> b) {
86 return a == b;
87 }
88 static inline unsigned int hash(std::pair<P, Q> a) {
89 hash_ops<P> p_ops;
90 hash_ops<Q> q_ops;
91 return mkhash(p_ops.hash(a.first), q_ops.hash(a.second));
92 }
93 };
94
95 template<typename T> struct hash_ops<std::vector<T>> {
96 static inline bool cmp(std::vector<T> a, std::vector<T> b) {
97 return a == b;
98 }
99 static inline unsigned int hash(std::vector<T> a) {
100 hash_ops<T> t_ops;
101 unsigned int h = mkhash_init;
102 for (auto k : a)
103 h = mkhash(h, t_ops.hash(k));
104 return h;
105 }
106 };
107
108 struct hash_cstr_ops {
109 static inline bool cmp(const char *a, const char *b) {
110 for (int i = 0; a[i] || b[i]; i++)
111 if (a[i] != b[i])
112 return false;
113 return true;
114 }
115 static inline unsigned int hash(const char *a) {
116 unsigned int hash = mkhash_init;
117 while (*a)
118 hash = mkhash(hash, *(a++));
119 return hash;
120 }
121 };
122
123 struct hash_ptr_ops {
124 static inline bool cmp(const void *a, const void *b) {
125 return a == b;
126 }
127 static inline unsigned int hash(const void *a) {
128 return (unsigned long)a;
129 }
130 };
131
132 struct hash_obj_ops {
133 static inline bool cmp(const void *a, const void *b) {
134 return a == b;
135 }
136 template<typename T>
137 static inline unsigned int hash(const T *a) {
138 return a->hash();
139 }
140 };
141
142 inline int hashtable_size(int min_size)
143 {
144 static std::vector<int> zero_and_some_primes = {
145 0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
146 853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289,
147 12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281,
148 120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697,
149 897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037,
150 5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411,
151 25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239,
152 121590311, 151987889, 189984863, 237481091, 296851369, 371064217
153 };
154
155 for (auto p : zero_and_some_primes)
156 if (p >= min_size) return p;
157
158 if (sizeof(int) == 4)
159 throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables.");
160
161 for (auto p : zero_and_some_primes)
162 if (100129 * p > min_size) return 100129 * p;
163
164 throw std::length_error("hash table exceeded maximum size.");
165 }
166
167 template<typename K, typename T, typename OPS = hash_ops<K>> class dict;
168 template<typename K, int offset = 0, typename OPS = hash_ops<K>> class idict;
169 template<typename K, typename OPS = hash_ops<K>> class pool;
170
171 template<typename K, typename T, typename OPS>
172 class dict
173 {
174 struct entry_t
175 {
176 std::pair<K, T> udata;
177 int next;
178
179 entry_t() { }
180 entry_t(const std::pair<K, T> &udata, int next) : udata(udata), next(next) { }
181 };
182
183 std::vector<int> hashtable;
184 std::vector<entry_t> entries;
185 OPS ops;
186
187 #if 0
188 static inline void do_assert(bool cond) {
189 if (!cond) throw std::runtime_error("dict<> assert failed.");
190 }
191 #else
192 static inline void do_assert(bool) { }
193 #endif
194
195 int do_hash(const K &key) const
196 {
197 unsigned int hash = 0;
198 if (!hashtable.empty())
199 hash = ops.hash(key) % (unsigned int)(hashtable.size());
200 return hash;
201 }
202
203 void do_rehash()
204 {
205 hashtable.clear();
206 hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1);
207
208 for (int i = 0; i < int(entries.size()); i++) {
209 do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size()));
210 int hash = do_hash(entries[i].udata.first);
211 entries[i].next = hashtable[hash];
212 hashtable[hash] = i;
213 }
214 }
215
216 int do_erase(int index, int hash)
217 {
218 do_assert(index < int(entries.size()));
219 if (hashtable.empty() || index < 0)
220 return 0;
221
222 int k = hashtable[hash];
223 if (k == index) {
224 hashtable[hash] = entries[index].next;
225 } else {
226 while (entries[k].next != index) {
227 k = entries[k].next;
228 do_assert(0 <= k && k < int(entries.size()));
229 }
230 entries[k].next = entries[index].next;
231 }
232
233 int back_idx = entries.size()-1;
234
235 if (index != back_idx)
236 {
237 int back_hash = do_hash(entries[back_idx].udata.first);
238
239 k = hashtable[back_hash];
240 if (k == back_idx) {
241 hashtable[back_hash] = index;
242 } else {
243 while (entries[k].next != back_idx) {
244 k = entries[k].next;
245 do_assert(0 <= k && k < int(entries.size()));
246 }
247 entries[k].next = index;
248 }
249
250 entries[index] = std::move(entries[back_idx]);
251 }
252
253 entries.pop_back();
254
255 if (entries.empty())
256 hashtable.clear();
257
258 return 1;
259 }
260
261 int do_lookup(const K &key, int &hash) const
262 {
263 if (hashtable.empty())
264 return -1;
265
266 if (entries.size() * hashtable_size_trigger > hashtable.size()) {
267 ((dict*)this)->do_rehash();
268 hash = do_hash(key);
269 }
270
271 int index = hashtable[hash];
272
273 while (index >= 0 && !ops.cmp(entries[index].udata.first, key)) {
274 index = entries[index].next;
275 do_assert(-1 <= index && index < int(entries.size()));
276 }
277
278 return index;
279 }
280
281 int do_insert(const std::pair<K, T> &value, int &hash)
282 {
283 if (hashtable.empty()) {
284 entries.push_back(entry_t(value, -1));
285 do_rehash();
286 hash = do_hash(value.first);
287 } else {
288 entries.push_back(entry_t(value, hashtable[hash]));
289 hashtable[hash] = entries.size() - 1;
290 }
291 return entries.size() - 1;
292 }
293
294 public:
295 class const_iterator : public std::iterator<std::forward_iterator_tag, std::pair<K, T>>
296 {
297 friend class dict;
298 protected:
299 const dict *ptr;
300 int index;
301 const_iterator(const dict *ptr, int index) : ptr(ptr), index(index) { }
302 public:
303 const_iterator() { }
304 const_iterator operator++() { index--; return *this; }
305 bool operator<(const const_iterator &other) const { return index > other.index; }
306 bool operator==(const const_iterator &other) const { return index == other.index; }
307 bool operator!=(const const_iterator &other) const { return index != other.index; }
308 const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; }
309 const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; }
310 };
311
312 class iterator : public std::iterator<std::forward_iterator_tag, std::pair<K, T>>
313 {
314 friend class dict;
315 protected:
316 dict *ptr;
317 int index;
318 iterator(dict *ptr, int index) : ptr(ptr), index(index) { }
319 public:
320 iterator() { }
321 iterator operator++() { index--; return *this; }
322 bool operator<(const iterator &other) const { return index > other.index; }
323 bool operator==(const iterator &other) const { return index == other.index; }
324 bool operator!=(const iterator &other) const { return index != other.index; }
325 std::pair<K, T> &operator*() { return ptr->entries[index].udata; }
326 std::pair<K, T> *operator->() { return &ptr->entries[index].udata; }
327 const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; }
328 const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; }
329 operator const_iterator() const { return const_iterator(ptr, index); }
330 };
331
332 dict()
333 {
334 }
335
336 dict(const dict &other)
337 {
338 entries = other.entries;
339 do_rehash();
340 }
341
342 dict(dict &&other)
343 {
344 swap(other);
345 }
346
347 dict &operator=(const dict &other) {
348 entries = other.entries;
349 do_rehash();
350 return *this;
351 }
352
353 dict &operator=(dict &&other) {
354 clear();
355 swap(other);
356 return *this;
357 }
358
359 dict(const std::initializer_list<std::pair<K, T>> &list)
360 {
361 for (auto &it : list)
362 insert(it);
363 }
364
365 template<class InputIterator>
366 dict(InputIterator first, InputIterator last)
367 {
368 insert(first, last);
369 }
370
371 template<class InputIterator>
372 void insert(InputIterator first, InputIterator last)
373 {
374 for (; first != last; ++first)
375 insert(*first);
376 }
377
378 std::pair<iterator, bool> insert(const std::pair<K, T> &value)
379 {
380 int hash = do_hash(value.first);
381 int i = do_lookup(value.first, hash);
382 if (i >= 0)
383 return std::pair<iterator, bool>(iterator(this, i), false);
384 i = do_insert(value, hash);
385 return std::pair<iterator, bool>(iterator(this, i), true);
386 }
387
388 int erase(const K &key)
389 {
390 int hash = do_hash(key);
391 int index = do_lookup(key, hash);
392 return do_erase(index, hash);
393 }
394
395 iterator erase(iterator it)
396 {
397 int hash = do_hash(it->first);
398 do_erase(it.index, hash);
399 return ++it;
400 }
401
402 int count(const K &key) const
403 {
404 int hash = do_hash(key);
405 int i = do_lookup(key, hash);
406 return i < 0 ? 0 : 1;
407 }
408
409 int count(const K &key, const_iterator it) const
410 {
411 int hash = do_hash(key);
412 int i = do_lookup(key, hash);
413 return i < 0 || i > it.index ? 0 : 1;
414 }
415
416 iterator find(const K &key)
417 {
418 int hash = do_hash(key);
419 int i = do_lookup(key, hash);
420 if (i < 0)
421 return end();
422 return iterator(this, i);
423 }
424
425 const_iterator find(const K &key) const
426 {
427 int hash = do_hash(key);
428 int i = do_lookup(key, hash);
429 if (i < 0)
430 return end();
431 return const_iterator(this, i);
432 }
433
434 T& at(const K &key)
435 {
436 int hash = do_hash(key);
437 int i = do_lookup(key, hash);
438 if (i < 0)
439 throw std::out_of_range("dict::at()");
440 return entries[i].udata.second;
441 }
442
443 const T& at(const K &key) const
444 {
445 int hash = do_hash(key);
446 int i = do_lookup(key, hash);
447 if (i < 0)
448 throw std::out_of_range("dict::at()");
449 return entries[i].udata.second;
450 }
451
452 T& operator[](const K &key)
453 {
454 int hash = do_hash(key);
455 int i = do_lookup(key, hash);
456 if (i < 0)
457 i = do_insert(std::pair<K, T>(key, T()), hash);
458 return entries[i].udata.second;
459 }
460
461 template<typename Compare = std::less<K>>
462 void sort(Compare comp = Compare())
463 {
464 std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata.first, a.udata.first); });
465 do_rehash();
466 }
467
468 void swap(dict &other)
469 {
470 hashtable.swap(other.hashtable);
471 entries.swap(other.entries);
472 }
473
474 bool operator==(const dict &other) const {
475 if (size() != other.size())
476 return false;
477 for (auto &it : entries) {
478 auto oit = other.find(it.udata.first);
479 if (oit == other.end() || oit->second != it.udata.second)
480 return false;
481 }
482 return true;
483 }
484
485 bool operator!=(const dict &other) const {
486 return !(*this == other);
487 }
488
489 size_t size() const { return entries.size(); }
490 bool empty() const { return entries.empty(); }
491 void clear() { hashtable.clear(); entries.clear(); }
492
493 iterator begin() { return iterator(this, int(entries.size())-1); }
494 iterator end() { return iterator(nullptr, -1); }
495
496 const_iterator begin() const { return const_iterator(this, int(entries.size())-1); }
497 const_iterator end() const { return const_iterator(nullptr, -1); }
498 };
499
500 template<typename K, typename OPS>
501 class pool
502 {
503 template<typename, int, typename> friend class idict;
504
505 protected:
506 struct entry_t
507 {
508 K udata;
509 int next;
510
511 entry_t() { }
512 entry_t(const K &udata, int next) : udata(udata), next(next) { }
513 };
514
515 std::vector<int> hashtable;
516 std::vector<entry_t> entries;
517 OPS ops;
518
519 #if 0
520 static inline void do_assert(bool cond) {
521 if (!cond) throw std::runtime_error("pool<> assert failed.");
522 }
523 #else
524 static inline void do_assert(bool) { }
525 #endif
526
527 int do_hash(const K &key) const
528 {
529 unsigned int hash = 0;
530 if (!hashtable.empty())
531 hash = ops.hash(key) % (unsigned int)(hashtable.size());
532 return hash;
533 }
534
535 void do_rehash()
536 {
537 hashtable.clear();
538 hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1);
539
540 for (int i = 0; i < int(entries.size()); i++) {
541 do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size()));
542 int hash = do_hash(entries[i].udata);
543 entries[i].next = hashtable[hash];
544 hashtable[hash] = i;
545 }
546 }
547
548 int do_erase(int index, int hash)
549 {
550 do_assert(index < int(entries.size()));
551 if (hashtable.empty() || index < 0)
552 return 0;
553
554 int k = hashtable[hash];
555 if (k == index) {
556 hashtable[hash] = entries[index].next;
557 } else {
558 while (entries[k].next != index) {
559 k = entries[k].next;
560 do_assert(0 <= k && k < int(entries.size()));
561 }
562 entries[k].next = entries[index].next;
563 }
564
565 int back_idx = entries.size()-1;
566
567 if (index != back_idx)
568 {
569 int back_hash = do_hash(entries[back_idx].udata);
570
571 k = hashtable[back_hash];
572 if (k == back_idx) {
573 hashtable[back_hash] = index;
574 } else {
575 while (entries[k].next != back_idx) {
576 k = entries[k].next;
577 do_assert(0 <= k && k < int(entries.size()));
578 }
579 entries[k].next = index;
580 }
581
582 entries[index] = std::move(entries[back_idx]);
583 }
584
585 entries.pop_back();
586
587 if (entries.empty())
588 hashtable.clear();
589
590 return 1;
591 }
592
593 int do_lookup(const K &key, int &hash) const
594 {
595 if (hashtable.empty())
596 return -1;
597
598 if (entries.size() * hashtable_size_trigger > hashtable.size()) {
599 ((pool*)this)->do_rehash();
600 hash = do_hash(key);
601 }
602
603 int index = hashtable[hash];
604
605 while (index >= 0 && !ops.cmp(entries[index].udata, key)) {
606 index = entries[index].next;
607 do_assert(-1 <= index && index < int(entries.size()));
608 }
609
610 return index;
611 }
612
613 int do_insert(const K &value, int &hash)
614 {
615 if (hashtable.empty()) {
616 entries.push_back(entry_t(value, -1));
617 do_rehash();
618 hash = do_hash(value);
619 } else {
620 entries.push_back(entry_t(value, hashtable[hash]));
621 hashtable[hash] = entries.size() - 1;
622 }
623 return entries.size() - 1;
624 }
625
626 public:
627 class const_iterator : public std::iterator<std::forward_iterator_tag, K>
628 {
629 friend class pool;
630 protected:
631 const pool *ptr;
632 int index;
633 const_iterator(const pool *ptr, int index) : ptr(ptr), index(index) { }
634 public:
635 const_iterator() { }
636 const_iterator operator++() { index--; return *this; }
637 bool operator==(const const_iterator &other) const { return index == other.index; }
638 bool operator!=(const const_iterator &other) const { return index != other.index; }
639 const K &operator*() const { return ptr->entries[index].udata; }
640 const K *operator->() const { return &ptr->entries[index].udata; }
641 };
642
643 class iterator : public std::iterator<std::forward_iterator_tag, K>
644 {
645 friend class pool;
646 protected:
647 pool *ptr;
648 int index;
649 iterator(pool *ptr, int index) : ptr(ptr), index(index) { }
650 public:
651 iterator() { }
652 iterator operator++() { index--; return *this; }
653 bool operator==(const iterator &other) const { return index == other.index; }
654 bool operator!=(const iterator &other) const { return index != other.index; }
655 K &operator*() { return ptr->entries[index].udata; }
656 K *operator->() { return &ptr->entries[index].udata; }
657 const K &operator*() const { return ptr->entries[index].udata; }
658 const K *operator->() const { return &ptr->entries[index].udata; }
659 operator const_iterator() const { return const_iterator(ptr, index); }
660 };
661
662 pool()
663 {
664 }
665
666 pool(const pool &other)
667 {
668 entries = other.entries;
669 do_rehash();
670 }
671
672 pool(pool &&other)
673 {
674 swap(other);
675 }
676
677 pool &operator=(const pool &other) {
678 entries = other.entries;
679 do_rehash();
680 return *this;
681 }
682
683 pool &operator=(pool &&other) {
684 clear();
685 swap(other);
686 return *this;
687 }
688
689 pool(const std::initializer_list<K> &list)
690 {
691 for (auto &it : list)
692 insert(it);
693 }
694
695 template<class InputIterator>
696 pool(InputIterator first, InputIterator last)
697 {
698 insert(first, last);
699 }
700
701 template<class InputIterator>
702 void insert(InputIterator first, InputIterator last)
703 {
704 for (; first != last; ++first)
705 insert(*first);
706 }
707
708 std::pair<iterator, bool> insert(const K &value)
709 {
710 int hash = do_hash(value);
711 int i = do_lookup(value, hash);
712 if (i >= 0)
713 return std::pair<iterator, bool>(iterator(this, i), false);
714 i = do_insert(value, hash);
715 return std::pair<iterator, bool>(iterator(this, i), true);
716 }
717
718 int erase(const K &key)
719 {
720 int hash = do_hash(key);
721 int index = do_lookup(key, hash);
722 return do_erase(index, hash);
723 }
724
725 iterator erase(iterator it)
726 {
727 int hash = do_hash(*it);
728 do_erase(it.index, hash);
729 return ++it;
730 }
731
732 int count(const K &key) const
733 {
734 int hash = do_hash(key);
735 int i = do_lookup(key, hash);
736 return i < 0 ? 0 : 1;
737 }
738
739 int count(const K &key, const_iterator it) const
740 {
741 int hash = do_hash(key);
742 int i = do_lookup(key, hash);
743 return i < 0 || i > it.index ? 0 : 1;
744 }
745
746 iterator find(const K &key)
747 {
748 int hash = do_hash(key);
749 int i = do_lookup(key, hash);
750 if (i < 0)
751 return end();
752 return iterator(this, i);
753 }
754
755 const_iterator find(const K &key) const
756 {
757 int hash = do_hash(key);
758 int i = do_lookup(key, hash);
759 if (i < 0)
760 return end();
761 return const_iterator(this, i);
762 }
763
764 bool operator[](const K &key)
765 {
766 int hash = do_hash(key);
767 int i = do_lookup(key, hash);
768 return i >= 0;
769 }
770
771 template<typename Compare = std::less<K>>
772 void sort(Compare comp = Compare())
773 {
774 std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata, a.udata); });
775 do_rehash();
776 }
777
778 void swap(pool &other)
779 {
780 hashtable.swap(other.hashtable);
781 entries.swap(other.entries);
782 }
783
784 bool operator==(const pool &other) const {
785 if (size() != other.size())
786 return false;
787 for (auto &it : entries)
788 if (!other.count(it.udata))
789 return false;
790 return true;
791 }
792
793 bool operator!=(const pool &other) const {
794 return !(*this == other);
795 }
796
797 size_t size() const { return entries.size(); }
798 bool empty() const { return entries.empty(); }
799 void clear() { hashtable.clear(); entries.clear(); }
800
801 iterator begin() { return iterator(this, int(entries.size())-1); }
802 iterator end() { return iterator(nullptr, -1); }
803
804 const_iterator begin() const { return const_iterator(this, int(entries.size())-1); }
805 const_iterator end() const { return const_iterator(nullptr, -1); }
806 };
807
808 template<typename K, int offset, typename OPS>
809 class idict
810 {
811 pool<K, OPS> database;
812
813 public:
814 typedef typename pool<K, OPS>::const_iterator const_iterator;
815
816 int operator()(const K &key)
817 {
818 int hash = database.do_hash(key);
819 int i = database.do_lookup(key, hash);
820 if (i < 0)
821 i = database.do_insert(key, hash);
822 return i + offset;
823 }
824
825 int at(const K &key) const
826 {
827 int hash = database.do_hash(key);
828 int i = database.do_lookup(key, hash);
829 if (i < 0)
830 throw std::out_of_range("idict::at()");
831 return i + offset;
832 }
833
834 int count(const K &key) const
835 {
836 int hash = database.do_hash(key);
837 int i = database.do_lookup(key, hash);
838 return i < 0 ? 0 : 1;
839 }
840
841 void expect(const K &key, int i)
842 {
843 int j = (*this)(key);
844 if (i != j)
845 throw std::out_of_range("idict::expect()");
846 }
847
848 const K &operator[](int index) const
849 {
850 return database.entries.at(index - offset).udata;
851 }
852
853 const_iterator begin() const { return database.begin(); }
854 const_iterator end() const { return database.end(); }
855 };
856
857 } /* namespace hashlib */
858
859 #endif