1 // This is free and unencumbered software released into the public domain.
3 // Anyone is free to copy, modify, publish, use, compile, sell, or
4 // distribute this software, either in source code form or as a compiled
5 // binary, for any purpose, commercial or non-commercial, and by any
8 // -------------------------------------------------------
9 // Written by Clifford Wolf <clifford@clifford.at> in 2014
10 // -------------------------------------------------------
21 const int hashtable_size_trigger
= 2;
22 const int hashtable_size_factor
= 3;
24 // The XOR version of DJB2
25 inline unsigned int mkhash(unsigned int a
, unsigned int b
) {
26 return ((a
<< 5) + a
) ^ b
;
29 // traditionally 5381 is used as starting value for the djb2 hash
30 const unsigned int mkhash_init
= 5381;
32 // The ADD version of DJB2
33 // (usunsigned int mkhashe this version for cache locality in b)
34 inline unsigned int mkhash_add(unsigned int a
, unsigned int b
) {
35 return ((a
<< 5) + a
) + b
;
38 inline unsigned int mkhash_xorshift(unsigned int a
) {
43 } else if (sizeof(a
) == 8) {
48 throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints");
52 template<typename T
> struct hash_ops
{
53 static inline bool cmp(const T
&a
, const T
&b
) {
56 static inline unsigned int hash(const T
&a
) {
61 template<> struct hash_ops
<int> {
63 static inline bool cmp(T a
, T b
) {
67 static inline unsigned int hash(T a
) {
72 template<> struct hash_ops
<std::string
> {
73 static inline bool cmp(const std::string
&a
, const std::string
&b
) {
76 static inline unsigned int hash(const std::string
&a
) {
84 template<typename P
, typename Q
> struct hash_ops
<std::pair
<P
, Q
>> {
85 static inline bool cmp(std::pair
<P
, Q
> a
, std::pair
<P
, Q
> b
) {
88 static inline unsigned int hash(std::pair
<P
, Q
> a
) {
91 return mkhash(p_ops
.hash(a
.first
), q_ops
.hash(a
.second
));
95 template<typename T
> struct hash_ops
<std::vector
<T
>> {
96 static inline bool cmp(std::vector
<T
> a
, std::vector
<T
> b
) {
99 static inline unsigned int hash(std::vector
<T
> a
) {
101 unsigned int h
= mkhash_init
;
103 h
= mkhash(h
, t_ops
.hash(k
));
108 struct hash_cstr_ops
{
109 static inline bool cmp(const char *a
, const char *b
) {
110 for (int i
= 0; a
[i
] || b
[i
]; i
++)
115 static inline unsigned int hash(const char *a
) {
116 unsigned int hash
= mkhash_init
;
118 hash
= mkhash(hash
, *(a
++));
123 struct hash_ptr_ops
{
124 static inline bool cmp(const void *a
, const void *b
) {
127 static inline unsigned int hash(const void *a
) {
128 return (unsigned long)a
;
132 struct hash_obj_ops
{
133 static inline bool cmp(const void *a
, const void *b
) {
137 static inline unsigned int hash(const T
*a
) {
142 inline int hashtable_size(int min_size
)
144 static std::vector
<int> zero_and_some_primes
= {
145 0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
146 853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289,
147 12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281,
148 120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697,
149 897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037,
150 5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411,
151 25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239,
152 121590311, 151987889, 189984863, 237481091, 296851369, 371064217
155 for (auto p
: zero_and_some_primes
)
156 if (p
>= min_size
) return p
;
158 if (sizeof(int) == 4)
159 throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables.");
161 for (auto p
: zero_and_some_primes
)
162 if (100129 * p
> min_size
) return 100129 * p
;
164 throw std::length_error("hash table exceeded maximum size.");
167 template<typename K
, typename T
, typename OPS
= hash_ops
<K
>> class dict
;
168 template<typename K
, int offset
= 0, typename OPS
= hash_ops
<K
>> class idict
;
169 template<typename K
, typename OPS
= hash_ops
<K
>> class pool
;
171 template<typename K
, typename T
, typename OPS
>
176 std::pair
<K
, T
> udata
;
180 entry_t(const std::pair
<K
, T
> &udata
, int next
) : udata(udata
), next(next
) { }
181 entry_t(std::pair
<K
, T
> &&udata
, int next
) : udata(std::move(udata
)), next(next
) { }
184 std::vector
<int> hashtable
;
185 std::vector
<entry_t
> entries
;
189 static inline void do_assert(bool) { }
191 static inline void do_assert(bool cond
) {
192 if (!cond
) throw std::runtime_error("dict<> assert failed.");
196 int do_hash(const K
&key
) const
198 unsigned int hash
= 0;
199 if (!hashtable
.empty())
200 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
207 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
209 for (int i
= 0; i
< int(entries
.size()); i
++) {
210 do_assert(-1 <= entries
[i
].next
&& entries
[i
].next
< int(entries
.size()));
211 int hash
= do_hash(entries
[i
].udata
.first
);
212 entries
[i
].next
= hashtable
[hash
];
217 int do_erase(int index
, int hash
)
219 do_assert(index
< int(entries
.size()));
220 if (hashtable
.empty() || index
< 0)
223 int k
= hashtable
[hash
];
224 do_assert(0 <= k
&& k
< int(entries
.size()));
227 hashtable
[hash
] = entries
[index
].next
;
229 while (entries
[k
].next
!= index
) {
231 do_assert(0 <= k
&& k
< int(entries
.size()));
233 entries
[k
].next
= entries
[index
].next
;
236 int back_idx
= entries
.size()-1;
238 if (index
!= back_idx
)
240 int back_hash
= do_hash(entries
[back_idx
].udata
.first
);
242 k
= hashtable
[back_hash
];
243 do_assert(0 <= k
&& k
< int(entries
.size()));
246 hashtable
[back_hash
] = index
;
248 while (entries
[k
].next
!= back_idx
) {
250 do_assert(0 <= k
&& k
< int(entries
.size()));
252 entries
[k
].next
= index
;
255 entries
[index
] = std::move(entries
[back_idx
]);
266 int do_lookup(const K
&key
, int &hash
) const
268 if (hashtable
.empty())
271 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
272 ((dict
*)this)->do_rehash();
276 int index
= hashtable
[hash
];
278 while (index
>= 0 && !ops
.cmp(entries
[index
].udata
.first
, key
)) {
279 index
= entries
[index
].next
;
280 do_assert(-1 <= index
&& index
< int(entries
.size()));
286 int do_insert(const K
&key
, int &hash
)
288 if (hashtable
.empty()) {
289 entries
.push_back(entry_t(std::pair
<K
, T
>(key
, T()), -1));
293 entries
.push_back(entry_t(std::pair
<K
, T
>(key
, T()), hashtable
[hash
]));
294 hashtable
[hash
] = entries
.size() - 1;
296 return entries
.size() - 1;
299 int do_insert(const std::pair
<K
, T
> &value
, int &hash
)
301 if (hashtable
.empty()) {
302 entries
.push_back(entry_t(value
, -1));
304 hash
= do_hash(value
.first
);
306 entries
.push_back(entry_t(value
, hashtable
[hash
]));
307 hashtable
[hash
] = entries
.size() - 1;
309 return entries
.size() - 1;
313 class const_iterator
: public std::iterator
<std::forward_iterator_tag
, std::pair
<K
, T
>>
319 const_iterator(const dict
*ptr
, int index
) : ptr(ptr
), index(index
) { }
322 const_iterator
operator++() { index
--; return *this; }
323 bool operator<(const const_iterator
&other
) const { return index
> other
.index
; }
324 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
325 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
326 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
327 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
330 class iterator
: public std::iterator
<std::forward_iterator_tag
, std::pair
<K
, T
>>
336 iterator(dict
*ptr
, int index
) : ptr(ptr
), index(index
) { }
339 iterator
operator++() { index
--; return *this; }
340 bool operator<(const iterator
&other
) const { return index
> other
.index
; }
341 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
342 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
343 std::pair
<K
, T
> &operator*() { return ptr
->entries
[index
].udata
; }
344 std::pair
<K
, T
> *operator->() { return &ptr
->entries
[index
].udata
; }
345 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
346 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
347 operator const_iterator() const { return const_iterator(ptr
, index
); }
354 dict(const dict
&other
)
356 entries
= other
.entries
;
365 dict
&operator=(const dict
&other
) {
366 entries
= other
.entries
;
371 dict
&operator=(dict
&&other
) {
377 dict(const std::initializer_list
<std::pair
<K
, T
>> &list
)
379 for (auto &it
: list
)
383 template<class InputIterator
>
384 dict(InputIterator first
, InputIterator last
)
389 template<class InputIterator
>
390 void insert(InputIterator first
, InputIterator last
)
392 for (; first
!= last
; ++first
)
396 std::pair
<iterator
, bool> insert(const K
&key
)
398 int hash
= do_hash(key
);
399 int i
= do_lookup(key
, hash
);
401 return std::pair
<iterator
, bool>(iterator(this, i
), false);
402 i
= do_insert(key
, hash
);
403 return std::pair
<iterator
, bool>(iterator(this, i
), true);
406 std::pair
<iterator
, bool> insert(const std::pair
<K
, T
> &value
)
408 int hash
= do_hash(value
.first
);
409 int i
= do_lookup(value
.first
, hash
);
411 return std::pair
<iterator
, bool>(iterator(this, i
), false);
412 i
= do_insert(value
, hash
);
413 return std::pair
<iterator
, bool>(iterator(this, i
), true);
416 int erase(const K
&key
)
418 int hash
= do_hash(key
);
419 int index
= do_lookup(key
, hash
);
420 return do_erase(index
, hash
);
423 iterator
erase(iterator it
)
425 int hash
= do_hash(it
->first
);
426 do_erase(it
.index
, hash
);
430 int count(const K
&key
) const
432 int hash
= do_hash(key
);
433 int i
= do_lookup(key
, hash
);
434 return i
< 0 ? 0 : 1;
437 int count(const K
&key
, const_iterator it
) const
439 int hash
= do_hash(key
);
440 int i
= do_lookup(key
, hash
);
441 return i
< 0 || i
> it
.index
? 0 : 1;
444 iterator
find(const K
&key
)
446 int hash
= do_hash(key
);
447 int i
= do_lookup(key
, hash
);
450 return iterator(this, i
);
453 const_iterator
find(const K
&key
) const
455 int hash
= do_hash(key
);
456 int i
= do_lookup(key
, hash
);
459 return const_iterator(this, i
);
464 int hash
= do_hash(key
);
465 int i
= do_lookup(key
, hash
);
467 throw std::out_of_range("dict::at()");
468 return entries
[i
].udata
.second
;
471 const T
& at(const K
&key
) const
473 int hash
= do_hash(key
);
474 int i
= do_lookup(key
, hash
);
476 throw std::out_of_range("dict::at()");
477 return entries
[i
].udata
.second
;
480 T
& operator[](const K
&key
)
482 int hash
= do_hash(key
);
483 int i
= do_lookup(key
, hash
);
485 i
= do_insert(std::pair
<K
, T
>(key
, T()), hash
);
486 return entries
[i
].udata
.second
;
489 template<typename Compare
= std::less
<K
>>
490 void sort(Compare comp
= Compare())
492 std::sort(entries
.begin(), entries
.end(), [comp
](const entry_t
&a
, const entry_t
&b
){ return comp(b
.udata
.first
, a
.udata
.first
); });
496 void swap(dict
&other
)
498 hashtable
.swap(other
.hashtable
);
499 entries
.swap(other
.entries
);
502 bool operator==(const dict
&other
) const {
503 if (size() != other
.size())
505 for (auto &it
: entries
) {
506 auto oit
= other
.find(it
.udata
.first
);
507 if (oit
== other
.end() || !(oit
->second
== it
.udata
.second
))
513 bool operator!=(const dict
&other
) const {
514 return !operator==(other
);
517 size_t size() const { return entries
.size(); }
518 bool empty() const { return entries
.empty(); }
519 void clear() { hashtable
.clear(); entries
.clear(); }
521 iterator
begin() { return iterator(this, int(entries
.size())-1); }
522 iterator
end() { return iterator(nullptr, -1); }
524 const_iterator
begin() const { return const_iterator(this, int(entries
.size())-1); }
525 const_iterator
end() const { return const_iterator(nullptr, -1); }
528 template<typename K
, typename OPS
>
531 template<typename
, int, typename
> friend class idict
;
540 entry_t(const K
&udata
, int next
) : udata(udata
), next(next
) { }
543 std::vector
<int> hashtable
;
544 std::vector
<entry_t
> entries
;
548 static inline void do_assert(bool) { }
550 static inline void do_assert(bool cond
) {
551 if (!cond
) throw std::runtime_error("pool<> assert failed.");
555 int do_hash(const K
&key
) const
557 unsigned int hash
= 0;
558 if (!hashtable
.empty())
559 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
566 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
568 for (int i
= 0; i
< int(entries
.size()); i
++) {
569 do_assert(-1 <= entries
[i
].next
&& entries
[i
].next
< int(entries
.size()));
570 int hash
= do_hash(entries
[i
].udata
);
571 entries
[i
].next
= hashtable
[hash
];
576 int do_erase(int index
, int hash
)
578 do_assert(index
< int(entries
.size()));
579 if (hashtable
.empty() || index
< 0)
582 int k
= hashtable
[hash
];
584 hashtable
[hash
] = entries
[index
].next
;
586 while (entries
[k
].next
!= index
) {
588 do_assert(0 <= k
&& k
< int(entries
.size()));
590 entries
[k
].next
= entries
[index
].next
;
593 int back_idx
= entries
.size()-1;
595 if (index
!= back_idx
)
597 int back_hash
= do_hash(entries
[back_idx
].udata
);
599 k
= hashtable
[back_hash
];
601 hashtable
[back_hash
] = index
;
603 while (entries
[k
].next
!= back_idx
) {
605 do_assert(0 <= k
&& k
< int(entries
.size()));
607 entries
[k
].next
= index
;
610 entries
[index
] = std::move(entries
[back_idx
]);
621 int do_lookup(const K
&key
, int &hash
) const
623 if (hashtable
.empty())
626 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
627 ((pool
*)this)->do_rehash();
631 int index
= hashtable
[hash
];
633 while (index
>= 0 && !ops
.cmp(entries
[index
].udata
, key
)) {
634 index
= entries
[index
].next
;
635 do_assert(-1 <= index
&& index
< int(entries
.size()));
641 int do_insert(const K
&value
, int &hash
)
643 if (hashtable
.empty()) {
644 entries
.push_back(entry_t(value
, -1));
646 hash
= do_hash(value
);
648 entries
.push_back(entry_t(value
, hashtable
[hash
]));
649 hashtable
[hash
] = entries
.size() - 1;
651 return entries
.size() - 1;
655 class const_iterator
: public std::iterator
<std::forward_iterator_tag
, K
>
661 const_iterator(const pool
*ptr
, int index
) : ptr(ptr
), index(index
) { }
664 const_iterator
operator++() { index
--; return *this; }
665 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
666 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
667 const K
&operator*() const { return ptr
->entries
[index
].udata
; }
668 const K
*operator->() const { return &ptr
->entries
[index
].udata
; }
671 class iterator
: public std::iterator
<std::forward_iterator_tag
, K
>
677 iterator(pool
*ptr
, int index
) : ptr(ptr
), index(index
) { }
680 iterator
operator++() { index
--; return *this; }
681 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
682 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
683 K
&operator*() { return ptr
->entries
[index
].udata
; }
684 K
*operator->() { return &ptr
->entries
[index
].udata
; }
685 const K
&operator*() const { return ptr
->entries
[index
].udata
; }
686 const K
*operator->() const { return &ptr
->entries
[index
].udata
; }
687 operator const_iterator() const { return const_iterator(ptr
, index
); }
694 pool(const pool
&other
)
696 entries
= other
.entries
;
705 pool
&operator=(const pool
&other
) {
706 entries
= other
.entries
;
711 pool
&operator=(pool
&&other
) {
717 pool(const std::initializer_list
<K
> &list
)
719 for (auto &it
: list
)
723 template<class InputIterator
>
724 pool(InputIterator first
, InputIterator last
)
729 template<class InputIterator
>
730 void insert(InputIterator first
, InputIterator last
)
732 for (; first
!= last
; ++first
)
736 std::pair
<iterator
, bool> insert(const K
&value
)
738 int hash
= do_hash(value
);
739 int i
= do_lookup(value
, hash
);
741 return std::pair
<iterator
, bool>(iterator(this, i
), false);
742 i
= do_insert(value
, hash
);
743 return std::pair
<iterator
, bool>(iterator(this, i
), true);
746 int erase(const K
&key
)
748 int hash
= do_hash(key
);
749 int index
= do_lookup(key
, hash
);
750 return do_erase(index
, hash
);
753 iterator
erase(iterator it
)
755 int hash
= do_hash(*it
);
756 do_erase(it
.index
, hash
);
760 int count(const K
&key
) const
762 int hash
= do_hash(key
);
763 int i
= do_lookup(key
, hash
);
764 return i
< 0 ? 0 : 1;
767 int count(const K
&key
, const_iterator it
) const
769 int hash
= do_hash(key
);
770 int i
= do_lookup(key
, hash
);
771 return i
< 0 || i
> it
.index
? 0 : 1;
774 iterator
find(const K
&key
)
776 int hash
= do_hash(key
);
777 int i
= do_lookup(key
, hash
);
780 return iterator(this, i
);
783 const_iterator
find(const K
&key
) const
785 int hash
= do_hash(key
);
786 int i
= do_lookup(key
, hash
);
789 return const_iterator(this, i
);
792 bool operator[](const K
&key
)
794 int hash
= do_hash(key
);
795 int i
= do_lookup(key
, hash
);
799 template<typename Compare
= std::less
<K
>>
800 void sort(Compare comp
= Compare())
802 std::sort(entries
.begin(), entries
.end(), [comp
](const entry_t
&a
, const entry_t
&b
){ return comp(b
.udata
, a
.udata
); });
806 void swap(pool
&other
)
808 hashtable
.swap(other
.hashtable
);
809 entries
.swap(other
.entries
);
812 bool operator==(const pool
&other
) const {
813 if (size() != other
.size())
815 for (auto &it
: entries
)
816 if (!other
.count(it
.udata
))
821 bool operator!=(const pool
&other
) const {
822 return !operator==(other
);
825 size_t size() const { return entries
.size(); }
826 bool empty() const { return entries
.empty(); }
827 void clear() { hashtable
.clear(); entries
.clear(); }
829 iterator
begin() { return iterator(this, int(entries
.size())-1); }
830 iterator
end() { return iterator(nullptr, -1); }
832 const_iterator
begin() const { return const_iterator(this, int(entries
.size())-1); }
833 const_iterator
end() const { return const_iterator(nullptr, -1); }
836 template<typename K
, int offset
, typename OPS
>
839 pool
<K
, OPS
> database
;
842 typedef typename pool
<K
, OPS
>::const_iterator const_iterator
;
844 int operator()(const K
&key
)
846 int hash
= database
.do_hash(key
);
847 int i
= database
.do_lookup(key
, hash
);
849 i
= database
.do_insert(key
, hash
);
853 int at(const K
&key
) const
855 int hash
= database
.do_hash(key
);
856 int i
= database
.do_lookup(key
, hash
);
858 throw std::out_of_range("idict::at()");
862 int count(const K
&key
) const
864 int hash
= database
.do_hash(key
);
865 int i
= database
.do_lookup(key
, hash
);
866 return i
< 0 ? 0 : 1;
869 void expect(const K
&key
, int i
)
871 int j
= (*this)(key
);
873 throw std::out_of_range("idict::expect()");
876 const K
&operator[](int index
) const
878 return database
.entries
.at(index
- offset
).udata
;
881 const_iterator
begin() const { return database
.begin(); }
882 const_iterator
end() const { return database
.end(); }
885 } /* namespace hashlib */