1 // This is free and unencumbered software released into the public domain.
3 // Anyone is free to copy, modify, publish, use, compile, sell, or
4 // distribute this software, either in source code form or as a compiled
5 // binary, for any purpose, commercial or non-commercial, and by any
8 // -------------------------------------------------------
9 // Written by Clifford Wolf <clifford@clifford.at> in 2014
10 // -------------------------------------------------------
20 const int hashtable_size_trigger
= 2;
21 const int hashtable_size_factor
= 3;
23 // The XOR version of DJB2
24 // (traditionally 5381 is used as starting value for the djb2 hash)
25 inline unsigned int mkhash(unsigned int a
, unsigned int b
) {
26 return ((a
<< 5) + a
) ^ b
;
29 // The ADD version of DJB2
30 // (use this version for cache locality in b)
31 inline unsigned int mkhash_add(unsigned int a
, unsigned int b
) {
32 return ((a
<< 5) + a
) + b
;
35 inline unsigned int mkhash_xorshift(unsigned int a
) {
40 } else if (sizeof(a
) == 8) {
45 throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints");
49 template<typename T
> struct hash_ops
{
50 bool cmp(const T
&a
, const T
&b
) const {
53 unsigned int hash(const T
&a
) const {
58 template<> struct hash_ops
<int> {
60 bool cmp(T a
, T b
) const {
63 unsigned int hash(unsigned int a
) const {
68 template<> struct hash_ops
<std::string
> {
69 bool cmp(const std::string
&a
, const std::string
&b
) const {
72 unsigned int hash(const std::string
&a
) const {
80 template<typename P
, typename Q
> struct hash_ops
<std::pair
<P
, Q
>> {
81 bool cmp(std::pair
<P
, Q
> a
, std::pair
<P
, Q
> b
) const {
84 unsigned int hash(std::pair
<P
, Q
> a
) const {
87 return mkhash(p_ops
.hash(a
.first
), q_ops
.hash(a
.second
));
91 struct hash_cstr_ops
{
92 bool cmp(const char *a
, const char *b
) const {
93 for (int i
= 0; a
[i
] || b
[i
]; i
++)
98 unsigned int hash(const char *a
) const {
99 unsigned int hash
= 5381;
101 hash
= mkhash(hash
, *(a
++));
106 struct hash_ptr_ops
{
107 bool cmp(const void *a
, const void *b
) const {
110 unsigned int hash(const void *a
) const {
111 return (unsigned long)a
;
115 struct hash_obj_ops
{
116 bool cmp(const void *a
, const void *b
) const {
120 unsigned int hash(const T
*a
) const {
125 inline int hashtable_size(int min_size
)
127 static std::vector
<int> primes
= {
128 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
129 853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289,
130 12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281,
131 120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697,
132 897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037,
133 5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411,
134 25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239,
135 121590311, 151987889, 189984863, 237481091, 296851369, 371064217
138 for (auto p
: primes
)
139 if (p
> min_size
) return p
;
141 if (sizeof(int) == 4)
142 throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables.");
144 for (auto p
: primes
)
145 if (100129 * p
> min_size
) return 100129 * p
;
147 throw std::length_error("hash table exceeded maximum size.");
150 template<typename K
, typename T
, typename OPS
= hash_ops
<K
>>
156 std::pair
<K
, T
> udata
;
158 entry_t() : link(-1) { }
159 entry_t(const std::pair
<K
, T
> &udata
) : link(1), udata(udata
) { }
161 bool is_free() const { return link
< 0; }
162 int get_next() const { return (link
> 0 ? link
: -link
) - 2; }
163 bool get_last() const { return get_next() == -1; }
164 void set_next_used(int next
) { link
= next
+ 2; }
165 void set_next_free(int next
) { link
= -(next
+ 2); }
168 std::vector
<int> hashtable
;
169 std::vector
<entry_t
> entries
;
170 int free_list
, counter
, begin_n
;
171 int begin_seek_count
;
179 begin_seek_count
= 0;
182 void init_from(const dict
<K
, T
, OPS
> &other
)
187 counter
= other
.size();
188 begin_n
= counter
- 1;
189 entries
.reserve(counter
);
191 for (auto &it
: other
)
192 entries
.push_back(entry_t(it
));
197 int mkhash(const K
&key
) const
199 unsigned int hash
= 0;
200 if (!hashtable
.empty())
201 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
205 void upd_begin_n(bool do_refree
= true)
208 begin_n
= -(begin_n
+2);
209 while (begin_n
>= 0 && entries
[begin_n
].is_free()) { begin_seek_count
++; begin_n
--; }
210 if (do_refree
&& begin_seek_count
> int(entries
.size() / 2)) refree();
220 for (int i
= 0; i
< int(entries
.size()); i
++)
221 if (entries
[i
].is_free()) {
223 entries
[last_free
].set_next_free(i
);
231 entries
[last_free
].set_next_free(-1);
233 begin_seek_count
= 0;
239 entries
.resize(begin_n
+ 1);
245 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
248 for (int i
= 0; i
< int(entries
.size()); i
++)
249 if (entries
[i
].is_free()) {
251 entries
[last_free
].set_next_free(i
);
256 int hash
= mkhash(entries
[i
].udata
.first
);
257 entries
[i
].set_next_used(hashtable
[hash
]);
263 entries
[last_free
].set_next_free(-1);
265 begin_seek_count
= 0;
268 int do_erase(const K
&key
, int hash
)
271 int index
= hashtable
.empty() ? -1 : hashtable
[hash
];
275 if (ops
.cmp(entries
[index
].udata
.first
, key
)) {
277 hashtable
[hash
] = entries
[index
].get_next();
279 entries
[last_index
].set_next_used(entries
[index
].get_next());
280 entries
[index
].udata
= std::pair
<K
, T
>();
281 entries
[index
].set_next_free(free_list
);
285 else if (index
== begin_n
)
286 begin_n
= -(begin_n
+2);
290 index
= entries
[index
].get_next();
294 int lookup_index(const K
&key
, int hash
) const
296 int index
= hashtable
.empty() ? -1 : hashtable
[hash
];
300 if (ops
.cmp(entries
[index
].udata
.first
, key
))
302 index
= entries
[index
].get_next();
306 int insert_at(const std::pair
<K
, T
> &value
, int hash
)
310 free_list
= entries
.size();
311 entries
.push_back(entry_t());
313 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
315 entries
[i
].udata
= value
;
316 entries
[i
].set_next_used(0);
325 free_list
= entries
[i
].get_next();
326 entries
[i
].udata
= value
;
327 entries
[i
].set_next_used(hashtable
[hash
]);
329 if ((begin_n
< -1 && -(begin_n
+2) <= i
) || (begin_n
>= -1 && begin_n
<= i
))
338 dict
<K
, T
, OPS
> *ptr
;
342 iterator(dict
<K
, T
, OPS
> *ptr
, int index
) : ptr(ptr
), index(index
) { }
343 iterator
operator++() { do index
--; while (index
>= 0 && ptr
->entries
[index
].is_free()); return *this; }
344 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
345 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
346 std::pair
<K
, T
> &operator*() { return ptr
->entries
[index
].udata
; }
347 std::pair
<K
, T
> *operator->() { return &ptr
->entries
[index
].udata
; }
348 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
349 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
354 const dict
<K
, T
, OPS
> *ptr
;
358 const_iterator(const dict
<K
, T
, OPS
> *ptr
, int index
) : ptr(ptr
), index(index
) { }
359 const_iterator
operator++() { do index
--; while (index
>= 0 && ptr
->entries
[index
].is_free()); return *this; }
360 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
361 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
362 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
363 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
371 dict(const dict
<K
, T
, OPS
> &other
)
376 dict(dict
<K
, T
, OPS
> &&other
)
382 dict
<K
, T
, OPS
> &operator=(const dict
<K
, T
, OPS
> &other
) {
388 dict
<K
, T
, OPS
> &operator=(dict
<K
, T
, OPS
> &&other
) {
394 dict(const std::initializer_list
<std::pair
<K
, T
>> &list
)
397 for (auto &it
: list
)
401 template<class InputIterator
>
402 dict(InputIterator first
, InputIterator last
)
408 template<class InputIterator
>
409 void insert(InputIterator first
, InputIterator last
)
411 for (; first
!= last
; ++first
)
415 std::pair
<iterator
, bool> insert(const std::pair
<K
, T
> &value
)
417 int hash
= mkhash(value
.first
);
418 int i
= lookup_index(value
.first
, hash
);
420 return std::pair
<iterator
, bool>(iterator(this, i
), false);
421 i
= insert_at(value
, hash
);
422 return std::pair
<iterator
, bool>(iterator(this, i
), true);
425 int erase(const K
&key
)
427 int hash
= mkhash(key
);
428 return do_erase(key
, hash
);
431 iterator
erase(iterator it
)
433 int hash
= mkhash(it
->first
);
434 do_erase(it
->first
, hash
);
438 int count(const K
&key
) const
440 int hash
= mkhash(key
);
441 int i
= lookup_index(key
, hash
);
442 return i
< 0 ? 0 : 1;
445 iterator
find(const K
&key
)
447 int hash
= mkhash(key
);
448 int i
= lookup_index(key
, hash
);
451 return iterator(this, i
);
454 const_iterator
find(const K
&key
) const
456 int hash
= mkhash(key
);
457 int i
= lookup_index(key
, hash
);
460 return const_iterator(this, i
);
465 int hash
= mkhash(key
);
466 int i
= lookup_index(key
, hash
);
468 throw std::out_of_range("dict::at()");
469 return entries
[i
].udata
.second
;
472 const T
& at(const K
&key
) const
474 int hash
= mkhash(key
);
475 int i
= lookup_index(key
, hash
);
477 throw std::out_of_range("dict::at()");
478 return entries
[i
].udata
.second
;
481 T
& operator[](const K
&key
)
483 int hash
= mkhash(key
);
484 int i
= lookup_index(key
, hash
);
486 i
= insert_at(std::pair
<K
, T
>(key
, T()), hash
);
487 return entries
[i
].udata
.second
;
490 void swap(dict
<K
, T
, OPS
> &other
)
492 hashtable
.swap(other
.hashtable
);
493 entries
.swap(other
.entries
);
494 std::swap(free_list
, other
.free_list
);
495 std::swap(counter
, other
.counter
);
496 std::swap(begin_n
, other
.begin_n
);
497 std::swap(begin_seek_count
, other
.begin_seek_count
);
500 bool operator==(const dict
<K
, T
, OPS
> &other
) const {
501 if (counter
!= other
.counter
)
505 if (entries
.size() < other
.entries
.size())
506 for (auto &it
: *this) {
507 auto oit
= other
.find(it
.first
);
508 if (oit
== other
.end() || oit
->second
!= it
.second
)
512 for (auto &oit
: other
) {
513 auto it
= find(oit
.first
);
514 if (it
== end() || it
->second
!= oit
.second
)
520 bool operator!=(const dict
<K
, T
, OPS
> &other
) const {
521 return !(*this == other
);
524 size_t size() const { return counter
; }
525 bool empty() const { return counter
== 0; }
526 void clear() { hashtable
.clear(); entries
.clear(); init(); }
528 iterator
begin() { upd_begin_n(); return iterator(this, begin_n
); }
529 iterator
end() { return iterator(this, -1); }
531 const_iterator
begin() const { ((dict
*)this)->upd_begin_n(); return const_iterator(this, begin_n
); }
532 const_iterator
end() const { return const_iterator(this, -1); }
535 template<typename K
, typename OPS
= hash_ops
<K
>>
543 entry_t() : link(-1) { }
544 entry_t(const K
&key
) : link(1), key(key
) { }
546 bool is_free() const { return link
< 0; }
547 int get_next() const { return (link
> 0 ? link
: -link
) - 2; }
548 bool get_last() const { return get_next() == -1; }
549 void set_next_used(int next
) { link
= next
+ 2; }
550 void set_next_free(int next
) { link
= -(next
+ 2); }
553 std::vector
<int> hashtable
;
554 std::vector
<entry_t
> entries
;
555 int free_list
, counter
, begin_n
;
556 int begin_seek_count
;
564 begin_seek_count
= 0;
567 void init_from(const pool
<K
, OPS
> &other
)
572 counter
= other
.size();
573 begin_n
= counter
- 1;
574 entries
.reserve(counter
);
576 for (auto &it
: other
)
577 entries
.push_back(entry_t(it
));
582 int mkhash(const K
&key
) const
584 unsigned int hash
= 0;
585 if (!hashtable
.empty())
586 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
590 void upd_begin_n(bool do_refree
= true)
593 begin_n
= -(begin_n
+2);
594 while (begin_n
>= 0 && entries
[begin_n
].is_free()) { begin_seek_count
++; begin_n
--; }
595 if (do_refree
&& begin_seek_count
> int(entries
.size() / 2)) refree();
605 for (int i
= 0; i
< int(entries
.size()); i
++)
606 if (entries
[i
].is_free()) {
608 entries
[last_free
].set_next_free(i
);
616 entries
[last_free
].set_next_free(-1);
618 begin_seek_count
= 0;
624 entries
.resize(begin_n
+ 1);
630 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
633 for (int i
= 0; i
< int(entries
.size()); i
++)
634 if (entries
[i
].is_free()) {
636 entries
[last_free
].set_next_free(i
);
641 int hash
= mkhash(entries
[i
].key
);
642 entries
[i
].set_next_used(hashtable
[hash
]);
648 entries
[last_free
].set_next_free(-1);
650 begin_seek_count
= 0;
653 int do_erase(const K
&key
, int hash
)
656 int index
= hashtable
.empty() ? -1 : hashtable
[hash
];
660 if (ops
.cmp(entries
[index
].key
, key
)) {
662 hashtable
[hash
] = entries
[index
].get_next();
664 entries
[last_index
].set_next_used(entries
[index
].get_next());
665 entries
[index
].key
= K();
666 entries
[index
].set_next_free(free_list
);
670 else if (index
== begin_n
)
671 begin_n
= -(begin_n
+2);
675 index
= entries
[index
].get_next();
679 int lookup_index(const K
&key
, int hash
) const
681 int index
= hashtable
.empty() ? -1 : hashtable
[hash
];
685 if (ops
.cmp(entries
[index
].key
, key
))
687 index
= entries
[index
].get_next();
691 int insert_at(const K
&key
, int hash
)
695 free_list
= entries
.size();
696 entries
.push_back(entry_t());
698 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
700 entries
[i
].key
= key
;
701 entries
[i
].set_next_used(0);
710 free_list
= entries
[i
].get_next();
711 entries
[i
].key
= key
;
712 entries
[i
].set_next_used(hashtable
[hash
]);
714 if ((begin_n
< -1 && -(begin_n
+2) <= i
) || (begin_n
>= -1 && begin_n
<= i
))
727 iterator(pool
<K
, OPS
> *ptr
, int index
) : ptr(ptr
), index(index
) { }
728 iterator
operator++() { do index
--; while (index
>= 0 && ptr
->entries
[index
].is_free()); return *this; }
729 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
730 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
731 K
&operator*() { return ptr
->entries
[index
].key
; }
732 K
*operator->() { return &ptr
->entries
[index
].key
; }
733 const K
&operator*() const { return ptr
->entries
[index
].key
; }
734 const K
*operator->() const { return &ptr
->entries
[index
].key
; }
739 const pool
<K
, OPS
> *ptr
;
743 const_iterator(const pool
<K
, OPS
> *ptr
, int index
) : ptr(ptr
), index(index
) { }
744 const_iterator
operator++() { do index
--; while (index
>= 0 && ptr
->entries
[index
].is_free()); return *this; }
745 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
746 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
747 const K
&operator*() const { return ptr
->entries
[index
].key
; }
748 const K
*operator->() const { return &ptr
->entries
[index
].key
; }
756 pool(const pool
<K
, OPS
> &other
)
761 pool(pool
<K
, OPS
> &&other
)
767 pool
<K
, OPS
> &operator=(const pool
<K
, OPS
> &other
) {
773 pool
<K
, OPS
> &operator=(pool
<K
, OPS
> &&other
) {
779 pool(const std::initializer_list
<K
> &list
)
782 for (auto &it
: list
)
786 template<class InputIterator
>
787 pool(InputIterator first
, InputIterator last
)
793 template<class InputIterator
>
794 void insert(InputIterator first
, InputIterator last
)
796 for (; first
!= last
; ++first
)
800 std::pair
<iterator
, bool> insert(const K
&key
)
802 int hash
= mkhash(key
);
803 int i
= lookup_index(key
, hash
);
805 return std::pair
<iterator
, bool>(iterator(this, i
), false);
806 i
= insert_at(key
, hash
);
807 return std::pair
<iterator
, bool>(iterator(this, i
), true);
810 int erase(const K
&key
)
812 int hash
= mkhash(key
);
813 return do_erase(key
, hash
);
816 iterator
erase(iterator it
)
818 int hash
= mkhash(*it
);
823 int count(const K
&key
) const
825 int hash
= mkhash(key
);
826 int i
= lookup_index(key
, hash
);
827 return i
< 0 ? 0 : 1;
830 iterator
find(const K
&key
)
832 int hash
= mkhash(key
);
833 int i
= lookup_index(key
, hash
);
836 return iterator(this, i
);
839 const_iterator
find(const K
&key
) const
841 int hash
= mkhash(key
);
842 int i
= lookup_index(key
, hash
);
845 return const_iterator(this, i
);
848 bool operator[](const K
&key
) const
850 int hash
= mkhash(key
);
851 int i
= lookup_index(key
, hash
);
855 void swap(pool
<K
, OPS
> &other
)
857 hashtable
.swap(other
.hashtable
);
858 entries
.swap(other
.entries
);
859 std::swap(free_list
, other
.free_list
);
860 std::swap(counter
, other
.counter
);
861 std::swap(begin_n
, other
.begin_n
);
862 std::swap(begin_seek_count
, other
.begin_seek_count
);
865 bool operator==(const pool
<K
, OPS
> &other
) const {
866 if (counter
!= other
.counter
)
870 if (entries
.size() < other
.entries
.size())
871 for (auto &it
: *this) {
872 auto oit
= other
.find(it
.first
);
873 if (oit
== other
.end() || oit
->second
!= it
.second
)
877 for (auto &oit
: other
) {
878 auto it
= find(oit
.first
);
879 if (it
== end() || it
->second
!= oit
.second
)
885 bool operator!=(const pool
<K
, OPS
> &other
) const {
886 return !(*this == other
);
889 size_t size() const { return counter
; }
890 bool empty() const { return counter
== 0; }
891 void clear() { hashtable
.clear(); entries
.clear(); init(); }
893 iterator
begin() { upd_begin_n(); return iterator(this, begin_n
); }
894 iterator
end() { return iterator(this, -1); }
896 const_iterator
begin() const { ((pool
*)this)->upd_begin_n(); return const_iterator(this, begin_n
); }
897 const_iterator
end() const { return const_iterator(this, -1); }
900 } /* namespace hashlib */