72e9bc2ef9f3d39080fdb1b0f33fff05f6f2499c
1 // This is free and unencumbered software released into the public domain.
3 // Anyone is free to copy, modify, publish, use, compile, sell, or
4 // distribute this software, either in source code form or as a compiled
5 // binary, for any purpose, commercial or non-commercial, and by any
8 // -------------------------------------------------------
9 // Written by Clifford Wolf <clifford@clifford.at> in 2014
10 // -------------------------------------------------------
20 const int hashtable_size_trigger
= 2;
21 const int hashtable_size_factor
= 3;
23 // The XOR version of DJB2
24 inline unsigned int mkhash(unsigned int a
, unsigned int b
) {
25 return ((a
<< 5) + a
) ^ b
;
28 // traditionally 5381 is used as starting value for the djb2 hash
29 const unsigned int mkhash_init
= 5381;
31 // The ADD version of DJB2
32 // (usunsigned int mkhashe this version for cache locality in b)
33 inline unsigned int mkhash_add(unsigned int a
, unsigned int b
) {
34 return ((a
<< 5) + a
) + b
;
37 inline unsigned int mkhash_xorshift(unsigned int a
) {
42 } else if (sizeof(a
) == 8) {
47 throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints");
51 template<typename T
> struct hash_ops
{
52 static inline bool cmp(const T
&a
, const T
&b
) {
55 static inline unsigned int hash(const T
&a
) {
60 template<> struct hash_ops
<int> {
62 static inline bool cmp(T a
, T b
) {
66 static inline unsigned int hash(T a
) {
71 template<> struct hash_ops
<std::string
> {
72 static inline bool cmp(const std::string
&a
, const std::string
&b
) {
75 static inline unsigned int hash(const std::string
&a
) {
83 template<typename P
, typename Q
> struct hash_ops
<std::pair
<P
, Q
>> {
84 static inline bool cmp(std::pair
<P
, Q
> a
, std::pair
<P
, Q
> b
) {
87 static inline unsigned int hash(std::pair
<P
, Q
> a
) {
90 return mkhash(p_ops
.hash(a
.first
), q_ops
.hash(a
.second
));
94 template<typename T
> struct hash_ops
<std::vector
<T
>> {
95 static inline bool cmp(std::vector
<T
> a
, std::vector
<T
> b
) {
98 static inline unsigned int hash(std::vector
<T
> a
) {
100 unsigned int h
= mkhash_init
;
102 h
= mkhash(h
, t_ops
.hash(k
));
107 struct hash_cstr_ops
{
108 static inline bool cmp(const char *a
, const char *b
) {
109 for (int i
= 0; a
[i
] || b
[i
]; i
++)
114 static inline unsigned int hash(const char *a
) {
115 unsigned int hash
= mkhash_init
;
117 hash
= mkhash(hash
, *(a
++));
122 struct hash_ptr_ops
{
123 static inline bool cmp(const void *a
, const void *b
) {
126 static inline unsigned int hash(const void *a
) {
127 return (unsigned long)a
;
131 struct hash_obj_ops
{
132 static inline bool cmp(const void *a
, const void *b
) {
136 static inline unsigned int hash(const T
*a
) {
141 inline int hashtable_size(int min_size
)
143 static std::vector
<int> zero_and_some_primes
= {
144 0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
145 853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289,
146 12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281,
147 120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697,
148 897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037,
149 5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411,
150 25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239,
151 121590311, 151987889, 189984863, 237481091, 296851369, 371064217
154 for (auto p
: zero_and_some_primes
)
155 if (p
>= min_size
) return p
;
157 if (sizeof(int) == 4)
158 throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables.");
160 for (auto p
: zero_and_some_primes
)
161 if (100129 * p
> min_size
) return 100129 * p
;
163 throw std::length_error("hash table exceeded maximum size.");
166 template<typename K
, typename T
, typename OPS
= hash_ops
<K
>>
171 std::pair
<K
, T
> udata
;
175 entry_t(const std::pair
<K
, T
> &udata
, int next
) : udata(udata
), next(next
) { }
178 std::vector
<int> hashtable
;
179 std::vector
<entry_t
> entries
;
183 static inline void do_assert(bool cond
) {
184 if (!cond
) throw std::runtime_error("dict<> assert failed.");
187 static inline void do_assert(bool) { }
190 int do_hash(const K
&key
) const
192 unsigned int hash
= 0;
193 if (!hashtable
.empty())
194 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
201 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
203 for (int i
= 0; i
< int(entries
.size()); i
++) {
204 do_assert(-1 <= entries
[i
].next
&& entries
[i
].next
< int(entries
.size()));
205 int hash
= do_hash(entries
[i
].udata
.first
);
206 entries
[i
].next
= hashtable
[hash
];
211 int do_erase(int index
, int hash
)
213 do_assert(index
< int(entries
.size()));
214 if (hashtable
.empty() || index
< 0)
217 int k
= hashtable
[hash
];
219 hashtable
[hash
] = entries
[index
].next
;
221 while (entries
[k
].next
!= index
) {
223 do_assert(0 <= k
&& k
< int(entries
.size()));
225 entries
[k
].next
= entries
[index
].next
;
228 int back_idx
= entries
.size()-1;
230 if (index
!= back_idx
)
232 int back_hash
= do_hash(entries
[back_idx
].udata
.first
);
234 k
= hashtable
[back_hash
];
236 hashtable
[back_hash
] = index
;
238 while (entries
[k
].next
!= back_idx
) {
240 do_assert(0 <= k
&& k
< int(entries
.size()));
242 entries
[k
].next
= index
;
245 entries
[index
] = std::move(entries
[back_idx
]);
256 int do_lookup(const K
&key
, int &hash
) const
258 if (hashtable
.empty())
261 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
262 ((dict
*)this)->do_rehash();
266 int index
= hashtable
[hash
];
268 while (index
>= 0 && !ops
.cmp(entries
[index
].udata
.first
, key
)) {
269 index
= entries
[index
].next
;
270 do_assert(-1 <= index
&& index
< int(entries
.size()));
276 int do_insert(const std::pair
<K
, T
> &value
, int &hash
)
278 if (hashtable
.empty()) {
279 entries
.push_back(entry_t(value
, -1));
281 hash
= do_hash(value
.first
);
283 entries
.push_back(entry_t(value
, hashtable
[hash
]));
284 hashtable
[hash
] = entries
.size() - 1;
286 return entries
.size() - 1;
290 class const_iterator
: public std::iterator
<std::forward_iterator_tag
, std::pair
<K
, T
>>
296 const_iterator(const dict
*ptr
, int index
) : ptr(ptr
), index(index
) { }
299 const_iterator
operator++() { index
--; return *this; }
300 bool operator<(const const_iterator
&other
) const { return index
> other
.index
; }
301 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
302 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
303 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
304 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
307 class iterator
: public std::iterator
<std::forward_iterator_tag
, std::pair
<K
, T
>>
313 iterator(dict
*ptr
, int index
) : ptr(ptr
), index(index
) { }
316 iterator
operator++() { index
--; return *this; }
317 bool operator<(const iterator
&other
) const { return index
> other
.index
; }
318 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
319 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
320 std::pair
<K
, T
> &operator*() { return ptr
->entries
[index
].udata
; }
321 std::pair
<K
, T
> *operator->() { return &ptr
->entries
[index
].udata
; }
322 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
323 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
324 operator const_iterator() const { return const_iterator(ptr
, index
); }
331 dict(const dict
&other
)
333 entries
= other
.entries
;
342 dict
&operator=(const dict
&other
) {
343 entries
= other
.entries
;
348 dict
&operator=(dict
&&other
) {
354 dict(const std::initializer_list
<std::pair
<K
, T
>> &list
)
356 for (auto &it
: list
)
360 template<class InputIterator
>
361 dict(InputIterator first
, InputIterator last
)
366 template<class InputIterator
>
367 void insert(InputIterator first
, InputIterator last
)
369 for (; first
!= last
; ++first
)
373 std::pair
<iterator
, bool> insert(const std::pair
<K
, T
> &value
)
375 int hash
= do_hash(value
.first
);
376 int i
= do_lookup(value
.first
, hash
);
378 return std::pair
<iterator
, bool>(iterator(this, i
), false);
379 i
= do_insert(value
, hash
);
380 return std::pair
<iterator
, bool>(iterator(this, i
), true);
383 int erase(const K
&key
)
385 int hash
= do_hash(key
);
386 int index
= do_lookup(key
, hash
);
387 return do_erase(index
, hash
);
390 iterator
erase(iterator it
)
392 int hash
= do_hash(it
->first
);
393 do_erase(it
.index
, hash
);
397 int count(const K
&key
) const
399 int hash
= do_hash(key
);
400 int i
= do_lookup(key
, hash
);
401 return i
< 0 ? 0 : 1;
404 int count(const K
&key
, const_iterator it
) const
406 int hash
= do_hash(key
);
407 int i
= do_lookup(key
, hash
);
408 return i
< 0 || i
> it
.index
? 0 : 1;
411 iterator
find(const K
&key
)
413 int hash
= do_hash(key
);
414 int i
= do_lookup(key
, hash
);
417 return iterator(this, i
);
420 const_iterator
find(const K
&key
) const
422 int hash
= do_hash(key
);
423 int i
= do_lookup(key
, hash
);
426 return const_iterator(this, i
);
431 int hash
= do_hash(key
);
432 int i
= do_lookup(key
, hash
);
434 throw std::out_of_range("dict::at()");
435 return entries
[i
].udata
.second
;
438 const T
& at(const K
&key
) const
440 int hash
= do_hash(key
);
441 int i
= do_lookup(key
, hash
);
443 throw std::out_of_range("dict::at()");
444 return entries
[i
].udata
.second
;
447 T
& operator[](const K
&key
)
449 int hash
= do_hash(key
);
450 int i
= do_lookup(key
, hash
);
452 i
= do_insert(std::pair
<K
, T
>(key
, T()), hash
);
453 return entries
[i
].udata
.second
;
456 void swap(dict
&other
)
458 hashtable
.swap(other
.hashtable
);
459 entries
.swap(other
.entries
);
462 bool operator==(const dict
&other
) const {
463 if (size() != other
.size())
465 for (auto &it
: entries
) {
466 auto oit
= other
.find(it
.udata
.first
);
467 if (oit
== other
.end() || oit
->second
!= it
.udata
.second
)
473 bool operator!=(const dict
&other
) const {
474 return !(*this == other
);
477 size_t size() const { return entries
.size(); }
478 bool empty() const { return entries
.empty(); }
479 void clear() { hashtable
.clear(); entries
.clear(); }
481 iterator
begin() { return iterator(this, int(entries
.size())-1); }
482 iterator
end() { return iterator(nullptr, -1); }
484 const_iterator
begin() const { return const_iterator(this, int(entries
.size())-1); }
485 const_iterator
end() const { return const_iterator(nullptr, -1); }
488 template<typename K
, typename OPS
= hash_ops
<K
>>
497 entry_t(const K
&udata
, int next
) : udata(udata
), next(next
) { }
500 std::vector
<int> hashtable
;
501 std::vector
<entry_t
> entries
;
505 static inline void do_assert(bool cond
) {
506 if (!cond
) throw std::runtime_error("pool<> assert failed.");
509 static inline void do_assert(bool) { }
512 int do_hash(const K
&key
) const
514 unsigned int hash
= 0;
515 if (!hashtable
.empty())
516 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
523 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
525 for (int i
= 0; i
< int(entries
.size()); i
++) {
526 do_assert(-1 <= entries
[i
].next
&& entries
[i
].next
< int(entries
.size()));
527 int hash
= do_hash(entries
[i
].udata
);
528 entries
[i
].next
= hashtable
[hash
];
533 int do_erase(int index
, int hash
)
535 do_assert(index
< int(entries
.size()));
536 if (hashtable
.empty() || index
< 0)
539 int k
= hashtable
[hash
];
541 hashtable
[hash
] = entries
[index
].next
;
543 while (entries
[k
].next
!= index
) {
545 do_assert(0 <= k
&& k
< int(entries
.size()));
547 entries
[k
].next
= entries
[index
].next
;
550 int back_idx
= entries
.size()-1;
552 if (index
!= back_idx
)
554 int back_hash
= do_hash(entries
[back_idx
].udata
);
556 k
= hashtable
[back_hash
];
558 hashtable
[back_hash
] = index
;
560 while (entries
[k
].next
!= back_idx
) {
562 do_assert(0 <= k
&& k
< int(entries
.size()));
564 entries
[k
].next
= index
;
567 entries
[index
] = std::move(entries
[back_idx
]);
578 int do_lookup(const K
&key
, int &hash
) const
580 if (hashtable
.empty())
583 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
584 ((pool
*)this)->do_rehash();
588 int index
= hashtable
[hash
];
590 while (index
>= 0 && !ops
.cmp(entries
[index
].udata
, key
)) {
591 index
= entries
[index
].next
;
592 do_assert(-1 <= index
&& index
< int(entries
.size()));
598 int do_insert(const K
&value
, int &hash
)
600 if (hashtable
.empty()) {
601 entries
.push_back(entry_t(value
, -1));
603 hash
= do_hash(value
);
605 entries
.push_back(entry_t(value
, hashtable
[hash
]));
606 hashtable
[hash
] = entries
.size() - 1;
608 return entries
.size() - 1;
612 class const_iterator
: public std::iterator
<std::forward_iterator_tag
, K
>
618 const_iterator(const pool
*ptr
, int index
) : ptr(ptr
), index(index
) { }
621 const_iterator
operator++() { index
--; return *this; }
622 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
623 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
624 const K
&operator*() const { return ptr
->entries
[index
].udata
; }
625 const K
*operator->() const { return &ptr
->entries
[index
].udata
; }
628 class iterator
: public std::iterator
<std::forward_iterator_tag
, K
>
634 iterator(pool
*ptr
, int index
) : ptr(ptr
), index(index
) { }
637 iterator
operator++() { index
--; return *this; }
638 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
639 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
640 K
&operator*() { return ptr
->entries
[index
].udata
; }
641 K
*operator->() { return &ptr
->entries
[index
].udata
; }
642 const K
&operator*() const { return ptr
->entries
[index
].udata
; }
643 const K
*operator->() const { return &ptr
->entries
[index
].udata
; }
644 operator const_iterator() const { return const_iterator(ptr
, index
); }
651 pool(const pool
&other
)
653 entries
= other
.entries
;
662 pool
&operator=(const pool
&other
) {
663 entries
= other
.entries
;
668 pool
&operator=(pool
&&other
) {
674 pool(const std::initializer_list
<K
> &list
)
676 for (auto &it
: list
)
680 template<class InputIterator
>
681 pool(InputIterator first
, InputIterator last
)
686 template<class InputIterator
>
687 void insert(InputIterator first
, InputIterator last
)
689 for (; first
!= last
; ++first
)
693 std::pair
<iterator
, bool> insert(const K
&value
)
695 int hash
= do_hash(value
);
696 int i
= do_lookup(value
, hash
);
698 return std::pair
<iterator
, bool>(iterator(this, i
), false);
699 i
= do_insert(value
, hash
);
700 return std::pair
<iterator
, bool>(iterator(this, i
), true);
703 int erase(const K
&key
)
705 int hash
= do_hash(key
);
706 int index
= do_lookup(key
, hash
);
707 return do_erase(index
, hash
);
710 iterator
erase(iterator it
)
712 int hash
= do_hash(*it
);
713 do_erase(it
.index
, hash
);
717 int count(const K
&key
) const
719 int hash
= do_hash(key
);
720 int i
= do_lookup(key
, hash
);
721 return i
< 0 ? 0 : 1;
724 int count(const K
&key
, const_iterator it
) const
726 int hash
= do_hash(key
);
727 int i
= do_lookup(key
, hash
);
728 return i
< 0 || i
> it
.index
? 0 : 1;
731 iterator
find(const K
&key
)
733 int hash
= do_hash(key
);
734 int i
= do_lookup(key
, hash
);
737 return iterator(this, i
);
740 const_iterator
find(const K
&key
) const
742 int hash
= do_hash(key
);
743 int i
= do_lookup(key
, hash
);
746 return const_iterator(this, i
);
749 bool operator[](const K
&key
)
751 int hash
= do_hash(key
);
752 int i
= do_lookup(key
, hash
);
756 void swap(pool
&other
)
758 hashtable
.swap(other
.hashtable
);
759 entries
.swap(other
.entries
);
762 bool operator==(const pool
&other
) const {
763 if (size() != other
.size())
765 for (auto &it
: entries
)
766 if (!other
.count(it
.udata
))
771 bool operator!=(const pool
&other
) const {
772 return !(*this == other
);
775 size_t size() const { return entries
.size(); }
776 bool empty() const { return entries
.empty(); }
777 void clear() { hashtable
.clear(); entries
.clear(); }
779 iterator
begin() { return iterator(this, int(entries
.size())-1); }
780 iterator
end() { return iterator(nullptr, -1); }
782 const_iterator
begin() const { return const_iterator(this, int(entries
.size())-1); }
783 const_iterator
end() const { return const_iterator(nullptr, -1); }
786 } /* namespace hashlib */