7f2575d725a242763bf7cb695f5bab99d4365a54
1 // This is free and unencumbered software released into the public domain.
3 // Anyone is free to copy, modify, publish, use, compile, sell, or
4 // distribute this software, either in source code form or as a compiled
5 // binary, for any purpose, commercial or non-commercial, and by any
8 // -------------------------------------------------------
9 // Written by Clifford Wolf <clifford@clifford.at> in 2014
10 // -------------------------------------------------------
20 const int hashtable_size_trigger
= 2;
21 const int hashtable_size_factor
= 3;
23 // The XOR version of DJB2
24 inline unsigned int mkhash(unsigned int a
, unsigned int b
) {
25 return ((a
<< 5) + a
) ^ b
;
28 // traditionally 5381 is used as starting value for the djb2 hash
29 const unsigned int mkhash_init
= 5381;
31 // The ADD version of DJB2
32 // (usunsigned int mkhashe this version for cache locality in b)
33 inline unsigned int mkhash_add(unsigned int a
, unsigned int b
) {
34 return ((a
<< 5) + a
) + b
;
37 inline unsigned int mkhash_xorshift(unsigned int a
) {
42 } else if (sizeof(a
) == 8) {
47 throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints");
51 template<typename T
> struct hash_ops
{
52 bool cmp(const T
&a
, const T
&b
) const {
55 unsigned int hash(const T
&a
) const {
60 template<> struct hash_ops
<int> {
62 bool cmp(T a
, T b
) const {
66 unsigned int hash(T a
) const {
71 template<> struct hash_ops
<std::string
> {
72 bool cmp(const std::string
&a
, const std::string
&b
) const {
75 unsigned int hash(const std::string
&a
) const {
83 template<typename P
, typename Q
> struct hash_ops
<std::pair
<P
, Q
>> {
84 bool cmp(std::pair
<P
, Q
> a
, std::pair
<P
, Q
> b
) const {
87 unsigned int hash(std::pair
<P
, Q
> a
) const {
90 return mkhash(p_ops
.hash(a
.first
), q_ops
.hash(a
.second
));
94 template<typename T
> struct hash_ops
<std::vector
<T
>> {
95 bool cmp(std::vector
<T
> a
, std::vector
<T
> b
) const {
98 unsigned int hash(std::vector
<T
> a
) const {
100 unsigned int h
= mkhash_init
;
102 h
= mkhash(h
, t_ops
.hash(k
));
107 struct hash_cstr_ops
{
108 bool cmp(const char *a
, const char *b
) const {
109 for (int i
= 0; a
[i
] || b
[i
]; i
++)
114 unsigned int hash(const char *a
) const {
115 unsigned int hash
= mkhash_init
;
117 hash
= mkhash(hash
, *(a
++));
122 struct hash_ptr_ops
{
123 bool cmp(const void *a
, const void *b
) const {
126 unsigned int hash(const void *a
) const {
127 return (unsigned long)a
;
131 struct hash_obj_ops
{
132 bool cmp(const void *a
, const void *b
) const {
136 unsigned int hash(const T
*a
) const {
141 inline int hashtable_size(int min_size
)
143 static std::vector
<int> zero_and_some_primes
= {
144 0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
145 853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289,
146 12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281,
147 120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697,
148 897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037,
149 5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411,
150 25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239,
151 121590311, 151987889, 189984863, 237481091, 296851369, 371064217
154 for (auto p
: zero_and_some_primes
)
155 if (p
>= min_size
) return p
;
157 if (sizeof(int) == 4)
158 throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables.");
160 for (auto p
: zero_and_some_primes
)
161 if (100129 * p
> min_size
) return 100129 * p
;
163 throw std::length_error("hash table exceeded maximum size.");
166 template<typename K
, typename T
, typename OPS
= hash_ops
<K
>>
171 std::pair
<K
, T
> udata
;
175 entry_t(const std::pair
<K
, T
> &udata
, int next
) : udata(udata
), next(next
) { }
178 std::vector
<int> hashtable
;
179 std::vector
<entry_t
> entries
;
183 static inline void do_assert(bool cond
) {
184 if (!cond
) throw std::runtime_error("dict<> assert failed.");
187 static inline void do_assert(bool) { }
190 int do_hash(const K
&key
) const
192 unsigned int hash
= 0;
193 if (!hashtable
.empty())
194 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
201 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
203 for (int i
= 0; i
< int(entries
.size()); i
++) {
204 do_assert(-1 <= entries
[i
].next
&& entries
[i
].next
< int(entries
.size()));
205 int hash
= do_hash(entries
[i
].udata
.first
);
206 entries
[i
].next
= hashtable
[hash
];
211 int do_erase(int index
, int hash
)
213 do_assert(index
< int(entries
.size()));
214 if (hashtable
.empty() || index
< 0)
217 int k
= hashtable
[hash
];
219 hashtable
[hash
] = entries
[index
].next
;
221 while (entries
[k
].next
!= index
) {
223 do_assert(0 <= k
&& k
< int(entries
.size()));
225 entries
[k
].next
= entries
[index
].next
;
228 int back_idx
= entries
.size()-1;
230 if (index
!= back_idx
)
232 int back_hash
= do_hash(entries
[back_idx
].udata
.first
);
234 k
= hashtable
[back_hash
];
236 hashtable
[back_hash
] = index
;
238 while (entries
[k
].next
!= back_idx
) {
240 do_assert(0 <= k
&& k
< int(entries
.size()));
242 entries
[k
].next
= index
;
245 entries
[index
] = std::move(entries
[back_idx
]);
256 int do_lookup(const K
&key
, int &hash
) const
258 if (hashtable
.empty())
261 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
262 ((dict
*)this)->do_rehash();
266 int index
= hashtable
[hash
];
268 while (index
>= 0 && !ops
.cmp(entries
[index
].udata
.first
, key
)) {
269 index
= entries
[index
].next
;
270 do_assert(-1 <= index
&& index
< int(entries
.size()));
276 int do_insert(const std::pair
<K
, T
> &value
, int &hash
)
278 if (hashtable
.empty()) {
279 entries
.push_back(entry_t(value
, -1));
281 hash
= do_hash(value
.first
);
283 entries
.push_back(entry_t(value
, hashtable
[hash
]));
284 hashtable
[hash
] = entries
.size() - 1;
286 return entries
.size() - 1;
298 iterator(dict
*ptr
, int index
) : ptr(ptr
), index(index
) { }
299 iterator
operator++() { index
--; return *this; }
300 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
301 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
302 std::pair
<K
, T
> &operator*() { return ptr
->entries
[index
].udata
; }
303 std::pair
<K
, T
> *operator->() { return &ptr
->entries
[index
].udata
; }
304 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
305 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
316 const_iterator(const dict
*ptr
, int index
) : ptr(ptr
), index(index
) { }
317 const_iterator
operator++() { index
--; return *this; }
318 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
319 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
320 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
321 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
328 dict(const dict
&other
)
330 entries
= other
.entries
;
339 dict
&operator=(const dict
&other
) {
340 entries
= other
.entries
;
345 dict
&operator=(dict
&&other
) {
351 dict(const std::initializer_list
<std::pair
<K
, T
>> &list
)
353 for (auto &it
: list
)
357 template<class InputIterator
>
358 dict(InputIterator first
, InputIterator last
)
363 template<class InputIterator
>
364 void insert(InputIterator first
, InputIterator last
)
366 for (; first
!= last
; ++first
)
370 std::pair
<iterator
, bool> insert(const std::pair
<K
, T
> &value
)
372 int hash
= do_hash(value
.first
);
373 int i
= do_lookup(value
.first
, hash
);
375 return std::pair
<iterator
, bool>(iterator(this, i
), false);
376 i
= do_insert(value
, hash
);
377 return std::pair
<iterator
, bool>(iterator(this, i
), true);
380 int erase(const K
&key
)
382 int hash
= do_hash(key
);
383 int index
= do_lookup(key
, hash
);
384 return do_erase(index
, hash
);
387 iterator
erase(iterator it
)
389 int hash
= do_hash(it
->first
);
390 do_erase(it
.index
, hash
);
394 int count(const K
&key
) const
396 int hash
= do_hash(key
);
397 int i
= do_lookup(key
, hash
);
398 return i
< 0 ? 0 : 1;
401 iterator
find(const K
&key
)
403 int hash
= do_hash(key
);
404 int i
= do_lookup(key
, hash
);
407 return iterator(this, i
);
410 const_iterator
find(const K
&key
) const
412 int hash
= do_hash(key
);
413 int i
= do_lookup(key
, hash
);
416 return const_iterator(this, i
);
421 int hash
= do_hash(key
);
422 int i
= do_lookup(key
, hash
);
424 throw std::out_of_range("dict::at()");
425 return entries
[i
].udata
.second
;
428 const T
& at(const K
&key
) const
430 int hash
= do_hash(key
);
431 int i
= do_lookup(key
, hash
);
433 throw std::out_of_range("dict::at()");
434 return entries
[i
].udata
.second
;
437 T
& operator[](const K
&key
)
439 int hash
= do_hash(key
);
440 int i
= do_lookup(key
, hash
);
442 i
= do_insert(std::pair
<K
, T
>(key
, T()), hash
);
443 return entries
[i
].udata
.second
;
446 void swap(dict
&other
)
448 hashtable
.swap(other
.hashtable
);
449 entries
.swap(other
.entries
);
452 bool operator==(const dict
&other
) const {
453 if (size() != other
.size())
455 for (auto &it
: entries
) {
456 auto oit
= other
.find(it
.udata
.first
);
457 if (oit
== other
.end() || oit
->second
!= it
.udata
.second
)
463 bool operator!=(const dict
&other
) const {
464 return !(*this == other
);
467 size_t size() const { return entries
.size(); }
468 bool empty() const { return entries
.empty(); }
469 void clear() { hashtable
.clear(); entries
.clear(); }
471 iterator
begin() { return iterator(this, int(entries
.size())-1); }
472 iterator
end() { return iterator(nullptr, -1); }
474 const_iterator
begin() const { return const_iterator(this, int(entries
.size())-1); }
475 const_iterator
end() const { return const_iterator(nullptr, -1); }
478 // ********************************************************************************
479 // ********************************************************************************
480 // ********************************************************************************
481 // ********************************************************************************
482 // ********************************************************************************
485 template<typename K
, typename OPS
= hash_ops
<K
>>
494 entry_t(const K
&udata
, int next
) : udata(udata
), next(next
) { }
497 std::vector
<int> hashtable
;
498 std::vector
<entry_t
> entries
;
502 static inline void do_assert(bool cond
) {
503 if (!cond
) throw std::runtime_error("pool<> assert failed.");
506 static inline void do_assert(bool) { }
509 int do_hash(const K
&key
) const
511 unsigned int hash
= 0;
512 if (!hashtable
.empty())
513 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
520 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
522 for (int i
= 0; i
< int(entries
.size()); i
++) {
523 do_assert(-1 <= entries
[i
].next
&& entries
[i
].next
< int(entries
.size()));
524 int hash
= do_hash(entries
[i
].udata
);
525 entries
[i
].next
= hashtable
[hash
];
530 int do_erase(int index
, int hash
)
532 do_assert(index
< int(entries
.size()));
533 if (hashtable
.empty() || index
< 0)
536 int k
= hashtable
[hash
];
538 hashtable
[hash
] = entries
[index
].next
;
540 while (entries
[k
].next
!= index
) {
542 do_assert(0 <= k
&& k
< int(entries
.size()));
544 entries
[k
].next
= entries
[index
].next
;
547 int back_idx
= entries
.size()-1;
549 if (index
!= back_idx
)
551 int back_hash
= do_hash(entries
[back_idx
].udata
);
553 k
= hashtable
[back_hash
];
555 hashtable
[back_hash
] = index
;
557 while (entries
[k
].next
!= back_idx
) {
559 do_assert(0 <= k
&& k
< int(entries
.size()));
561 entries
[k
].next
= index
;
564 entries
[index
] = std::move(entries
[back_idx
]);
575 int do_lookup(const K
&key
, int &hash
) const
577 if (hashtable
.empty())
580 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
581 ((pool
*)this)->do_rehash();
585 int index
= hashtable
[hash
];
587 while (index
>= 0 && !ops
.cmp(entries
[index
].udata
, key
)) {
588 index
= entries
[index
].next
;
589 do_assert(-1 <= index
&& index
< int(entries
.size()));
595 int do_insert(const K
&value
, int &hash
)
597 if (hashtable
.empty()) {
598 entries
.push_back(entry_t(value
, -1));
600 hash
= do_hash(value
);
602 entries
.push_back(entry_t(value
, hashtable
[hash
]));
603 hashtable
[hash
] = entries
.size() - 1;
605 return entries
.size() - 1;
617 iterator(pool
*ptr
, int index
) : ptr(ptr
), index(index
) { }
618 iterator
operator++() { index
--; return *this; }
619 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
620 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
621 const K
&operator*() const { return ptr
->entries
[index
].udata
; }
622 const K
*operator->() const { return &ptr
->entries
[index
].udata
; }
633 const_iterator(const pool
*ptr
, int index
) : ptr(ptr
), index(index
) { }
634 const_iterator
operator++() { index
--; return *this; }
635 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
636 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
637 const K
&operator*() const { return ptr
->entries
[index
].udata
; }
638 const K
*operator->() const { return &ptr
->entries
[index
].udata
; }
645 pool(const pool
&other
)
647 entries
= other
.entries
;
656 pool
&operator=(const pool
&other
) {
657 entries
= other
.entries
;
662 pool
&operator=(pool
&&other
) {
668 pool(const std::initializer_list
<K
> &list
)
670 for (auto &it
: list
)
674 template<class InputIterator
>
675 pool(InputIterator first
, InputIterator last
)
680 template<class InputIterator
>
681 void insert(InputIterator first
, InputIterator last
)
683 for (; first
!= last
; ++first
)
687 std::pair
<iterator
, bool> insert(const K
&value
)
689 int hash
= do_hash(value
);
690 int i
= do_lookup(value
, hash
);
692 return std::pair
<iterator
, bool>(iterator(this, i
), false);
693 i
= do_insert(value
, hash
);
694 return std::pair
<iterator
, bool>(iterator(this, i
), true);
697 int erase(const K
&key
)
699 int hash
= do_hash(key
);
700 int index
= do_lookup(key
, hash
);
701 return do_erase(index
, hash
);
704 iterator
erase(iterator it
)
706 int hash
= do_hash(it
->first
);
707 do_erase(it
.index
, hash
);
711 int count(const K
&key
) const
713 int hash
= do_hash(key
);
714 int i
= do_lookup(key
, hash
);
715 return i
< 0 ? 0 : 1;
718 iterator
find(const K
&key
)
720 int hash
= do_hash(key
);
721 int i
= do_lookup(key
, hash
);
724 return iterator(this, i
);
727 const_iterator
find(const K
&key
) const
729 int hash
= do_hash(key
);
730 int i
= do_lookup(key
, hash
);
733 return const_iterator(this, i
);
736 bool operator[](const K
&key
)
738 int hash
= do_hash(key
);
739 int i
= do_lookup(key
, hash
);
743 void swap(pool
&other
)
745 hashtable
.swap(other
.hashtable
);
746 entries
.swap(other
.entries
);
749 bool operator==(const pool
&other
) const {
750 if (size() != other
.size())
752 for (auto &it
: entries
)
753 if (!other
.count(it
.udata
))
758 bool operator!=(const pool
&other
) const {
759 return !(*this == other
);
762 size_t size() const { return entries
.size(); }
763 bool empty() const { return entries
.empty(); }
764 void clear() { hashtable
.clear(); entries
.clear(); }
766 iterator
begin() { return iterator(this, int(entries
.size())-1); }
767 iterator
end() { return iterator(nullptr, -1); }
769 const_iterator
begin() const { return const_iterator(this, int(entries
.size())-1); }
770 const_iterator
end() const { return const_iterator(nullptr, -1); }
773 } /* namespace hashlib */