1 // This is free and unencumbered software released into the public domain.
3 // Anyone is free to copy, modify, publish, use, compile, sell, or
4 // distribute this software, either in source code form or as a compiled
5 // binary, for any purpose, commercial or non-commercial, and by any
8 // -------------------------------------------------------
9 // Written by Clifford Wolf <clifford@clifford.at> in 2014
10 // -------------------------------------------------------
21 const int hashtable_size_trigger
= 2;
22 const int hashtable_size_factor
= 3;
24 // The XOR version of DJB2
25 inline unsigned int mkhash(unsigned int a
, unsigned int b
) {
26 return ((a
<< 5) + a
) ^ b
;
29 // traditionally 5381 is used as starting value for the djb2 hash
30 const unsigned int mkhash_init
= 5381;
32 // The ADD version of DJB2
33 // (usunsigned int mkhashe this version for cache locality in b)
34 inline unsigned int mkhash_add(unsigned int a
, unsigned int b
) {
35 return ((a
<< 5) + a
) + b
;
38 inline unsigned int mkhash_xorshift(unsigned int a
) {
43 } else if (sizeof(a
) == 8) {
48 throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints");
52 template<typename T
> struct hash_ops
{
53 static inline bool cmp(const T
&a
, const T
&b
) {
56 static inline unsigned int hash(const T
&a
) {
61 template<> struct hash_ops
<int> {
63 static inline bool cmp(T a
, T b
) {
67 static inline unsigned int hash(T a
) {
72 template<> struct hash_ops
<std::string
> {
73 static inline bool cmp(const std::string
&a
, const std::string
&b
) {
76 static inline unsigned int hash(const std::string
&a
) {
84 template<typename P
, typename Q
> struct hash_ops
<std::pair
<P
, Q
>> {
85 static inline bool cmp(std::pair
<P
, Q
> a
, std::pair
<P
, Q
> b
) {
88 static inline unsigned int hash(std::pair
<P
, Q
> a
) {
91 return mkhash(p_ops
.hash(a
.first
), q_ops
.hash(a
.second
));
95 template<typename T
> struct hash_ops
<std::vector
<T
>> {
96 static inline bool cmp(std::vector
<T
> a
, std::vector
<T
> b
) {
99 static inline unsigned int hash(std::vector
<T
> a
) {
101 unsigned int h
= mkhash_init
;
103 h
= mkhash(h
, t_ops
.hash(k
));
108 struct hash_cstr_ops
{
109 static inline bool cmp(const char *a
, const char *b
) {
110 for (int i
= 0; a
[i
] || b
[i
]; i
++)
115 static inline unsigned int hash(const char *a
) {
116 unsigned int hash
= mkhash_init
;
118 hash
= mkhash(hash
, *(a
++));
123 struct hash_ptr_ops
{
124 static inline bool cmp(const void *a
, const void *b
) {
127 static inline unsigned int hash(const void *a
) {
128 return (unsigned long)a
;
132 struct hash_obj_ops
{
133 static inline bool cmp(const void *a
, const void *b
) {
137 static inline unsigned int hash(const T
*a
) {
142 inline int hashtable_size(int min_size
)
144 static std::vector
<int> zero_and_some_primes
= {
145 0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
146 853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289,
147 12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281,
148 120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697,
149 897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037,
150 5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411,
151 25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239,
152 121590311, 151987889, 189984863, 237481091, 296851369, 371064217
155 for (auto p
: zero_and_some_primes
)
156 if (p
>= min_size
) return p
;
158 if (sizeof(int) == 4)
159 throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables.");
161 for (auto p
: zero_and_some_primes
)
162 if (100129 * p
> min_size
) return 100129 * p
;
164 throw std::length_error("hash table exceeded maximum size.");
167 template<typename K
, typename T
, typename OPS
= hash_ops
<K
>> class dict
;
168 template<typename K
, int offset
= 0, typename OPS
= hash_ops
<K
>> class idict
;
169 template<typename K
, typename OPS
= hash_ops
<K
>> class pool
;
171 template<typename K
, typename T
, typename OPS
>
176 std::pair
<K
, T
> udata
;
180 entry_t(const std::pair
<K
, T
> &udata
, int next
) : udata(udata
), next(next
) { }
183 std::vector
<int> hashtable
;
184 std::vector
<entry_t
> entries
;
188 static inline void do_assert(bool cond
) {
189 if (!cond
) throw std::runtime_error("dict<> assert failed.");
192 static inline void do_assert(bool) { }
195 int do_hash(const K
&key
) const
197 unsigned int hash
= 0;
198 if (!hashtable
.empty())
199 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
206 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
208 for (int i
= 0; i
< int(entries
.size()); i
++) {
209 do_assert(-1 <= entries
[i
].next
&& entries
[i
].next
< int(entries
.size()));
210 int hash
= do_hash(entries
[i
].udata
.first
);
211 entries
[i
].next
= hashtable
[hash
];
216 int do_erase(int index
, int hash
)
218 do_assert(index
< int(entries
.size()));
219 if (hashtable
.empty() || index
< 0)
222 int k
= hashtable
[hash
];
224 hashtable
[hash
] = entries
[index
].next
;
226 while (entries
[k
].next
!= index
) {
228 do_assert(0 <= k
&& k
< int(entries
.size()));
230 entries
[k
].next
= entries
[index
].next
;
233 int back_idx
= entries
.size()-1;
235 if (index
!= back_idx
)
237 int back_hash
= do_hash(entries
[back_idx
].udata
.first
);
239 k
= hashtable
[back_hash
];
241 hashtable
[back_hash
] = index
;
243 while (entries
[k
].next
!= back_idx
) {
245 do_assert(0 <= k
&& k
< int(entries
.size()));
247 entries
[k
].next
= index
;
250 entries
[index
] = std::move(entries
[back_idx
]);
261 int do_lookup(const K
&key
, int &hash
) const
263 if (hashtable
.empty())
266 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
267 ((dict
*)this)->do_rehash();
271 int index
= hashtable
[hash
];
273 while (index
>= 0 && !ops
.cmp(entries
[index
].udata
.first
, key
)) {
274 index
= entries
[index
].next
;
275 do_assert(-1 <= index
&& index
< int(entries
.size()));
281 int do_insert(const std::pair
<K
, T
> &value
, int &hash
)
283 if (hashtable
.empty()) {
284 entries
.push_back(entry_t(value
, -1));
286 hash
= do_hash(value
.first
);
288 entries
.push_back(entry_t(value
, hashtable
[hash
]));
289 hashtable
[hash
] = entries
.size() - 1;
291 return entries
.size() - 1;
295 class const_iterator
: public std::iterator
<std::forward_iterator_tag
, std::pair
<K
, T
>>
301 const_iterator(const dict
*ptr
, int index
) : ptr(ptr
), index(index
) { }
304 const_iterator
operator++() { index
--; return *this; }
305 bool operator<(const const_iterator
&other
) const { return index
> other
.index
; }
306 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
307 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
308 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
309 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
312 class iterator
: public std::iterator
<std::forward_iterator_tag
, std::pair
<K
, T
>>
318 iterator(dict
*ptr
, int index
) : ptr(ptr
), index(index
) { }
321 iterator
operator++() { index
--; return *this; }
322 bool operator<(const iterator
&other
) const { return index
> other
.index
; }
323 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
324 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
325 std::pair
<K
, T
> &operator*() { return ptr
->entries
[index
].udata
; }
326 std::pair
<K
, T
> *operator->() { return &ptr
->entries
[index
].udata
; }
327 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
328 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
329 operator const_iterator() const { return const_iterator(ptr
, index
); }
336 dict(const dict
&other
)
338 entries
= other
.entries
;
347 dict
&operator=(const dict
&other
) {
348 entries
= other
.entries
;
353 dict
&operator=(dict
&&other
) {
359 dict(const std::initializer_list
<std::pair
<K
, T
>> &list
)
361 for (auto &it
: list
)
365 template<class InputIterator
>
366 dict(InputIterator first
, InputIterator last
)
371 template<class InputIterator
>
372 void insert(InputIterator first
, InputIterator last
)
374 for (; first
!= last
; ++first
)
378 std::pair
<iterator
, bool> insert(const std::pair
<K
, T
> &value
)
380 int hash
= do_hash(value
.first
);
381 int i
= do_lookup(value
.first
, hash
);
383 return std::pair
<iterator
, bool>(iterator(this, i
), false);
384 i
= do_insert(value
, hash
);
385 return std::pair
<iterator
, bool>(iterator(this, i
), true);
388 int erase(const K
&key
)
390 int hash
= do_hash(key
);
391 int index
= do_lookup(key
, hash
);
392 return do_erase(index
, hash
);
395 iterator
erase(iterator it
)
397 int hash
= do_hash(it
->first
);
398 do_erase(it
.index
, hash
);
402 int count(const K
&key
) const
404 int hash
= do_hash(key
);
405 int i
= do_lookup(key
, hash
);
406 return i
< 0 ? 0 : 1;
409 int count(const K
&key
, const_iterator it
) const
411 int hash
= do_hash(key
);
412 int i
= do_lookup(key
, hash
);
413 return i
< 0 || i
> it
.index
? 0 : 1;
416 iterator
find(const K
&key
)
418 int hash
= do_hash(key
);
419 int i
= do_lookup(key
, hash
);
422 return iterator(this, i
);
425 const_iterator
find(const K
&key
) const
427 int hash
= do_hash(key
);
428 int i
= do_lookup(key
, hash
);
431 return const_iterator(this, i
);
436 int hash
= do_hash(key
);
437 int i
= do_lookup(key
, hash
);
439 throw std::out_of_range("dict::at()");
440 return entries
[i
].udata
.second
;
443 const T
& at(const K
&key
) const
445 int hash
= do_hash(key
);
446 int i
= do_lookup(key
, hash
);
448 throw std::out_of_range("dict::at()");
449 return entries
[i
].udata
.second
;
452 T
& operator[](const K
&key
)
454 int hash
= do_hash(key
);
455 int i
= do_lookup(key
, hash
);
457 i
= do_insert(std::pair
<K
, T
>(key
, T()), hash
);
458 return entries
[i
].udata
.second
;
461 template<typename Compare
= std::less
<K
>>
462 void sort(Compare comp
= Compare())
464 std::sort(entries
.begin(), entries
.end(), [comp
](const entry_t
&a
, const entry_t
&b
){ return comp(b
.udata
.first
, a
.udata
.first
); });
468 void swap(dict
&other
)
470 hashtable
.swap(other
.hashtable
);
471 entries
.swap(other
.entries
);
474 bool operator==(const dict
&other
) const {
475 if (size() != other
.size())
477 for (auto &it
: entries
) {
478 auto oit
= other
.find(it
.udata
.first
);
479 if (oit
== other
.end() || oit
->second
!= it
.udata
.second
)
485 bool operator!=(const dict
&other
) const {
486 return !(*this == other
);
489 size_t size() const { return entries
.size(); }
490 bool empty() const { return entries
.empty(); }
491 void clear() { hashtable
.clear(); entries
.clear(); }
493 iterator
begin() { return iterator(this, int(entries
.size())-1); }
494 iterator
end() { return iterator(nullptr, -1); }
496 const_iterator
begin() const { return const_iterator(this, int(entries
.size())-1); }
497 const_iterator
end() const { return const_iterator(nullptr, -1); }
500 template<typename K
, typename OPS
>
503 template<typename
, int, typename
> friend class idict
;
512 entry_t(const K
&udata
, int next
) : udata(udata
), next(next
) { }
515 std::vector
<int> hashtable
;
516 std::vector
<entry_t
> entries
;
520 static inline void do_assert(bool cond
) {
521 if (!cond
) throw std::runtime_error("pool<> assert failed.");
524 static inline void do_assert(bool) { }
527 int do_hash(const K
&key
) const
529 unsigned int hash
= 0;
530 if (!hashtable
.empty())
531 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
538 hashtable
.resize(hashtable_size(entries
.size() * hashtable_size_factor
), -1);
540 for (int i
= 0; i
< int(entries
.size()); i
++) {
541 do_assert(-1 <= entries
[i
].next
&& entries
[i
].next
< int(entries
.size()));
542 int hash
= do_hash(entries
[i
].udata
);
543 entries
[i
].next
= hashtable
[hash
];
548 int do_erase(int index
, int hash
)
550 do_assert(index
< int(entries
.size()));
551 if (hashtable
.empty() || index
< 0)
554 int k
= hashtable
[hash
];
556 hashtable
[hash
] = entries
[index
].next
;
558 while (entries
[k
].next
!= index
) {
560 do_assert(0 <= k
&& k
< int(entries
.size()));
562 entries
[k
].next
= entries
[index
].next
;
565 int back_idx
= entries
.size()-1;
567 if (index
!= back_idx
)
569 int back_hash
= do_hash(entries
[back_idx
].udata
);
571 k
= hashtable
[back_hash
];
573 hashtable
[back_hash
] = index
;
575 while (entries
[k
].next
!= back_idx
) {
577 do_assert(0 <= k
&& k
< int(entries
.size()));
579 entries
[k
].next
= index
;
582 entries
[index
] = std::move(entries
[back_idx
]);
593 int do_lookup(const K
&key
, int &hash
) const
595 if (hashtable
.empty())
598 if (entries
.size() * hashtable_size_trigger
> hashtable
.size()) {
599 ((pool
*)this)->do_rehash();
603 int index
= hashtable
[hash
];
605 while (index
>= 0 && !ops
.cmp(entries
[index
].udata
, key
)) {
606 index
= entries
[index
].next
;
607 do_assert(-1 <= index
&& index
< int(entries
.size()));
613 int do_insert(const K
&value
, int &hash
)
615 if (hashtable
.empty()) {
616 entries
.push_back(entry_t(value
, -1));
618 hash
= do_hash(value
);
620 entries
.push_back(entry_t(value
, hashtable
[hash
]));
621 hashtable
[hash
] = entries
.size() - 1;
623 return entries
.size() - 1;
627 class const_iterator
: public std::iterator
<std::forward_iterator_tag
, K
>
633 const_iterator(const pool
*ptr
, int index
) : ptr(ptr
), index(index
) { }
636 const_iterator
operator++() { index
--; return *this; }
637 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
638 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
639 const K
&operator*() const { return ptr
->entries
[index
].udata
; }
640 const K
*operator->() const { return &ptr
->entries
[index
].udata
; }
643 class iterator
: public std::iterator
<std::forward_iterator_tag
, K
>
649 iterator(pool
*ptr
, int index
) : ptr(ptr
), index(index
) { }
652 iterator
operator++() { index
--; return *this; }
653 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
654 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
655 K
&operator*() { return ptr
->entries
[index
].udata
; }
656 K
*operator->() { return &ptr
->entries
[index
].udata
; }
657 const K
&operator*() const { return ptr
->entries
[index
].udata
; }
658 const K
*operator->() const { return &ptr
->entries
[index
].udata
; }
659 operator const_iterator() const { return const_iterator(ptr
, index
); }
666 pool(const pool
&other
)
668 entries
= other
.entries
;
677 pool
&operator=(const pool
&other
) {
678 entries
= other
.entries
;
683 pool
&operator=(pool
&&other
) {
689 pool(const std::initializer_list
<K
> &list
)
691 for (auto &it
: list
)
695 template<class InputIterator
>
696 pool(InputIterator first
, InputIterator last
)
701 template<class InputIterator
>
702 void insert(InputIterator first
, InputIterator last
)
704 for (; first
!= last
; ++first
)
708 std::pair
<iterator
, bool> insert(const K
&value
)
710 int hash
= do_hash(value
);
711 int i
= do_lookup(value
, hash
);
713 return std::pair
<iterator
, bool>(iterator(this, i
), false);
714 i
= do_insert(value
, hash
);
715 return std::pair
<iterator
, bool>(iterator(this, i
), true);
718 int erase(const K
&key
)
720 int hash
= do_hash(key
);
721 int index
= do_lookup(key
, hash
);
722 return do_erase(index
, hash
);
725 iterator
erase(iterator it
)
727 int hash
= do_hash(*it
);
728 do_erase(it
.index
, hash
);
732 int count(const K
&key
) const
734 int hash
= do_hash(key
);
735 int i
= do_lookup(key
, hash
);
736 return i
< 0 ? 0 : 1;
739 int count(const K
&key
, const_iterator it
) const
741 int hash
= do_hash(key
);
742 int i
= do_lookup(key
, hash
);
743 return i
< 0 || i
> it
.index
? 0 : 1;
746 iterator
find(const K
&key
)
748 int hash
= do_hash(key
);
749 int i
= do_lookup(key
, hash
);
752 return iterator(this, i
);
755 const_iterator
find(const K
&key
) const
757 int hash
= do_hash(key
);
758 int i
= do_lookup(key
, hash
);
761 return const_iterator(this, i
);
764 bool operator[](const K
&key
)
766 int hash
= do_hash(key
);
767 int i
= do_lookup(key
, hash
);
771 template<typename Compare
= std::less
<K
>>
772 void sort(Compare comp
= Compare())
774 std::sort(entries
.begin(), entries
.end(), [comp
](const entry_t
&a
, const entry_t
&b
){ return comp(b
.udata
, a
.udata
); });
778 void swap(pool
&other
)
780 hashtable
.swap(other
.hashtable
);
781 entries
.swap(other
.entries
);
784 bool operator==(const pool
&other
) const {
785 if (size() != other
.size())
787 for (auto &it
: entries
)
788 if (!other
.count(it
.udata
))
793 bool operator!=(const pool
&other
) const {
794 return !(*this == other
);
797 size_t size() const { return entries
.size(); }
798 bool empty() const { return entries
.empty(); }
799 void clear() { hashtable
.clear(); entries
.clear(); }
801 iterator
begin() { return iterator(this, int(entries
.size())-1); }
802 iterator
end() { return iterator(nullptr, -1); }
804 const_iterator
begin() const { return const_iterator(this, int(entries
.size())-1); }
805 const_iterator
end() const { return const_iterator(nullptr, -1); }
808 template<typename K
, int offset
, typename OPS
>
811 pool
<K
, OPS
> database
;
814 typedef typename pool
<K
, OPS
>::const_iterator const_iterator
;
816 int operator()(const K
&key
)
818 int hash
= database
.do_hash(key
);
819 int i
= database
.do_lookup(key
, hash
);
821 i
= database
.do_insert(key
, hash
);
825 int at(const K
&key
) const
827 int hash
= database
.do_hash(key
);
828 int i
= database
.do_lookup(key
, hash
);
830 throw std::out_of_range("idict::at()");
834 int count(const K
&key
) const
836 int hash
= database
.do_hash(key
);
837 int i
= database
.do_lookup(key
, hash
);
838 return i
< 0 ? 0 : 1;
841 void expect(const K
&key
, int i
)
843 int j
= (*this)(key
);
845 throw std::out_of_range("idict::expect()");
848 const K
&operator[](int index
) const
850 return database
.entries
.at(index
- offset
).udata
;
853 const_iterator
begin() const { return database
.begin(); }
854 const_iterator
end() const { return database
.end(); }
857 } /* namespace hashlib */