d363d68b5cb5014a329543e40bcc6ef3c70a7717
1 // This is free and unencumbered software released into the public domain.
3 // Anyone is free to copy, modify, publish, use, compile, sell, or
4 // distribute this software, either in source code form or as a compiled
5 // binary, for any purpose, commercial or non-commercial, and by any
8 // -------------------------------------------------------
9 // Written by Clifford Wolf <clifford@clifford.at> in 2014
10 // -------------------------------------------------------
20 #define HASHLIB_SIZE_FACTOR 3
22 // The XOR version of DJB2
23 // (traditionally 5381 is used as starting value for the djb2 hash)
24 inline unsigned int mkhash(unsigned int a
, unsigned int b
) {
25 return ((a
<< 5) + a
) ^ b
;
28 // The ADD version of DJB2
29 // (use this version for cache locality in b)
30 inline unsigned int mkhash_add(unsigned int a
, unsigned int b
) {
31 return ((a
<< 5) + a
) + b
;
34 template<typename T
> struct hash_ops
{
35 bool cmp(const T
&a
, const T
&b
) const {
38 unsigned int hash(const T
&a
) const {
43 template<> struct hash_ops
<int> {
45 bool cmp(T a
, T b
) const {
48 unsigned int hash(unsigned int a
) const {
53 template<> struct hash_ops
<std::string
> {
54 bool cmp(const std::string
&a
, const std::string
&b
) const {
57 unsigned int hash(const std::string
&a
) const {
65 struct hash_cstr_ops
{
66 bool cmp(const char *a
, const char *b
) const {
67 for (int i
= 0; a
[i
] || b
[i
]; i
++)
72 unsigned int hash(const char *a
) const {
73 unsigned int hash
= 5381;
75 hash
= mkhash(hash
, *(a
++));
81 bool cmp(const void *a
, const void *b
) const {
84 unsigned int hash(const void *a
) const {
85 return (unsigned long)a
;
90 bool cmp(const void *a
, const void *b
) const {
94 unsigned int hash(const T
*a
) const {
99 inline int hashtable_size(int old_size
)
101 // prime numbers, approx. in powers of two
102 if (old_size
< 53) return 53;
103 if (old_size
< 113) return 113;
104 if (old_size
< 251) return 251;
105 if (old_size
< 503) return 503;
106 if (old_size
< 1129) return 1129;
107 if (old_size
< 2503) return 2503;
108 if (old_size
< 5023) return 5023;
109 if (old_size
< 11299) return 11299;
110 if (old_size
< 25097) return 25097;
111 if (old_size
< 50291) return 50291;
112 if (old_size
< 112997) return 112997;
113 if (old_size
< 251003) return 251003;
114 if (old_size
< 503003) return 503003;
115 if (old_size
< 1129991) return 1129991;
116 if (old_size
< 2509993) return 2509993;
117 if (old_size
< 5029991) return 5029991;
118 if (old_size
< 11299997) return 11299997;
119 if (old_size
< 25099999) return 25099999;
120 if (old_size
< 50299999) return 50299999;
121 if (old_size
< 113000009) return 113000009;
122 if (old_size
< 250999999) return 250999999;
123 if (old_size
< 503000009) return 503000009;
124 if (old_size
< 1129999999) return 1129999999;
125 throw std::length_error("hash table exceeded maximum size");
128 template<typename K
, typename T
, typename OPS
= hash_ops
<K
>>
134 std::pair
<K
, T
> udata
;
136 entry_t() : link(-1) { }
137 entry_t(const std::pair
<K
, T
> &udata
) : link(1), udata(udata
) { }
139 bool is_free() const { return link
< 0; }
140 int get_next() const { return (link
> 0 ? link
: -link
) - 2; }
141 bool get_last() const { return get_next() == -1; }
142 void set_next_used(int next
) { link
= next
+ 2; }
143 void set_next_free(int next
) { link
= -(next
+ 2); }
146 std::vector
<int> hashtable
;
147 std::vector
<entry_t
> entries
;
148 int free_list
, counter
, begin_n
;
158 void init_from(const dict
<K
, T
, OPS
> &other
)
163 counter
= other
.size();
164 int new_size
= hashtable_size(HASHLIB_SIZE_FACTOR
* counter
);
165 hashtable
.resize(new_size
);
166 new_size
= new_size
/ HASHLIB_SIZE_FACTOR
+ 1;
167 entries
.reserve(new_size
);
169 for (auto &it
: other
)
170 entries
.push_back(entry_t(it
));
171 entries
.resize(new_size
);
175 int mkhash(const K
&key
) const
177 unsigned int hash
= 0;
178 if (!hashtable
.empty())
179 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
188 for (auto &h
: hashtable
)
191 for (int i
= 0; i
< int(entries
.size()); i
++)
192 if (entries
[i
].is_free()) {
193 entries
[i
].set_next_free(free_list
);
196 int hash
= mkhash(entries
[i
].udata
.first
);
197 entries
[i
].set_next_used(hashtable
[hash
]);
203 void do_erase(const K
&key
, int hash
)
206 int index
= hashtable
.empty() ? -1 : hashtable
[hash
];
210 if (ops
.cmp(entries
[index
].udata
.first
, key
)) {
212 hashtable
[hash
] = entries
[index
].get_next();
214 entries
[last_index
].set_next_used(entries
[index
].get_next());
215 entries
[index
].udata
= std::pair
<K
, T
>();
216 entries
[index
].set_next_free(free_list
);
220 else if (index
== begin_n
)
221 do begin_n
--; while (begin_n
>= 0 && entries
[begin_n
].is_free());
225 index
= entries
[index
].get_next();
229 int lookup_index(const K
&key
, int hash
) const
231 int index
= hashtable
.empty() ? -1 : hashtable
[hash
];
235 if (ops
.cmp(entries
[index
].udata
.first
, key
))
237 index
= entries
[index
].get_next();
241 int insert_at(const std::pair
<K
, T
> &value
, int hash
)
245 int i
= entries
.size();
246 int new_size
= hashtable_size(HASHLIB_SIZE_FACTOR
* entries
.size());
247 hashtable
.resize(new_size
);
248 entries
.resize(new_size
/ HASHLIB_SIZE_FACTOR
+ 1);
249 entries
[i
].udata
= value
;
250 entries
[i
].set_next_used(0);
257 free_list
= entries
[i
].get_next();
258 entries
[i
].udata
= value
;
259 entries
[i
].set_next_used(hashtable
[hash
]);
270 dict
<K
, T
, OPS
> *ptr
;
274 iterator(dict
<K
, T
, OPS
> *ptr
, int index
) : ptr(ptr
), index(index
) { }
275 iterator
operator++() { do index
--; while (index
>= 0 && ptr
->entries
[index
].is_free()); return *this; }
276 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
277 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
278 std::pair
<K
, T
> &operator*() { return ptr
->entries
[index
].udata
; }
279 std::pair
<K
, T
> *operator->() { return &ptr
->entries
[index
].udata
; }
280 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
281 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
286 const dict
<K
, T
, OPS
> *ptr
;
290 const_iterator(const dict
<K
, T
, OPS
> *ptr
, int index
) : ptr(ptr
), index(index
) { }
291 const_iterator
operator++() { do index
--; while (index
>= 0 && ptr
->entries
[index
].is_free()); return *this; }
292 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
293 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
294 const std::pair
<K
, T
> &operator*() const { return ptr
->entries
[index
].udata
; }
295 const std::pair
<K
, T
> *operator->() const { return &ptr
->entries
[index
].udata
; }
303 dict(const dict
<K
, T
, OPS
> &other
)
308 dict(dict
<K
, T
, OPS
> &&other
)
315 dict
<K
, T
, OPS
> &operator=(const dict
<K
, T
, OPS
> &other
) {
321 dict
<K
, T
, OPS
> &operator=(dict
<K
, T
, OPS
> &&other
) {
327 dict(const std::initializer_list
<std::pair
<K
, T
>> &list
)
330 for (auto &it
: list
)
334 template<class InputIterator
>
335 dict(InputIterator first
, InputIterator last
)
341 template<class InputIterator
>
342 void insert(InputIterator first
, InputIterator last
)
344 for (; first
!= last
; ++first
)
348 std::pair
<iterator
, bool> insert(const std::pair
<K
, T
> &value
)
350 int hash
= mkhash(value
.first
);
351 int i
= lookup_index(value
.first
, hash
);
353 return std::pair
<iterator
, bool>(iterator(this, i
), false);
354 i
= insert_at(value
, hash
);
355 return std::pair
<iterator
, bool>(iterator(this, i
), true);
358 void erase(const K
&key
)
360 int hash
= mkhash(key
);
364 void erase(const iterator it
)
366 int hash
= mkhash(it
->first
);
367 do_erase(it
->first
, hash
);
370 int count(const K
&key
) const
372 int hash
= mkhash(key
);
373 int i
= lookup_index(key
, hash
);
374 return i
< 0 ? 0 : 1;
377 iterator
find(const K
&key
)
379 int hash
= mkhash(key
);
380 int i
= lookup_index(key
, hash
);
383 return iterator(this, i
);
386 const_iterator
find(const K
&key
) const
388 int hash
= mkhash(key
);
389 int i
= lookup_index(key
, hash
);
392 return const_iterator(this, i
);
397 int hash
= mkhash(key
);
398 int i
= lookup_index(key
, hash
);
400 throw std::out_of_range("dict::at()");
401 return entries
[i
].udata
.second
;
404 const T
& at(const K
&key
) const
406 int hash
= mkhash(key
);
407 int i
= lookup_index(key
, hash
);
409 throw std::out_of_range("dict::at()");
410 return entries
[i
].udata
.second
;
413 T
& operator[](const K
&key
)
415 int hash
= mkhash(key
);
416 int i
= lookup_index(key
, hash
);
418 i
= insert_at(std::pair
<K
, T
>(key
, T()), hash
);
419 return entries
[i
].udata
.second
;
422 void swap(dict
<K
, T
, OPS
> &other
)
424 hashtable
.swap(other
.hashtable
);
425 entries
.swap(other
.entries
);
426 std::swap(free_list
, other
.free_list
);
427 std::swap(counter
, other
.counter
);
428 std::swap(begin_n
, other
.begin_n
);
431 bool operator==(const dict
<K
, T
, OPS
> &other
) const {
432 if (counter
!= other
.counter
)
436 if (entries
.size() < other
.entries
.size())
437 for (auto &it
: *this) {
438 auto oit
= other
.find(it
.first
);
439 if (oit
== other
.end() || oit
->second
!= it
.second
)
443 for (auto &oit
: other
) {
444 auto it
= find(oit
.first
);
445 if (it
== end() || it
->second
!= oit
.second
)
451 bool operator!=(const dict
<K
, T
, OPS
> &other
) const {
452 return !(*this == other
);
455 size_t size() const { return counter
; }
456 bool empty() const { return counter
== 0; }
457 void clear() { hashtable
.clear(); entries
.clear(); init(); }
459 iterator
begin() { return iterator(this, begin_n
); }
460 iterator
end() { return iterator(this, -1); }
462 const_iterator
begin() const { return const_iterator(this, begin_n
); }
463 const_iterator
end() const { return const_iterator(this, -1); }
466 template<typename K
, typename OPS
= hash_ops
<K
>>
474 entry_t() : link(-1) { }
475 entry_t(const K
&key
) : link(1), key(key
) { }
477 bool is_free() const { return link
< 0; }
478 int get_next() const { return (link
> 0 ? link
: -link
) - 2; }
479 bool get_last() const { return get_next() == -1; }
480 void set_next_used(int next
) { link
= next
+ 2; }
481 void set_next_free(int next
) { link
= -(next
+ 2); }
484 std::vector
<int> hashtable
;
485 std::vector
<entry_t
> entries
;
486 int free_list
, counter
, begin_n
;
496 void init_from(const pool
<K
, OPS
> &other
)
501 counter
= other
.size();
502 int new_size
= hashtable_size(HASHLIB_SIZE_FACTOR
* counter
);
503 hashtable
.resize(new_size
);
504 new_size
= new_size
/ HASHLIB_SIZE_FACTOR
+ 1;
505 entries
.reserve(new_size
);
507 for (auto &it
: other
)
508 entries
.push_back(entry_t(it
));
509 entries
.resize(new_size
);
513 int mkhash(const K
&key
) const
515 unsigned int hash
= 0;
516 if (!hashtable
.empty())
517 hash
= ops
.hash(key
) % (unsigned int)(hashtable
.size());
526 for (auto &h
: hashtable
)
529 for (int i
= 0; i
< int(entries
.size()); i
++)
530 if (entries
[i
].is_free()) {
531 entries
[i
].set_next_free(free_list
);
534 int hash
= mkhash(entries
[i
].key
);
535 entries
[i
].set_next_used(hashtable
[hash
]);
541 void do_erase(const K
&key
, int hash
)
544 int index
= hashtable
.empty() ? -1 : hashtable
[hash
];
548 if (ops
.cmp(entries
[index
].key
, key
)) {
550 hashtable
[hash
] = entries
[index
].get_next();
552 entries
[last_index
].set_next_used(entries
[index
].get_next());
553 entries
[index
].key
= K();
554 entries
[index
].set_next_free(free_list
);
558 else if (index
== begin_n
)
559 do begin_n
--; while (begin_n
>= 0 && entries
[begin_n
].is_free());
563 index
= entries
[index
].get_next();
567 int lookup_index(const K
&key
, int hash
) const
569 int index
= hashtable
.empty() ? -1 : hashtable
[hash
];
573 if (ops
.cmp(entries
[index
].key
, key
))
575 index
= entries
[index
].get_next();
579 int insert_at(const K
&key
, int hash
)
583 int i
= entries
.size();
584 int new_size
= hashtable_size(HASHLIB_SIZE_FACTOR
* entries
.size());
585 hashtable
.resize(new_size
);
586 entries
.resize(new_size
/ HASHLIB_SIZE_FACTOR
+ 1);
587 entries
[i
].key
= key
;
588 entries
[i
].set_next_used(0);
595 free_list
= entries
[i
].get_next();
596 entries
[i
].key
= key
;
597 entries
[i
].set_next_used(hashtable
[hash
]);
612 iterator(pool
<K
, OPS
> *ptr
, int index
) : ptr(ptr
), index(index
) { }
613 iterator
operator++() { do index
--; while (index
>= 0 && ptr
->entries
[index
].is_free()); return *this; }
614 bool operator==(const iterator
&other
) const { return index
== other
.index
; }
615 bool operator!=(const iterator
&other
) const { return index
!= other
.index
; }
616 K
&operator*() { return ptr
->entries
[index
].key
; }
617 K
*operator->() { return &ptr
->entries
[index
].key
; }
618 const K
&operator*() const { return ptr
->entries
[index
].key
; }
619 const K
*operator->() const { return &ptr
->entries
[index
].key
; }
624 const pool
<K
, OPS
> *ptr
;
628 const_iterator(const pool
<K
, OPS
> *ptr
, int index
) : ptr(ptr
), index(index
) { }
629 const_iterator
operator++() { do index
--; while (index
>= 0 && ptr
->entries
[index
].is_free()); return *this; }
630 bool operator==(const const_iterator
&other
) const { return index
== other
.index
; }
631 bool operator!=(const const_iterator
&other
) const { return index
!= other
.index
; }
632 const K
&operator*() const { return ptr
->entries
[index
].key
; }
633 const K
*operator->() const { return &ptr
->entries
[index
].key
; }
641 pool(const pool
<K
, OPS
> &other
)
646 pool(pool
<K
, OPS
> &&other
)
653 pool
<K
, OPS
> &operator=(const pool
<K
, OPS
> &other
) {
659 pool
<K
, OPS
> &operator=(pool
<K
, OPS
> &&other
) {
665 pool(const std::initializer_list
<K
> &list
)
668 for (auto &it
: list
)
672 template<class InputIterator
>
673 pool(InputIterator first
, InputIterator last
)
679 template<class InputIterator
>
680 void insert(InputIterator first
, InputIterator last
)
682 for (; first
!= last
; ++first
)
686 std::pair
<iterator
, bool> insert(const K
&key
)
688 int hash
= mkhash(key
);
689 int i
= lookup_index(key
, hash
);
691 return std::pair
<iterator
, bool>(iterator(this, i
), false);
692 i
= insert_at(key
, hash
);
693 return std::pair
<iterator
, bool>(iterator(this, i
), true);
696 void erase(const K
&key
)
698 int hash
= mkhash(key
);
702 void erase(const iterator it
)
704 int hash
= mkhash(*it
);
708 int count(const K
&key
) const
710 int hash
= mkhash(key
);
711 int i
= lookup_index(key
, hash
);
712 return i
< 0 ? 0 : 1;
715 iterator
find(const K
&key
)
717 int hash
= mkhash(key
);
718 int i
= lookup_index(key
, hash
);
721 return iterator(this, i
);
724 const_iterator
find(const K
&key
) const
726 int hash
= mkhash(key
);
727 int i
= lookup_index(key
, hash
);
730 return const_iterator(this, i
);
733 bool operator[](const K
&key
) const
735 int hash
= mkhash(key
);
736 int i
= lookup_index(key
, hash
);
740 void swap(pool
<K
, OPS
> &other
)
742 hashtable
.swap(other
.hashtable
);
743 entries
.swap(other
.entries
);
744 std::swap(free_list
, other
.free_list
);
745 std::swap(counter
, other
.counter
);
746 std::swap(begin_n
, other
.begin_n
);
749 bool operator==(const pool
<K
, OPS
> &other
) const {
750 if (counter
!= other
.counter
)
754 if (entries
.size() < other
.entries
.size())
755 for (auto &it
: *this) {
756 auto oit
= other
.find(it
.first
);
757 if (oit
== other
.end() || oit
->second
!= it
.second
)
761 for (auto &oit
: other
) {
762 auto it
= find(oit
.first
);
763 if (it
== end() || it
->second
!= oit
.second
)
769 bool operator!=(const pool
<K
, OPS
> &other
) const {
770 return !(*this == other
);
773 size_t size() const { return counter
; }
774 bool empty() const { return counter
== 0; }
775 void clear() { hashtable
.clear(); entries
.clear(); init(); }
777 iterator
begin() { return iterator(this, begin_n
); }
778 iterator
end() { return iterator(this, -1); }
780 const_iterator
begin() const { return const_iterator(this, begin_n
); }
781 const_iterator
end() const { return const_iterator(this, -1); }
784 } /* namespace hashlib */