X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=kernel%2Fhashlib.h;h=e7cb312ed87f63f4adf944c05bce7a4d4fd42d4e;hb=f3405e7c7990f7ce1dc23d7a464c9a30e641126c;hp=d363d68b5cb5014a329543e40bcc6ef3c70a7717;hpb=89723a45cf86a508d052dd54aa058719e314cc3c;p=yosys.git diff --git a/kernel/hashlib.h b/kernel/hashlib.h index d363d68b5..e7cb312ed 100644 --- a/kernel/hashlib.h +++ b/kernel/hashlib.h @@ -1,5 +1,5 @@ // This is free and unencumbered software released into the public domain. -// +// // Anyone is free to copy, modify, publish, use, compile, sell, or // distribute this software, either in source code form or as a compiled // binary, for any purpose, commercial or non-commercial, and by any @@ -10,51 +10,80 @@ // ------------------------------------------------------- #ifndef HASHLIB_H +#define HASHLIB_H #include +#include #include #include namespace hashlib { -#define HASHLIB_SIZE_FACTOR 3 +const int hashtable_size_trigger = 2; +const int hashtable_size_factor = 3; // The XOR version of DJB2 -// (traditionally 5381 is used as starting value for the djb2 hash) inline unsigned int mkhash(unsigned int a, unsigned int b) { return ((a << 5) + a) ^ b; } +// traditionally 5381 is used as starting value for the djb2 hash +const unsigned int mkhash_init = 5381; + // The ADD version of DJB2 // (use this version for cache locality in b) inline unsigned int mkhash_add(unsigned int a, unsigned int b) { return ((a << 5) + a) + b; } +inline unsigned int mkhash_xorshift(unsigned int a) { + if (sizeof(a) == 4) { + a ^= a << 13; + a ^= a >> 17; + a ^= a << 5; + } else if (sizeof(a) == 8) { + a ^= a << 13; + a ^= a >> 7; + a ^= a << 17; + } else + throw std::runtime_error("mkhash_xorshift() only implemented for 32 bit and 64 bit ints"); + return a; +} + template struct hash_ops { - bool cmp(const T &a, const T &b) const { + static inline bool cmp(const T &a, const T &b) { return a == b; } - unsigned int hash(const T &a) const { + static inline unsigned int hash(const T &a) { return a.hash(); } }; -template<> struct hash_ops { +struct hash_int_ops { template - bool cmp(T a, T b) const { + static inline bool cmp(T a, T b) { return a == b; } - unsigned int hash(unsigned int a) const { +}; + +template<> struct hash_ops : hash_int_ops +{ + static inline unsigned int hash(int32_t a) { return a; } }; +template<> struct hash_ops : hash_int_ops +{ + static inline unsigned int hash(int64_t a) { + return mkhash((unsigned int)(a), (unsigned int)(a >> 32)); + } +}; template<> struct hash_ops { - bool cmp(const std::string &a, const std::string &b) const { + static inline bool cmp(const std::string &a, const std::string &b) { return a == b; } - unsigned int hash(const std::string &a) const { + static inline unsigned int hash(const std::string &a) { unsigned int v = 0; for (auto c : a) v = mkhash(v, c); @@ -62,117 +91,137 @@ template<> struct hash_ops { } }; +template struct hash_ops> { + static inline bool cmp(std::pair a, std::pair b) { + return a == b; + } + static inline unsigned int hash(std::pair a) { + return mkhash(hash_ops

::hash(a.first), hash_ops::hash(a.second)); + } +}; + +template struct hash_ops> { + static inline bool cmp(std::tuple a, std::tuple b) { + return a == b; + } + template + static inline typename std::enable_if::type hash(std::tuple) { + return mkhash_init; + } + template + static inline typename std::enable_if::type hash(std::tuple a) { + typedef hash_ops>::type> element_ops_t; + return mkhash(hash(a), element_ops_t::hash(std::get(a))); + } +}; + +template struct hash_ops> { + static inline bool cmp(std::vector a, std::vector b) { + return a == b; + } + static inline unsigned int hash(std::vector a) { + unsigned int h = mkhash_init; + for (auto k : a) + h = mkhash(h, hash_ops::hash(k)); + return h; + } +}; + struct hash_cstr_ops { - bool cmp(const char *a, const char *b) const { + static inline bool cmp(const char *a, const char *b) { for (int i = 0; a[i] || b[i]; i++) if (a[i] != b[i]) return false; return true; } - unsigned int hash(const char *a) const { - unsigned int hash = 5381; + static inline unsigned int hash(const char *a) { + unsigned int hash = mkhash_init; while (*a) - hash = mkhash(hash, *(a++)); - return hash; + hash = mkhash(hash, *(a++)); + return hash; } }; struct hash_ptr_ops { - bool cmp(const void *a, const void *b) const { + static inline bool cmp(const void *a, const void *b) { return a == b; } - unsigned int hash(const void *a) const { - return (unsigned long)a; + static inline unsigned int hash(const void *a) { + return (uintptr_t)a; } }; struct hash_obj_ops { - bool cmp(const void *a, const void *b) const { + static inline bool cmp(const void *a, const void *b) { return a == b; } template - unsigned int hash(const T *a) const { - return a->hash(); + static inline unsigned int hash(const T *a) { + return a ? a->hash() : 0; } }; -inline int hashtable_size(int old_size) +template +inline unsigned int mkhash(const T &v) { + return hash_ops().hash(v); +} + +inline int hashtable_size(int min_size) { - // prime numbers, approx. in powers of two - if (old_size < 53) return 53; - if (old_size < 113) return 113; - if (old_size < 251) return 251; - if (old_size < 503) return 503; - if (old_size < 1129) return 1129; - if (old_size < 2503) return 2503; - if (old_size < 5023) return 5023; - if (old_size < 11299) return 11299; - if (old_size < 25097) return 25097; - if (old_size < 50291) return 50291; - if (old_size < 112997) return 112997; - if (old_size < 251003) return 251003; - if (old_size < 503003) return 503003; - if (old_size < 1129991) return 1129991; - if (old_size < 2509993) return 2509993; - if (old_size < 5029991) return 5029991; - if (old_size < 11299997) return 11299997; - if (old_size < 25099999) return 25099999; - if (old_size < 50299999) return 50299999; - if (old_size < 113000009) return 113000009; - if (old_size < 250999999) return 250999999; - if (old_size < 503000009) return 503000009; - if (old_size < 1129999999) return 1129999999; - throw std::length_error("hash table exceeded maximum size"); + static std::vector zero_and_some_primes = { + 0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677, + 853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289, + 12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281, + 120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697, + 897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037, + 5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411, + 25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239, + 121590311, 151987889, 189984863, 237481091, 296851369, 371064217 + }; + + for (auto p : zero_and_some_primes) + if (p >= min_size) return p; + + if (sizeof(int) == 4) + throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables."); + + for (auto p : zero_and_some_primes) + if (100129 * p > min_size) return 100129 * p; + + throw std::length_error("hash table exceeded maximum size."); } -template> +template> class dict; +template> class idict; +template> class pool; +template> class mfp; + +template class dict { struct entry_t { - int link; std::pair udata; + int next; - entry_t() : link(-1) { } - entry_t(const std::pair &udata) : link(1), udata(udata) { } - - bool is_free() const { return link < 0; } - int get_next() const { return (link > 0 ? link : -link) - 2; } - bool get_last() const { return get_next() == -1; } - void set_next_used(int next) { link = next + 2; } - void set_next_free(int next) { link = -(next + 2); } + entry_t() { } + entry_t(const std::pair &udata, int next) : udata(udata), next(next) { } + entry_t(std::pair &&udata, int next) : udata(std::move(udata)), next(next) { } }; std::vector hashtable; std::vector entries; - int free_list, counter, begin_n; OPS ops; - void init() - { - free_list = -1; - counter = 0; - begin_n = -1; - } - - void init_from(const dict &other) - { - hashtable.clear(); - entries.clear(); - - counter = other.size(); - int new_size = hashtable_size(HASHLIB_SIZE_FACTOR * counter); - hashtable.resize(new_size); - new_size = new_size / HASHLIB_SIZE_FACTOR + 1; - entries.reserve(new_size); - - for (auto &it : other) - entries.push_back(entry_t(it)); - entries.resize(new_size); - rehash(); +#ifdef NDEBUG + static inline void do_assert(bool) { } +#else + static inline void do_assert(bool cond) { + if (!cond) throw std::runtime_error("dict<> assert failed."); } +#endif - int mkhash(const K &key) const + int do_hash(const K &key) const { unsigned int hash = 0; if (!hashtable.empty()) @@ -180,145 +229,174 @@ class dict return hash; } - void rehash() + void do_rehash() { - free_list = -1; - begin_n = -1; + hashtable.clear(); + hashtable.resize(hashtable_size(entries.capacity() * hashtable_size_factor), -1); - for (auto &h : hashtable) - h = -1; + for (int i = 0; i < int(entries.size()); i++) { + do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size())); + int hash = do_hash(entries[i].udata.first); + entries[i].next = hashtable[hash]; + hashtable[hash] = i; + } + } - for (int i = 0; i < int(entries.size()); i++) - if (entries[i].is_free()) { - entries[i].set_next_free(free_list); - free_list = i; - } else { - int hash = mkhash(entries[i].udata.first); - entries[i].set_next_used(hashtable[hash]); - hashtable[hash] = i; - begin_n = i; + int do_erase(int index, int hash) + { + do_assert(index < int(entries.size())); + if (hashtable.empty() || index < 0) + return 0; + + int k = hashtable[hash]; + do_assert(0 <= k && k < int(entries.size())); + + if (k == index) { + hashtable[hash] = entries[index].next; + } else { + while (entries[k].next != index) { + k = entries[k].next; + do_assert(0 <= k && k < int(entries.size())); } - } + entries[k].next = entries[index].next; + } + + int back_idx = entries.size()-1; - void do_erase(const K &key, int hash) - { - int last_index = -1; - int index = hashtable.empty() ? -1 : hashtable[hash]; - while (1) { - if (index < 0) - return; - if (ops.cmp(entries[index].udata.first, key)) { - if (last_index < 0) - hashtable[hash] = entries[index].get_next(); - else - entries[last_index].set_next_used(entries[index].get_next()); - entries[index].udata = std::pair(); - entries[index].set_next_free(free_list); - free_list = index; - if (--counter == 0) - clear(); - else if (index == begin_n) - do begin_n--; while (begin_n >= 0 && entries[begin_n].is_free()); - return; + if (index != back_idx) + { + int back_hash = do_hash(entries[back_idx].udata.first); + + k = hashtable[back_hash]; + do_assert(0 <= k && k < int(entries.size())); + + if (k == back_idx) { + hashtable[back_hash] = index; + } else { + while (entries[k].next != back_idx) { + k = entries[k].next; + do_assert(0 <= k && k < int(entries.size())); + } + entries[k].next = index; } - last_index = index; - index = entries[index].get_next(); + + entries[index] = std::move(entries[back_idx]); } + + entries.pop_back(); + + if (entries.empty()) + hashtable.clear(); + + return 1; } - int lookup_index(const K &key, int hash) const + int do_lookup(const K &key, int &hash) const { - int index = hashtable.empty() ? -1 : hashtable[hash]; - while (1) { - if (index < 0) - return -1; - if (ops.cmp(entries[index].udata.first, key)) - return index; - index = entries[index].get_next(); + if (hashtable.empty()) + return -1; + + if (entries.size() * hashtable_size_trigger > hashtable.size()) { + ((dict*)this)->do_rehash(); + hash = do_hash(key); + } + + int index = hashtable[hash]; + + while (index >= 0 && !ops.cmp(entries[index].udata.first, key)) { + index = entries[index].next; + do_assert(-1 <= index && index < int(entries.size())); } + + return index; } - int insert_at(const std::pair &value, int hash) + int do_insert(const K &key, int &hash) { - if (free_list < 0) - { - int i = entries.size(); - int new_size = hashtable_size(HASHLIB_SIZE_FACTOR * entries.size()); - hashtable.resize(new_size); - entries.resize(new_size / HASHLIB_SIZE_FACTOR + 1); - entries[i].udata = value; - entries[i].set_next_used(0); - counter++; - rehash(); - return i; + if (hashtable.empty()) { + entries.push_back(entry_t(std::pair(key, T()), -1)); + do_rehash(); + hash = do_hash(key); + } else { + entries.push_back(entry_t(std::pair(key, T()), hashtable[hash])); + hashtable[hash] = entries.size() - 1; } + return entries.size() - 1; + } - int i = free_list; - free_list = entries[i].get_next(); - entries[i].udata = value; - entries[i].set_next_used(hashtable[hash]); - hashtable[hash] = i; - if (begin_n < i) - begin_n = i; - counter++; - return i; + int do_insert(const std::pair &value, int &hash) + { + if (hashtable.empty()) { + entries.push_back(entry_t(value, -1)); + do_rehash(); + hash = do_hash(value.first); + } else { + entries.push_back(entry_t(value, hashtable[hash])); + hashtable[hash] = entries.size() - 1; + } + return entries.size() - 1; } public: - class iterator + class const_iterator : public std::iterator> { - dict *ptr; + friend class dict; + protected: + const dict *ptr; int index; + const_iterator(const dict *ptr, int index) : ptr(ptr), index(index) { } public: - iterator() { } - iterator(dict *ptr, int index) : ptr(ptr), index(index) { } - iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; } - bool operator==(const iterator &other) const { return index == other.index; } - bool operator!=(const iterator &other) const { return index != other.index; } - std::pair &operator*() { return ptr->entries[index].udata; } - std::pair *operator->() { return &ptr->entries[index].udata; } + const_iterator() { } + const_iterator operator++() { index--; return *this; } + bool operator<(const const_iterator &other) const { return index > other.index; } + bool operator==(const const_iterator &other) const { return index == other.index; } + bool operator!=(const const_iterator &other) const { return index != other.index; } const std::pair &operator*() const { return ptr->entries[index].udata; } const std::pair *operator->() const { return &ptr->entries[index].udata; } }; - class const_iterator + class iterator : public std::iterator> { - const dict *ptr; + friend class dict; + protected: + dict *ptr; int index; + iterator(dict *ptr, int index) : ptr(ptr), index(index) { } public: - const_iterator() { } - const_iterator(const dict *ptr, int index) : ptr(ptr), index(index) { } - const_iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; } - bool operator==(const const_iterator &other) const { return index == other.index; } - bool operator!=(const const_iterator &other) const { return index != other.index; } + iterator() { } + iterator operator++() { index--; return *this; } + bool operator<(const iterator &other) const { return index > other.index; } + bool operator==(const iterator &other) const { return index == other.index; } + bool operator!=(const iterator &other) const { return index != other.index; } + std::pair &operator*() { return ptr->entries[index].udata; } + std::pair *operator->() { return &ptr->entries[index].udata; } const std::pair &operator*() const { return ptr->entries[index].udata; } const std::pair *operator->() const { return &ptr->entries[index].udata; } + operator const_iterator() const { return const_iterator(ptr, index); } }; dict() { - init(); } - dict(const dict &other) + dict(const dict &other) { - init_from(other); + entries = other.entries; + do_rehash(); } - dict(dict &&other) + dict(dict &&other) { - free_list = -1; - counter = 0; swap(other); } - dict &operator=(const dict &other) { - if (this != &other) - init_from(other); + dict &operator=(const dict &other) { + entries = other.entries; + do_rehash(); return *this; } - dict &operator=(dict &&other) { + dict &operator=(dict &&other) { clear(); swap(other); return *this; @@ -326,7 +404,6 @@ public: dict(const std::initializer_list> &list) { - init(); for (auto &it : list) insert(it); } @@ -334,7 +411,6 @@ public: template dict(InputIterator first, InputIterator last) { - init(); insert(first, last); } @@ -345,39 +421,58 @@ public: insert(*first); } + std::pair insert(const K &key) + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i >= 0) + return std::pair(iterator(this, i), false); + i = do_insert(key, hash); + return std::pair(iterator(this, i), true); + } + std::pair insert(const std::pair &value) { - int hash = mkhash(value.first); - int i = lookup_index(value.first, hash); + int hash = do_hash(value.first); + int i = do_lookup(value.first, hash); if (i >= 0) return std::pair(iterator(this, i), false); - i = insert_at(value, hash); + i = do_insert(value, hash); return std::pair(iterator(this, i), true); } - void erase(const K &key) + int erase(const K &key) { - int hash = mkhash(key); - do_erase(key, hash); + int hash = do_hash(key); + int index = do_lookup(key, hash); + return do_erase(index, hash); } - void erase(const iterator it) + iterator erase(iterator it) { - int hash = mkhash(it->first); - do_erase(it->first, hash); + int hash = do_hash(it->first); + do_erase(it.index, hash); + return ++it; } int count(const K &key) const { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); return i < 0 ? 0 : 1; } + int count(const K &key, const_iterator it) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + return i < 0 || i > it.index ? 0 : 1; + } + iterator find(const K &key) { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); if (i < 0) return end(); return iterator(this, i); @@ -385,8 +480,8 @@ public: const_iterator find(const K &key) const { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); if (i < 0) return end(); return const_iterator(this, i); @@ -394,8 +489,8 @@ public: T& at(const K &key) { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); if (i < 0) throw std::out_of_range("dict::at()"); return entries[i].udata.second; @@ -403,114 +498,101 @@ public: const T& at(const K &key) const { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); if (i < 0) throw std::out_of_range("dict::at()"); return entries[i].udata.second; } + T at(const K &key, const T &defval) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + if (i < 0) + return defval; + return entries[i].udata.second; + } + T& operator[](const K &key) { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); if (i < 0) - i = insert_at(std::pair(key, T()), hash); + i = do_insert(std::pair(key, T()), hash); return entries[i].udata.second; } - void swap(dict &other) + template> + void sort(Compare comp = Compare()) + { + std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata.first, a.udata.first); }); + do_rehash(); + } + + void swap(dict &other) { hashtable.swap(other.hashtable); entries.swap(other.entries); - std::swap(free_list, other.free_list); - std::swap(counter, other.counter); - std::swap(begin_n, other.begin_n); } - bool operator==(const dict &other) const { - if (counter != other.counter) + bool operator==(const dict &other) const { + if (size() != other.size()) return false; - if (counter == 0) - return true; - if (entries.size() < other.entries.size()) - for (auto &it : *this) { - auto oit = other.find(it.first); - if (oit == other.end() || oit->second != it.second) - return false; - } - else - for (auto &oit : other) { - auto it = find(oit.first); - if (it == end() || it->second != oit.second) - return false; - } + for (auto &it : entries) { + auto oit = other.find(it.udata.first); + if (oit == other.end() || !(oit->second == it.udata.second)) + return false; + } return true; } - bool operator!=(const dict &other) const { - return !(*this == other); + bool operator!=(const dict &other) const { + return !operator==(other); } - size_t size() const { return counter; } - bool empty() const { return counter == 0; } - void clear() { hashtable.clear(); entries.clear(); init(); } + void reserve(size_t n) { entries.reserve(n); } + size_t size() const { return entries.size(); } + bool empty() const { return entries.empty(); } + void clear() { hashtable.clear(); entries.clear(); } - iterator begin() { return iterator(this, begin_n); } - iterator end() { return iterator(this, -1); } + iterator begin() { return iterator(this, int(entries.size())-1); } + iterator element(int n) { return iterator(this, int(entries.size())-1-n); } + iterator end() { return iterator(nullptr, -1); } - const_iterator begin() const { return const_iterator(this, begin_n); } - const_iterator end() const { return const_iterator(this, -1); } + const_iterator begin() const { return const_iterator(this, int(entries.size())-1); } + const_iterator element(int n) const { return const_iterator(this, int(entries.size())-1-n); } + const_iterator end() const { return const_iterator(nullptr, -1); } }; -template> +template class pool { + template friend class idict; + +protected: struct entry_t { - int link; - K key; - - entry_t() : link(-1) { } - entry_t(const K &key) : link(1), key(key) { } + K udata; + int next; - bool is_free() const { return link < 0; } - int get_next() const { return (link > 0 ? link : -link) - 2; } - bool get_last() const { return get_next() == -1; } - void set_next_used(int next) { link = next + 2; } - void set_next_free(int next) { link = -(next + 2); } + entry_t() { } + entry_t(const K &udata, int next) : udata(udata), next(next) { } }; std::vector hashtable; std::vector entries; - int free_list, counter, begin_n; OPS ops; - void init() - { - free_list = -1; - counter = 0; - begin_n = -1; - } - - void init_from(const pool &other) - { - hashtable.clear(); - entries.clear(); - - counter = other.size(); - int new_size = hashtable_size(HASHLIB_SIZE_FACTOR * counter); - hashtable.resize(new_size); - new_size = new_size / HASHLIB_SIZE_FACTOR + 1; - entries.reserve(new_size); - - for (auto &it : other) - entries.push_back(entry_t(it)); - entries.resize(new_size); - rehash(); +#ifdef NDEBUG + static inline void do_assert(bool) { } +#else + static inline void do_assert(bool cond) { + if (!cond) throw std::runtime_error("pool<> assert failed."); } +#endif - int mkhash(const K &key) const + int do_hash(const K &key) const { unsigned int hash = 0; if (!hashtable.empty()) @@ -518,145 +600,155 @@ class pool return hash; } - void rehash() + void do_rehash() { - free_list = -1; - begin_n = -1; + hashtable.clear(); + hashtable.resize(hashtable_size(entries.capacity() * hashtable_size_factor), -1); - for (auto &h : hashtable) - h = -1; + for (int i = 0; i < int(entries.size()); i++) { + do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size())); + int hash = do_hash(entries[i].udata); + entries[i].next = hashtable[hash]; + hashtable[hash] = i; + } + } - for (int i = 0; i < int(entries.size()); i++) - if (entries[i].is_free()) { - entries[i].set_next_free(free_list); - free_list = i; - } else { - int hash = mkhash(entries[i].key); - entries[i].set_next_used(hashtable[hash]); - hashtable[hash] = i; - begin_n = i; + int do_erase(int index, int hash) + { + do_assert(index < int(entries.size())); + if (hashtable.empty() || index < 0) + return 0; + + int k = hashtable[hash]; + if (k == index) { + hashtable[hash] = entries[index].next; + } else { + while (entries[k].next != index) { + k = entries[k].next; + do_assert(0 <= k && k < int(entries.size())); } - } + entries[k].next = entries[index].next; + } - void do_erase(const K &key, int hash) - { - int last_index = -1; - int index = hashtable.empty() ? -1 : hashtable[hash]; - while (1) { - if (index < 0) - return; - if (ops.cmp(entries[index].key, key)) { - if (last_index < 0) - hashtable[hash] = entries[index].get_next(); - else - entries[last_index].set_next_used(entries[index].get_next()); - entries[index].key = K(); - entries[index].set_next_free(free_list); - free_list = index; - if (--counter == 0) - clear(); - else if (index == begin_n) - do begin_n--; while (begin_n >= 0 && entries[begin_n].is_free()); - return; + int back_idx = entries.size()-1; + + if (index != back_idx) + { + int back_hash = do_hash(entries[back_idx].udata); + + k = hashtable[back_hash]; + if (k == back_idx) { + hashtable[back_hash] = index; + } else { + while (entries[k].next != back_idx) { + k = entries[k].next; + do_assert(0 <= k && k < int(entries.size())); + } + entries[k].next = index; } - last_index = index; - index = entries[index].get_next(); + + entries[index] = std::move(entries[back_idx]); } + + entries.pop_back(); + + if (entries.empty()) + hashtable.clear(); + + return 1; } - int lookup_index(const K &key, int hash) const + int do_lookup(const K &key, int &hash) const { - int index = hashtable.empty() ? -1 : hashtable[hash]; - while (1) { - if (index < 0) - return -1; - if (ops.cmp(entries[index].key, key)) - return index; - index = entries[index].get_next(); + if (hashtable.empty()) + return -1; + + if (entries.size() * hashtable_size_trigger > hashtable.size()) { + ((pool*)this)->do_rehash(); + hash = do_hash(key); + } + + int index = hashtable[hash]; + + while (index >= 0 && !ops.cmp(entries[index].udata, key)) { + index = entries[index].next; + do_assert(-1 <= index && index < int(entries.size())); } + + return index; } - int insert_at(const K &key, int hash) + int do_insert(const K &value, int &hash) { - if (free_list < 0) - { - int i = entries.size(); - int new_size = hashtable_size(HASHLIB_SIZE_FACTOR * entries.size()); - hashtable.resize(new_size); - entries.resize(new_size / HASHLIB_SIZE_FACTOR + 1); - entries[i].key = key; - entries[i].set_next_used(0); - counter++; - rehash(); - return i; + if (hashtable.empty()) { + entries.push_back(entry_t(value, -1)); + do_rehash(); + hash = do_hash(value); + } else { + entries.push_back(entry_t(value, hashtable[hash])); + hashtable[hash] = entries.size() - 1; } - - int i = free_list; - free_list = entries[i].get_next(); - entries[i].key = key; - entries[i].set_next_used(hashtable[hash]); - hashtable[hash] = i; - if (begin_n < i) - begin_n = i; - counter++; - return i; + return entries.size() - 1; } public: - class iterator + class const_iterator : public std::iterator { - pool *ptr; + friend class pool; + protected: + const pool *ptr; int index; + const_iterator(const pool *ptr, int index) : ptr(ptr), index(index) { } public: - iterator() { } - iterator(pool *ptr, int index) : ptr(ptr), index(index) { } - iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; } - bool operator==(const iterator &other) const { return index == other.index; } - bool operator!=(const iterator &other) const { return index != other.index; } - K &operator*() { return ptr->entries[index].key; } - K *operator->() { return &ptr->entries[index].key; } - const K &operator*() const { return ptr->entries[index].key; } - const K *operator->() const { return &ptr->entries[index].key; } + const_iterator() { } + const_iterator operator++() { index--; return *this; } + bool operator==(const const_iterator &other) const { return index == other.index; } + bool operator!=(const const_iterator &other) const { return index != other.index; } + const K &operator*() const { return ptr->entries[index].udata; } + const K *operator->() const { return &ptr->entries[index].udata; } }; - class const_iterator + class iterator : public std::iterator { - const pool *ptr; + friend class pool; + protected: + pool *ptr; int index; + iterator(pool *ptr, int index) : ptr(ptr), index(index) { } public: - const_iterator() { } - const_iterator(const pool *ptr, int index) : ptr(ptr), index(index) { } - const_iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; } - bool operator==(const const_iterator &other) const { return index == other.index; } - bool operator!=(const const_iterator &other) const { return index != other.index; } - const K &operator*() const { return ptr->entries[index].key; } - const K *operator->() const { return &ptr->entries[index].key; } + iterator() { } + iterator operator++() { index--; return *this; } + bool operator==(const iterator &other) const { return index == other.index; } + bool operator!=(const iterator &other) const { return index != other.index; } + K &operator*() { return ptr->entries[index].udata; } + K *operator->() { return &ptr->entries[index].udata; } + const K &operator*() const { return ptr->entries[index].udata; } + const K *operator->() const { return &ptr->entries[index].udata; } + operator const_iterator() const { return const_iterator(ptr, index); } }; pool() { - init(); } - pool(const pool &other) + pool(const pool &other) { - init_from(other); + entries = other.entries; + do_rehash(); } - pool(pool &&other) + pool(pool &&other) { - free_list = -1; - counter = 0; swap(other); } - pool &operator=(const pool &other) { - if (this != &other) - init_from(other); + pool &operator=(const pool &other) { + entries = other.entries; + do_rehash(); return *this; } - pool &operator=(pool &&other) { + pool &operator=(pool &&other) { clear(); swap(other); return *this; @@ -664,7 +756,6 @@ public: pool(const std::initializer_list &list) { - init(); for (auto &it : list) insert(it); } @@ -672,7 +763,6 @@ public: template pool(InputIterator first, InputIterator last) { - init(); insert(first, last); } @@ -683,39 +773,48 @@ public: insert(*first); } - std::pair insert(const K &key) + std::pair insert(const K &value) { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(value); + int i = do_lookup(value, hash); if (i >= 0) return std::pair(iterator(this, i), false); - i = insert_at(key, hash); + i = do_insert(value, hash); return std::pair(iterator(this, i), true); } - void erase(const K &key) + int erase(const K &key) { - int hash = mkhash(key); - do_erase(key, hash); + int hash = do_hash(key); + int index = do_lookup(key, hash); + return do_erase(index, hash); } - void erase(const iterator it) + iterator erase(iterator it) { - int hash = mkhash(*it); - do_erase(*it, hash); + int hash = do_hash(*it); + do_erase(it.index, hash); + return ++it; } int count(const K &key) const { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); return i < 0 ? 0 : 1; } + int count(const K &key, const_iterator it) const + { + int hash = do_hash(key); + int i = do_lookup(key, hash); + return i < 0 || i > it.index ? 0 : 1; + } + iterator find(const K &key) { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); if (i < 0) return end(); return iterator(this, i); @@ -723,62 +822,242 @@ public: const_iterator find(const K &key) const { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); if (i < 0) return end(); return const_iterator(this, i); } - bool operator[](const K &key) const + bool operator[](const K &key) { - int hash = mkhash(key); - int i = lookup_index(key, hash); + int hash = do_hash(key); + int i = do_lookup(key, hash); return i >= 0; } - void swap(pool &other) + template> + void sort(Compare comp = Compare()) + { + std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata, a.udata); }); + do_rehash(); + } + + K pop() + { + iterator it = begin(); + K ret = *it; + erase(it); + return ret; + } + + void swap(pool &other) { hashtable.swap(other.hashtable); entries.swap(other.entries); - std::swap(free_list, other.free_list); - std::swap(counter, other.counter); - std::swap(begin_n, other.begin_n); } - bool operator==(const pool &other) const { - if (counter != other.counter) + bool operator==(const pool &other) const { + if (size() != other.size()) return false; - if (counter == 0) - return true; - if (entries.size() < other.entries.size()) - for (auto &it : *this) { - auto oit = other.find(it.first); - if (oit == other.end() || oit->second != it.second) - return false; - } - else - for (auto &oit : other) { - auto it = find(oit.first); - if (it == end() || it->second != oit.second) - return false; - } + for (auto &it : entries) + if (!other.count(it.udata)) + return false; return true; } - bool operator!=(const pool &other) const { - return !(*this == other); + bool operator!=(const pool &other) const { + return !operator==(other); + } + + bool hash() const { + unsigned int hashval = mkhash_init; + for (auto &it : entries) + hashval ^= ops.hash(it.udata); + return hashval; + } + + void reserve(size_t n) { entries.reserve(n); } + size_t size() const { return entries.size(); } + bool empty() const { return entries.empty(); } + void clear() { hashtable.clear(); entries.clear(); } + + iterator begin() { return iterator(this, int(entries.size())-1); } + iterator element(int n) { return iterator(this, int(entries.size())-1-n); } + iterator end() { return iterator(nullptr, -1); } + + const_iterator begin() const { return const_iterator(this, int(entries.size())-1); } + const_iterator element(int n) const { return const_iterator(this, int(entries.size())-1-n); } + const_iterator end() const { return const_iterator(nullptr, -1); } +}; + +template +class idict +{ + pool database; + +public: + typedef typename pool::const_iterator const_iterator; + + int operator()(const K &key) + { + int hash = database.do_hash(key); + int i = database.do_lookup(key, hash); + if (i < 0) + i = database.do_insert(key, hash); + return i + offset; + } + + int at(const K &key) const + { + int hash = database.do_hash(key); + int i = database.do_lookup(key, hash); + if (i < 0) + throw std::out_of_range("idict::at()"); + return i + offset; + } + + int at(const K &key, int defval) const + { + int hash = database.do_hash(key); + int i = database.do_lookup(key, hash); + if (i < 0) + return defval; + return i + offset; + } + + int count(const K &key) const + { + int hash = database.do_hash(key); + int i = database.do_lookup(key, hash); + return i < 0 ? 0 : 1; + } + + void expect(const K &key, int i) + { + int j = (*this)(key); + if (i != j) + throw std::out_of_range("idict::expect()"); + } + + const K &operator[](int index) const + { + return database.entries.at(index - offset).udata; + } + + void swap(idict &other) + { + database.swap(other.database); + } + + void reserve(size_t n) { database.reserve(n); } + size_t size() const { return database.size(); } + bool empty() const { return database.empty(); } + void clear() { database.clear(); } + + const_iterator begin() const { return database.begin(); } + const_iterator element(int n) const { return database.element(n); } + const_iterator end() const { return database.end(); } +}; + +template +class mfp +{ + mutable idict database; + mutable std::vector parents; + +public: + typedef typename idict::const_iterator const_iterator; + + int operator()(const K &key) const + { + int i = database(key); + parents.resize(database.size(), -1); + return i; + } + + const K &operator[](int index) const + { + return database[index]; + } + + int ifind(int i) const + { + int p = i, k = i; + + while (parents[p] != -1) + p = parents[p]; + + while (k != p) { + int next_k = parents[k]; + parents[k] = p; + k = next_k; + } + + return p; + } + + void imerge(int i, int j) + { + i = ifind(i); + j = ifind(j); + + if (i != j) + parents[i] = j; + } + + void ipromote(int i) + { + int k = i; + + while (k != -1) { + int next_k = parents[k]; + parents[k] = i; + k = next_k; + } + + parents[i] = -1; } - size_t size() const { return counter; } - bool empty() const { return counter == 0; } - void clear() { hashtable.clear(); entries.clear(); init(); } + int lookup(const K &a) const + { + return ifind((*this)(a)); + } + + const K &find(const K &a) const + { + int i = database.at(a, -1); + if (i < 0) + return a; + return (*this)[ifind(i)]; + } + + void merge(const K &a, const K &b) + { + imerge((*this)(a), (*this)(b)); + } + + void promote(const K &a) + { + int i = database.at(a, -1); + if (i >= 0) + ipromote(i); + } + + void swap(mfp &other) + { + database.swap(other.database); + parents.swap(other.parents); + } - iterator begin() { return iterator(this, begin_n); } - iterator end() { return iterator(this, -1); } + void reserve(size_t n) { database.reserve(n); } + size_t size() const { return database.size(); } + bool empty() const { return database.empty(); } + void clear() { database.clear(); parents.clear(); } - const_iterator begin() const { return const_iterator(this, begin_n); } - const_iterator end() const { return const_iterator(this, -1); } + const_iterator begin() const { return database.begin(); } + const_iterator element(int n) const { return database.element(n); } + const_iterator end() const { return database.end(); } }; } /* namespace hashlib */