// This is free and unencumbered software released into the public domain.
-//
+//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// -------------------------------------------------------
#ifndef HASHLIB_H
+#define HASHLIB_H
#include <stdexcept>
+#include <algorithm>
#include <string>
#include <vector>
// Starting accumulator for the incremental hash helpers in this file
// (5381 is the classic DJB2 seed).
const unsigned int mkhash_init = 5381;
// The ADD version of DJB2
-// (usunsigned int mkhashe this version for cache locality in b)
+// (use this version for cache locality in b)
// One DJB2 step in its additive form: scale the running hash `a` by 33 and
// add the new value `b`. Unsigned arithmetic wraps, so multiplying by 33 is
// exactly the shift-and-add spelling (a * 32 + a).
inline unsigned int mkhash_add(unsigned int a, unsigned int b) {
	const unsigned int scaled = a * 33u;
	return scaled + b;
}
}
// Default hashing policy for a key type T: keys are compared with
// operator== and hashed by calling the key's own hash() member.
template<typename T> struct hash_ops {
- bool cmp(const T &a, const T &b) const {
+ static inline bool cmp(const T &a, const T &b) {
return a == b;
}
- unsigned int hash(const T &a) const {
+ static inline unsigned int hash(const T &a) {
// Requires T to provide a hash() member.
return a.hash();
}
};
// Equality helper shared by the integer hash_ops specialisations, which
// inherit cmp() from it and supply their own hash().
-template<> struct hash_ops<int> {
+struct hash_int_ops {
template<typename T>
- bool cmp(T a, T b) const {
+ static inline bool cmp(T a, T b) {
return a == b;
}
- unsigned int hash(unsigned int a) const {
+};
+
// 32-bit integers: the value itself is returned as the hash (implicitly
// converted to the unsigned int return type).
+template<> struct hash_ops<int32_t> : hash_int_ops
+{
+ static inline unsigned int hash(int32_t a) {
return a;
}
};
// 64-bit integers: the low and high 32-bit halves are combined with the
// two-argument mkhash().
+template<> struct hash_ops<int64_t> : hash_int_ops
+{
+ static inline unsigned int hash(int64_t a) {
+ return mkhash((unsigned int)(a), (unsigned int)(a >> 32));
+ }
+};
template<> struct hash_ops<std::string> {
- bool cmp(const std::string &a, const std::string &b) const {
+ static inline bool cmp(const std::string &a, const std::string &b) {
return a == b;
}
- unsigned int hash(const std::string &a) const {
+ static inline unsigned int hash(const std::string &a) {
unsigned int v = 0;
for (auto c : a)
v = mkhash(v, c);
};
// Hashing policy for std::pair<P, Q>: pairs compare with operator== and the
// hash combines the member hashes (via hash_ops<P> / hash_ops<Q>) with the
// two-argument mkhash().
// Arguments are taken by const reference so neither cmp() nor hash() copies
// the pair's members on each call.
template<typename P, typename Q> struct hash_ops<std::pair<P, Q>> {
- bool cmp(std::pair<P, Q> a, std::pair<P, Q> b) const {
+ static inline bool cmp(const std::pair<P, Q> &a, const std::pair<P, Q> &b) {
+ return a == b;
+ }
+ static inline unsigned int hash(const std::pair<P, Q> &a) {
+ return mkhash(hash_ops<P>::hash(a.first), hash_ops<Q>::hash(a.second));
+ }
+};
+
// Hashing policy for std::tuple<T...>: tuples compare with operator== and
// the hash is built recursively over the element index I.
// The tuple is taken by const reference so the recursion in hash<I>() does
// not copy the whole tuple once per element.
+template<typename... T> struct hash_ops<std::tuple<T...>> {
+ static inline bool cmp(const std::tuple<T...> &a, const std::tuple<T...> &b) {
return a == b;
}
- unsigned int hash(std::pair<P, Q> a) const {
- hash_ops<P> p_ops;
- hash_ops<Q> q_ops;
- return mkhash(p_ops.hash(a.first), q_ops.hash(a.second));
// Recursion end (I == number of elements): start from the seed value.
+ template<size_t I = 0>
+ static inline typename std::enable_if<I == sizeof...(T), unsigned int>::type hash(const std::tuple<T...> &) {
+ return mkhash_init;
+ }
// Recursive step: hash of elements I+1.. combined with the hash of element I.
+ template<size_t I = 0>
+ static inline typename std::enable_if<I != sizeof...(T), unsigned int>::type hash(const std::tuple<T...> &a) {
+ typedef hash_ops<typename std::tuple_element<I, std::tuple<T...>>::type> element_ops_t;
+ return mkhash(hash<I+1>(a), element_ops_t::hash(std::get<I>(a)));
+ }
+};
+
// Hashing policy for std::vector<T>: vectors compare with operator== and the
// hash folds the element hashes, in order, into a value seeded with
// mkhash_init.
// Both functions take the vector by const reference and iterate by const
// reference, so hashing or comparing a key no longer copies the container
// or its elements.
+template<typename T> struct hash_ops<std::vector<T>> {
+ static inline bool cmp(const std::vector<T> &a, const std::vector<T> &b) {
+ return a == b;
+ }
+ static inline unsigned int hash(const std::vector<T> &a) {
+ unsigned int h = mkhash_init;
+ for (const auto &k : a)
+ h = mkhash(h, hash_ops<T>::hash(k));
+ return h;
}
};
// Hashing policy for NUL-terminated C strings: keys are compared and hashed
// by character content, not by pointer value.
struct hash_cstr_ops {
- bool cmp(const char *a, const char *b) const {
+ static inline bool cmp(const char *a, const char *b) {
// Walk both strings in lockstep until both have reached their terminator;
// any differing character (including one string ending early) means unequal.
for (int i = 0; a[i] || b[i]; i++)
if (a[i] != b[i])
return false;
return true;
}
- unsigned int hash(const char *a) const {
+ static inline unsigned int hash(const char *a) {
// Fold every character up to (not including) the terminator into the hash.
unsigned int hash = mkhash_init;
while (*a)
- hash = mkhash(hash, *(a++));
- return hash;
+ hash = mkhash(hash, *(a++));
+ return hash;
}
};
// Hashing policy keyed on pointer identity: two keys are equal when they are
// the same address.
struct hash_ptr_ops {
- bool cmp(const void *a, const void *b) const {
+ static inline bool cmp(const void *a, const void *b) {
return a == b;
}
- unsigned int hash(const void *a) const {
- return (unsigned long)a;
+ static inline unsigned int hash(const void *a) {
// The address is cast to an integer and then implicitly converted to the
// unsigned int return type.
+ return (uintptr_t)a;
}
};
// Hashing policy for pointers to objects: equality is pointer identity, but
// the hash is taken from the pointed-to object's hash() member.
struct hash_obj_ops {
- bool cmp(const void *a, const void *b) const {
+ static inline bool cmp(const void *a, const void *b) {
return a == b;
}
template<typename T>
- unsigned int hash(const T *a) const {
- return a->hash();
+ static inline unsigned int hash(const T *a) {
// A null pointer hashes to 0 instead of being dereferenced.
+ return a ? a->hash() : 0;
}
};
// Convenience wrapper: hashes `v` using the hash_ops policy for its type T.
+template<typename T>
+inline unsigned int mkhash(const T &v) {
+ return hash_ops<T>().hash(v);
+}
+
inline int hashtable_size(int min_size)
{
- static std::vector<int> primes = {
- 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
+ static std::vector<int> zero_and_some_primes = {
+ 0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289,
12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281,
120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697,
121590311, 151987889, 189984863, 237481091, 296851369, 371064217
};
- for (auto p : primes)
- if (p > min_size) return p;
+ for (auto p : zero_and_some_primes)
+ if (p >= min_size) return p;
if (sizeof(int) == 4)
throw std::length_error("hash table exceeded maximum size. use a ILP64 abi for larger tables.");
- for (auto p : primes)
+ for (auto p : zero_and_some_primes)
if (100129 * p > min_size) return 100129 * p;
throw std::length_error("hash table exceeded maximum size.");
}
// Forward declarations of the container templates. The default template
// arguments (OPS = hash_ops<K>, offset = 0) are given here.
-template<typename K, typename T, typename OPS = hash_ops<K>>
+template<typename K, typename T, typename OPS = hash_ops<K>> class dict;
+template<typename K, int offset = 0, typename OPS = hash_ops<K>> class idict;
+template<typename K, typename OPS = hash_ops<K>> class pool;
+template<typename K, typename OPS = hash_ops<K>> class mfp;
+
+template<typename K, typename T, typename OPS>
class dict
{
struct entry_t
{
- int link;
std::pair<K, T> udata;
+ int next;
- entry_t() : link(-1) { }
- entry_t(const std::pair<K, T> &udata) : link(1), udata(udata) { }
-
- bool is_free() const { return link < 0; }
- int get_next() const { return (link > 0 ? link : -link) - 2; }
- bool get_last() const { return get_next() == -1; }
- void set_next_used(int next) { link = next + 2; }
- void set_next_free(int next) { link = -(next + 2); }
+ entry_t() { }
+ entry_t(const std::pair<K, T> &udata, int next) : udata(udata), next(next) { }
+ entry_t(std::pair<K, T> &&udata, int next) : udata(std::move(udata)), next(next) { }
};
std::vector<int> hashtable;
std::vector<entry_t> entries;
- int free_list, counter, begin_n;
- int begin_seek_count;
OPS ops;
- void init()
- {
- free_list = -1;
- counter = 0;
- begin_n = -1;
- begin_seek_count = 0;
- }
-
- void init_from(const dict<K, T, OPS> &other)
- {
- hashtable.clear();
- entries.clear();
-
- counter = other.size();
- begin_n = counter - 1;
- entries.reserve(counter);
-
- for (auto &it : other)
- entries.push_back(entry_t(it));
-
- rehash();
+#ifdef NDEBUG
+ static inline void do_assert(bool) { }
+#else
+ static inline void do_assert(bool cond) {
+ if (!cond) throw std::runtime_error("dict<> assert failed.");
}
+#endif
- int mkhash(const K &key) const
+ int do_hash(const K &key) const
{
unsigned int hash = 0;
if (!hashtable.empty())
return hash;
}
// Rebuilds the bucket table from scratch: `hashtable` is re-sized from the
// current capacity of `entries`, then every entry is re-linked into the
// chain of its bucket.
- void upd_begin_n(bool do_refree = true)
+ void do_rehash()
{
- if (begin_n < -1) {
- begin_n = -(begin_n+2);
- while (begin_n >= 0 && entries[begin_n].is_free()) { begin_seek_count++; begin_n--; }
- if (do_refree && begin_seek_count > int(entries.size() / 2)) refree();
// Every bucket starts out as -1, i.e. an empty chain. The table size is
// derived from entries.capacity(), not entries.size().
+ hashtable.clear();
+ hashtable.resize(hashtable_size(entries.capacity() * hashtable_size_factor), -1);
+
// Push each entry onto the front of its bucket's chain.
+ for (int i = 0; i < int(entries.size()); i++) {
+ do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size()));
+ int hash = do_hash(entries[i].udata.first);
+ entries[i].next = hashtable[hash];
+ hashtable[hash] = i;
}
}
// Removes the entry stored at `index`; `hash` is the bucket that entry is
// chained in. Returns 1 when an entry was removed, 0 when there is no bucket
// table or `index` is negative (e.g. a failed lookup).
// `entries` is kept dense: the last entry is moved into the vacated slot, so
// the index of that last entry changes.
- void refree()
+ int do_erase(int index, int hash)
{
- free_list = -1;
- begin_n = -1;
+ do_assert(index < int(entries.size()));
+ if (hashtable.empty() || index < 0)
+ return 0;
+
// Step 1: unlink `index` from its bucket chain (either the bucket head or
// the `next` field of its predecessor is re-pointed past it).
+ int k = hashtable[hash];
+ do_assert(0 <= k && k < int(entries.size()));
+
+ if (k == index) {
+ hashtable[hash] = entries[index].next;
+ } else {
+ while (entries[k].next != index) {
+ k = entries[k].next;
+ do_assert(0 <= k && k < int(entries.size()));
+ }
+ entries[k].next = entries[index].next;
+ }
- int last_free = -1;
- for (int i = 0; i < int(entries.size()); i++)
- if (entries[i].is_free()) {
- if (last_free != -1)
- entries[last_free].set_next_free(i);
- else
- free_list = i;
- last_free = i;
- } else
- begin_n = i;
+ int back_idx = entries.size()-1;
- if (last_free != -1)
- entries[last_free].set_next_free(-1);
// Step 2: unless the erased slot already is the last one, find whatever
// refers to the last entry (bucket head or predecessor) and make it refer
// to `index` instead, then move the last entry into slot `index`.
+ if (index != back_idx)
+ {
+ int back_hash = do_hash(entries[back_idx].udata.first);
- begin_seek_count = 0;
+ k = hashtable[back_hash];
+ do_assert(0 <= k && k < int(entries.size()));
+
+ if (k == back_idx) {
+ hashtable[back_hash] = index;
+ } else {
+ while (entries[k].next != back_idx) {
+ k = entries[k].next;
+ do_assert(0 <= k && k < int(entries.size()));
+ }
+ entries[k].next = index;
+ }
+
+ entries[index] = std::move(entries[back_idx]);
+ }
+
// Step 3: drop the now-unused last slot.
+ entries.pop_back();
+
// An empty container keeps no bucket table.
+ if (entries.empty())
+ hashtable.clear();
+
+ return 1;
}
// Returns the index in `entries` of the entry whose key matches `key`
// (according to ops.cmp), or -1 when there is none.
// `hash` is the bucket to search on entry; it is overwritten with a freshly
// computed value if the bucket table is rebuilt inside this call.
- void rehash()
+ int do_lookup(const K &key, int &hash) const
{
- upd_begin_n(false);
- entries.resize(begin_n + 1);
+ if (hashtable.empty())
+ return -1;
- free_list = -1;
- begin_n = -1;
// The bucket table is grown lazily here: once the entry count exceeds the
// trigger ratio the table is rebuilt. The cast removes const so that this
// const member can call the non-const do_rehash().
+ if (entries.size() * hashtable_size_trigger > hashtable.size()) {
+ ((dict*)this)->do_rehash();
+ hash = do_hash(key);
+ }
- hashtable.clear();
- hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1);
-
- int last_free = -1;
- for (int i = 0; i < int(entries.size()); i++)
- if (entries[i].is_free()) {
- if (last_free != -1)
- entries[last_free].set_next_free(i);
- else
- free_list = i;
- last_free = i;
- } else {
- int hash = mkhash(entries[i].udata.first);
- entries[i].set_next_used(hashtable[hash]);
- hashtable[hash] = i;
- begin_n = i;
- }
+ int index = hashtable[hash];
- if (last_free != -1)
- entries[last_free].set_next_free(-1);
-
- begin_seek_count = 0;
- }
-
- int do_erase(const K &key, int hash)
- {
- int last_index = -1;
- int index = hashtable.empty() ? -1 : hashtable[hash];
- while (1) {
- if (index < 0)
- return 0;
- if (ops.cmp(entries[index].udata.first, key)) {
- if (last_index < 0)
- hashtable[hash] = entries[index].get_next();
- else
- entries[last_index].set_next_used(entries[index].get_next());
- entries[index].udata = std::pair<K, T>();
- entries[index].set_next_free(free_list);
- free_list = index;
- if (--counter == 0)
- clear();
- else if (index == begin_n)
- begin_n = -(begin_n+2);
- return 1;
- }
- last_index = index;
- index = entries[index].get_next();
// Follow the bucket chain until a matching key or the -1 terminator.
+ while (index >= 0 && !ops.cmp(entries[index].udata.first, key)) {
+ index = entries[index].next;
+ do_assert(-1 <= index && index < int(entries.size()));
}
+
+ return index;
}
// Appends a new entry holding `key` and a default-constructed value, and
// returns its index. `hash` is the bucket for `key`; it is recomputed when
// the bucket table is first created here.
// NOTE(review): no duplicate check is done in this function; the visible
// callers run do_lookup() first.
- int lookup_index(const K &key, int hash) const
+ int do_insert(const K &key, int &hash)
{
- int index = hashtable.empty() ? -1 : hashtable[hash];
- while (1) {
- if (index < 0)
- return -1;
- if (ops.cmp(entries[index].udata.first, key))
- return index;
- index = entries[index].get_next();
// No bucket table yet: append the entry, let do_rehash() build the table and
// link the entry, then recompute `hash` against the new table.
+ if (hashtable.empty()) {
+ entries.emplace_back(std::pair<K, T>(key, T()), -1);
+ do_rehash();
+ hash = do_hash(key);
+ } else {
// Otherwise the new entry becomes the head of bucket `hash`.
+ entries.emplace_back(std::pair<K, T>(key, T()), hashtable[hash]);
+ hashtable[hash] = entries.size() - 1;
}
+ return entries.size() - 1;
}
// Appends a copy of the key/value pair `value` and returns the index of the
// new entry. `hash` is the bucket for value.first; it is recomputed when the
// bucket table is first created here.
// NOTE(review): no duplicate check is done in this function; the visible
// callers run do_lookup() first.
- int insert_at(const std::pair<K, T> &value, int hash)
+ int do_insert(const std::pair<K, T> &value, int &hash)
{
- if (free_list < 0)
- {
- free_list = entries.size();
- entries.push_back(entry_t());
-
- if (entries.size() * hashtable_size_trigger > hashtable.size()) {
- int i = free_list;
- entries[i].udata = value;
- entries[i].set_next_used(0);
- begin_n = i;
- counter++;
- rehash();
- return i;
- }
// No bucket table yet: append, build the table (which links the new entry),
// then recompute `hash` against it.
+ if (hashtable.empty()) {
+ entries.emplace_back(value, -1);
+ do_rehash();
+ hash = do_hash(value.first);
+ } else {
// Otherwise the new entry becomes the head of bucket `hash`.
+ entries.emplace_back(value, hashtable[hash]);
+ hashtable[hash] = entries.size() - 1;
}
+ return entries.size() - 1;
+ }
- int i = free_list;
- free_list = entries[i].get_next();
- entries[i].udata = value;
- entries[i].set_next_used(hashtable[hash]);
- hashtable[hash] = i;
- if ((begin_n < -1 && -(begin_n+2) <= i) || (begin_n >= -1 && begin_n <= i))
- begin_n = i;
- counter++;
- return i;
// Rvalue overload: moves the key/value pair `rvalue` into a new entry and
// returns that entry's index. `hash` is the bucket for the key; it is
// recomputed when the bucket table is first created here.
+ int do_insert(std::pair<K, T> &&rvalue, int &hash)
+ {
+ if (hashtable.empty()) {
// The key is copied before the pair is handed to emplace_back, so it can
// still be hashed after `rvalue` has been moved from.
+ auto key = rvalue.first;
+ entries.emplace_back(std::forward<std::pair<K, T>>(rvalue), -1);
+ do_rehash();
+ hash = do_hash(key);
+ } else {
// The new entry becomes the head of bucket `hash`.
+ entries.emplace_back(std::forward<std::pair<K, T>>(rvalue), hashtable[hash]);
+ hashtable[hash] = entries.size() - 1;
+ }
+ return entries.size() - 1;
}
public:
- class iterator
+ class const_iterator : public std::iterator<std::forward_iterator_tag, std::pair<K, T>>
{
- dict<K, T, OPS> *ptr;
+ friend class dict;
+ protected:
+ const dict *ptr;
int index;
+ const_iterator(const dict *ptr, int index) : ptr(ptr), index(index) { }
public:
- iterator() { }
- iterator(dict<K, T, OPS> *ptr, int index) : ptr(ptr), index(index) { }
- iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; }
- bool operator==(const iterator &other) const { return index == other.index; }
- bool operator!=(const iterator &other) const { return index != other.index; }
- std::pair<K, T> &operator*() { return ptr->entries[index].udata; }
- std::pair<K, T> *operator->() { return &ptr->entries[index].udata; }
+ const_iterator() { }
+ const_iterator operator++() { index--; return *this; }
+ bool operator<(const const_iterator &other) const { return index > other.index; }
+ bool operator==(const const_iterator &other) const { return index == other.index; }
+ bool operator!=(const const_iterator &other) const { return index != other.index; }
const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; }
const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; }
};
- class const_iterator
+ class iterator : public std::iterator<std::forward_iterator_tag, std::pair<K, T>>
{
- const dict<K, T, OPS> *ptr;
+ friend class dict;
+ protected:
+ dict *ptr;
int index;
+ iterator(dict *ptr, int index) : ptr(ptr), index(index) { }
public:
- const_iterator() { }
- const_iterator(const dict<K, T, OPS> *ptr, int index) : ptr(ptr), index(index) { }
- const_iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; }
- bool operator==(const const_iterator &other) const { return index == other.index; }
- bool operator!=(const const_iterator &other) const { return index != other.index; }
+ iterator() { }
+ iterator operator++() { index--; return *this; }
+ bool operator<(const iterator &other) const { return index > other.index; }
+ bool operator==(const iterator &other) const { return index == other.index; }
+ bool operator!=(const iterator &other) const { return index != other.index; }
+ std::pair<K, T> &operator*() { return ptr->entries[index].udata; }
+ std::pair<K, T> *operator->() { return &ptr->entries[index].udata; }
const std::pair<K, T> &operator*() const { return ptr->entries[index].udata; }
const std::pair<K, T> *operator->() const { return &ptr->entries[index].udata; }
+ operator const_iterator() const { return const_iterator(ptr, index); }
};
dict()
{
- init();
}
- dict(const dict<K, T, OPS> &other)
+ dict(const dict &other)
{
- init_from(other);
+ entries = other.entries;
+ do_rehash();
}
- dict(dict<K, T, OPS> &&other)
+ dict(dict &&other)
{
- init();
swap(other);
}
- dict<K, T, OPS> &operator=(const dict<K, T, OPS> &other) {
- if (this != &other)
- init_from(other);
+ dict &operator=(const dict &other) {
+ entries = other.entries;
+ do_rehash();
return *this;
}
- dict<K, T, OPS> &operator=(dict<K, T, OPS> &&other) {
+ dict &operator=(dict &&other) {
clear();
swap(other);
return *this;
dict(const std::initializer_list<std::pair<K, T>> &list)
{
- init();
for (auto &it : list)
insert(it);
}
template<class InputIterator>
dict(InputIterator first, InputIterator last)
{
- init();
insert(first, last);
}
insert(*first);
}
+ std::pair<iterator, bool> insert(const K &key)
+ {
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
+ if (i >= 0)
+ return std::pair<iterator, bool>(iterator(this, i), false);
+ i = do_insert(key, hash);
+ return std::pair<iterator, bool>(iterator(this, i), true);
+ }
+
std::pair<iterator, bool> insert(const std::pair<K, T> &value)
{
- int hash = mkhash(value.first);
- int i = lookup_index(value.first, hash);
+ int hash = do_hash(value.first);
+ int i = do_lookup(value.first, hash);
+ if (i >= 0)
+ return std::pair<iterator, bool>(iterator(this, i), false);
+ i = do_insert(value, hash);
+ return std::pair<iterator, bool>(iterator(this, i), true);
+ }
+
+ std::pair<iterator, bool> insert(std::pair<K, T> &&rvalue)
+ {
+ int hash = do_hash(rvalue.first);
+ int i = do_lookup(rvalue.first, hash);
+ if (i >= 0)
+ return std::pair<iterator, bool>(iterator(this, i), false);
+ i = do_insert(std::forward<std::pair<K, T>>(rvalue), hash);
+ return std::pair<iterator, bool>(iterator(this, i), true);
+ }
+
+ std::pair<iterator, bool> emplace(K const &key, T const &value)
+ {
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
+ if (i >= 0)
+ return std::pair<iterator, bool>(iterator(this, i), false);
+ i = do_insert(std::make_pair(key, value), hash);
+ return std::pair<iterator, bool>(iterator(this, i), true);
+ }
+
+ std::pair<iterator, bool> emplace(K const &key, T &&rvalue)
+ {
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
+ if (i >= 0)
+ return std::pair<iterator, bool>(iterator(this, i), false);
+ i = do_insert(std::make_pair(key, std::forward<T>(rvalue)), hash);
+ return std::pair<iterator, bool>(iterator(this, i), true);
+ }
+
+ std::pair<iterator, bool> emplace(K &&rkey, T const &value)
+ {
+ int hash = do_hash(rkey);
+ int i = do_lookup(rkey, hash);
+ if (i >= 0)
+ return std::pair<iterator, bool>(iterator(this, i), false);
+ i = do_insert(std::make_pair(std::forward<K>(rkey), value), hash);
+ return std::pair<iterator, bool>(iterator(this, i), true);
+ }
+
+ std::pair<iterator, bool> emplace(K &&rkey, T &&rvalue)
+ {
+ int hash = do_hash(rkey);
+ int i = do_lookup(rkey, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
- i = insert_at(value, hash);
+ i = do_insert(std::make_pair(std::forward<K>(rkey), std::forward<T>(rvalue)), hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
// Erases the entry with the given key, if any. Returns the number of entries
// removed (the result of do_erase for the looked-up index).
int erase(const K &key)
{
- int hash = mkhash(key);
- return do_erase(key, hash);
+ int hash = do_hash(key);
// do_lookup may update `hash`, so do_erase receives the bucket that matches
// the current table.
+ int index = do_lookup(key, hash);
+ return do_erase(index, hash);
}
// Erases the entry `it` refers to and returns the iterator advanced by one
// step. Iterators advance by decrementing their index, so the result refers
// to the slot below the erased one.
iterator erase(iterator it)
{
- int hash = mkhash(it->first);
- do_erase(it->first, hash);
+ int hash = do_hash(it->first);
+ do_erase(it.index, hash);
return ++it;
}
int count(const K &key) const
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
return i < 0 ? 0 : 1;
}
// Position-limited variant of count(): returns 1 only when `key` is present
// AND stored at an index not greater than the index of `it`; otherwise 0.
+ int count(const K &key, const_iterator it) const
+ {
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
+ return i < 0 || i > it.index ? 0 : 1;
+ }
+
iterator find(const K &key)
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
if (i < 0)
return end();
return iterator(this, i);
const_iterator find(const K &key) const
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
if (i < 0)
return end();
return const_iterator(this, i);
T& at(const K &key)
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
if (i < 0)
throw std::out_of_range("dict::at()");
return entries[i].udata.second;
const T& at(const K &key) const
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
if (i < 0)
throw std::out_of_range("dict::at()");
return entries[i].udata.second;
}
// Non-throwing variant of at(): returns the value stored for `key`, or
// `defval` when the key is absent. The result may be a reference to
// `defval` itself, so the caller must keep that object alive while using it.
+ const T& at(const K &key, const T &defval) const
+ {
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
+ if (i < 0)
+ return defval;
+ return entries[i].udata.second;
+ }
+
T& operator[](const K &key)
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
if (i < 0)
- i = insert_at(std::pair<K, T>(key, T()), hash);
+ i = do_insert(std::pair<K, T>(key, T()), hash);
return entries[i].udata.second;
}
- void swap(dict<K, T, OPS> &other)
// Sorts the entries by key using `comp` and rebuilds the bucket table.
// The comparator arguments are swapped (comp(b, a)), so `entries` ends up in
// reverse `comp` order; since begin() starts at the highest index and
// operator++ decrements it, iterating the dict then visits keys in `comp`
// order.
+ template<typename Compare = std::less<K>>
+ void sort(Compare comp = Compare())
+ {
+ std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata.first, a.udata.first); });
// Sorting moved entries around, so every chain link must be recomputed.
+ do_rehash();
+ }
+
+ void swap(dict &other)
{
hashtable.swap(other.hashtable);
entries.swap(other.entries);
- std::swap(free_list, other.free_list);
- std::swap(counter, other.counter);
- std::swap(begin_n, other.begin_n);
- std::swap(begin_seek_count, other.begin_seek_count);
}
// Two dicts are equal when they hold the same number of entries and every
// key of this dict is found in `other` with an equal value. The order in
// which entries are stored does not matter.
- bool operator==(const dict<K, T, OPS> &other) const {
- if (counter != other.counter)
+ bool operator==(const dict &other) const {
+ if (size() != other.size())
return false;
- if (counter == 0)
- return true;
- if (entries.size() < other.entries.size())
- for (auto &it : *this) {
- auto oit = other.find(it.first);
- if (oit == other.end() || oit->second != it.second)
- return false;
- }
- else
- for (auto &oit : other) {
- auto it = find(oit.first);
- if (it == end() || it->second != oit.second)
- return false;
- }
// Values are compared with operator== only, so T does not need operator!=.
+ for (auto &it : entries) {
+ auto oit = other.find(it.udata.first);
+ if (oit == other.end() || !(oit->second == it.udata.second))
+ return false;
+ }
return true;
}
- bool operator!=(const dict<K, T, OPS> &other) const {
- return !(*this == other);
+ bool operator!=(const dict &other) const {
+ return !operator==(other);
}
- size_t size() const { return counter; }
- bool empty() const { return counter == 0; }
- void clear() { hashtable.clear(); entries.clear(); init(); }
+ void reserve(size_t n) { entries.reserve(n); }
+ size_t size() const { return entries.size(); }
+ bool empty() const { return entries.empty(); }
+ void clear() { hashtable.clear(); entries.clear(); }
- iterator begin() { upd_begin_n(); return iterator(this, begin_n); }
+ iterator begin() { return iterator(this, int(entries.size())-1); }
+ iterator element(int n) { return iterator(this, int(entries.size())-1-n); }
iterator end() { return iterator(nullptr, -1); }
- const_iterator begin() const { ((dict*)this)->upd_begin_n(); return const_iterator(this, begin_n); }
+ const_iterator begin() const { return const_iterator(this, int(entries.size())-1); }
+ const_iterator element(int n) const { return const_iterator(this, int(entries.size())-1-n); }
const_iterator end() const { return const_iterator(nullptr, -1); }
};
-template<typename K, typename OPS = hash_ops<K>>
+template<typename K, typename OPS>
class pool
{
+ template<typename, int, typename> friend class idict;
+
+protected:
struct entry_t
{
- int link;
- K key;
-
- entry_t() : link(-1) { }
- entry_t(const K &key) : link(1), key(key) { }
+ K udata;
+ int next;
- bool is_free() const { return link < 0; }
- int get_next() const { return (link > 0 ? link : -link) - 2; }
- bool get_last() const { return get_next() == -1; }
- void set_next_used(int next) { link = next + 2; }
- void set_next_free(int next) { link = -(next + 2); }
+ entry_t() { }
+ entry_t(const K &udata, int next) : udata(udata), next(next) { }
+ entry_t(K &&udata, int next) : udata(std::move(udata)), next(next) { }
};
std::vector<int> hashtable;
std::vector<entry_t> entries;
- int free_list, counter, begin_n;
- int begin_seek_count;
OPS ops;
- void init()
- {
- free_list = -1;
- counter = 0;
- begin_n = -1;
- begin_seek_count = 0;
- }
-
- void init_from(const pool<K, OPS> &other)
- {
- hashtable.clear();
- entries.clear();
-
- counter = other.size();
- begin_n = counter - 1;
- entries.reserve(counter);
-
- for (auto &it : other)
- entries.push_back(entry_t(it));
-
- rehash();
+#ifdef NDEBUG
+ static inline void do_assert(bool) { }
+#else
+ static inline void do_assert(bool cond) {
+ if (!cond) throw std::runtime_error("pool<> assert failed.");
}
+#endif
- int mkhash(const K &key) const
+ int do_hash(const K &key) const
{
unsigned int hash = 0;
if (!hashtable.empty())
return hash;
}
- void upd_begin_n(bool do_refree = true)
+ void do_rehash()
{
- if (begin_n < -1) {
- begin_n = -(begin_n+2);
- while (begin_n >= 0 && entries[begin_n].is_free()) { begin_seek_count++; begin_n--; }
- if (do_refree && begin_seek_count > int(entries.size() / 2)) refree();
+ hashtable.clear();
+ hashtable.resize(hashtable_size(entries.capacity() * hashtable_size_factor), -1);
+
+ for (int i = 0; i < int(entries.size()); i++) {
+ do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size()));
+ int hash = do_hash(entries[i].udata);
+ entries[i].next = hashtable[hash];
+ hashtable[hash] = i;
}
}
- void refree()
+ int do_erase(int index, int hash)
{
- free_list = -1;
- begin_n = -1;
+ do_assert(index < int(entries.size()));
+ if (hashtable.empty() || index < 0)
+ return 0;
+
+ int k = hashtable[hash];
+ if (k == index) {
+ hashtable[hash] = entries[index].next;
+ } else {
+ while (entries[k].next != index) {
+ k = entries[k].next;
+ do_assert(0 <= k && k < int(entries.size()));
+ }
+ entries[k].next = entries[index].next;
+ }
- int last_free = -1;
- for (int i = 0; i < int(entries.size()); i++)
- if (entries[i].is_free()) {
- if (last_free != -1)
- entries[last_free].set_next_free(i);
- else
- free_list = i;
- last_free = i;
- } else
- begin_n = i;
+ int back_idx = entries.size()-1;
- if (last_free != -1)
- entries[last_free].set_next_free(-1);
+ if (index != back_idx)
+ {
+ int back_hash = do_hash(entries[back_idx].udata);
- begin_seek_count = 0;
+ k = hashtable[back_hash];
+ if (k == back_idx) {
+ hashtable[back_hash] = index;
+ } else {
+ while (entries[k].next != back_idx) {
+ k = entries[k].next;
+ do_assert(0 <= k && k < int(entries.size()));
+ }
+ entries[k].next = index;
+ }
+
+ entries[index] = std::move(entries[back_idx]);
+ }
+
+ entries.pop_back();
+
+ if (entries.empty())
+ hashtable.clear();
+
+ return 1;
}
- void rehash()
+ int do_lookup(const K &key, int &hash) const
{
- upd_begin_n(false);
- entries.resize(begin_n + 1);
+ if (hashtable.empty())
+ return -1;
- free_list = -1;
- begin_n = -1;
+ if (entries.size() * hashtable_size_trigger > hashtable.size()) {
+ ((pool*)this)->do_rehash();
+ hash = do_hash(key);
+ }
- hashtable.clear();
- hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1);
-
- int last_free = -1;
- for (int i = 0; i < int(entries.size()); i++)
- if (entries[i].is_free()) {
- if (last_free != -1)
- entries[last_free].set_next_free(i);
- else
- free_list = i;
- last_free = i;
- } else {
- int hash = mkhash(entries[i].key);
- entries[i].set_next_used(hashtable[hash]);
- hashtable[hash] = i;
- begin_n = i;
- }
+ int index = hashtable[hash];
- if (last_free != -1)
- entries[last_free].set_next_free(-1);
-
- begin_seek_count = 0;
- }
-
- int do_erase(const K &key, int hash)
- {
- int last_index = -1;
- int index = hashtable.empty() ? -1 : hashtable[hash];
- while (1) {
- if (index < 0)
- return 0;
- if (ops.cmp(entries[index].key, key)) {
- if (last_index < 0)
- hashtable[hash] = entries[index].get_next();
- else
- entries[last_index].set_next_used(entries[index].get_next());
- entries[index].key = K();
- entries[index].set_next_free(free_list);
- free_list = index;
- if (--counter == 0)
- clear();
- else if (index == begin_n)
- begin_n = -(begin_n+2);
- return 1;
- }
- last_index = index;
- index = entries[index].get_next();
+ while (index >= 0 && !ops.cmp(entries[index].udata, key)) {
+ index = entries[index].next;
+ do_assert(-1 <= index && index < int(entries.size()));
}
+
+ return index;
}
// Appends a copy of `value` as a new entry and returns its index. `hash` is
// the bucket for `value`; it is recomputed when the bucket table is first
// created here.
// NOTE(review): no duplicate check is done in this function — presumably the
// callers look the value up first; confirm against the insert() overloads.
- int lookup_index(const K &key, int hash) const
+ int do_insert(const K &value, int &hash)
{
- int index = hashtable.empty() ? -1 : hashtable[hash];
- while (1) {
- if (index < 0)
- return -1;
- if (ops.cmp(entries[index].key, key))
- return index;
- index = entries[index].get_next();
// No bucket table yet: append, build the table (which links the new entry),
// then recompute `hash` against it.
+ if (hashtable.empty()) {
+ entries.emplace_back(value, -1);
+ do_rehash();
+ hash = do_hash(value);
+ } else {
// Otherwise the new entry becomes the head of bucket `hash`.
+ entries.emplace_back(value, hashtable[hash]);
+ hashtable[hash] = entries.size() - 1;
}
+ return entries.size() - 1;
}
// Rvalue overload: moves `rvalue` into a new entry and returns that entry's
// index. `hash` is the bucket for the value; it is recomputed when the
// bucket table is first created here.
// The recomputed hash is taken from the element now stored in `entries`,
// never from `rvalue`, because `rvalue` has already been moved from at that
// point and its state is unspecified.
- int insert_at(const K &key, int hash)
+ int do_insert(K &&rvalue, int &hash)
{
- if (free_list < 0)
- {
- free_list = entries.size();
- entries.push_back(entry_t());
-
- if (entries.size() * hashtable_size_trigger > hashtable.size()) {
- int i = free_list;
- entries[i].key = key;
- entries[i].set_next_used(0);
- begin_n = i;
- counter++;
- rehash();
- return i;
- }
// No bucket table yet: append, build the table (which links the new entry),
// then hash the stored copy.
+ if (hashtable.empty()) {
+ entries.emplace_back(std::move(rvalue), -1);
+ do_rehash();
+ hash = do_hash(entries.back().udata);
+ } else {
// Otherwise the new entry becomes the head of bucket `hash`.
+ entries.emplace_back(std::move(rvalue), hashtable[hash]);
+ hashtable[hash] = entries.size() - 1;
}
-
- int i = free_list;
- free_list = entries[i].get_next();
- entries[i].key = key;
- entries[i].set_next_used(hashtable[hash]);
- hashtable[hash] = i;
- if ((begin_n < -1 && -(begin_n+2) <= i) || (begin_n >= -1 && begin_n <= i))
- begin_n = i;
- counter++;
- return i;
+ return entries.size() - 1;
}
public:
- class iterator
+ class const_iterator : public std::iterator<std::forward_iterator_tag, K>
{
- pool<K, OPS> *ptr;
+ friend class pool;
+ protected:
+ const pool *ptr;
int index;
+ const_iterator(const pool *ptr, int index) : ptr(ptr), index(index) { }
public:
- iterator() { }
- iterator(pool<K, OPS> *ptr, int index) : ptr(ptr), index(index) { }
- iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; }
- bool operator==(const iterator &other) const { return index == other.index; }
- bool operator!=(const iterator &other) const { return index != other.index; }
- K &operator*() { return ptr->entries[index].key; }
- K *operator->() { return &ptr->entries[index].key; }
- const K &operator*() const { return ptr->entries[index].key; }
- const K *operator->() const { return &ptr->entries[index].key; }
+ const_iterator() { }
+ const_iterator operator++() { index--; return *this; }
+ bool operator==(const const_iterator &other) const { return index == other.index; }
+ bool operator!=(const const_iterator &other) const { return index != other.index; }
+ const K &operator*() const { return ptr->entries[index].udata; }
+ const K *operator->() const { return &ptr->entries[index].udata; }
};
// Mutable forward iterator over a pool. Like const_iterator it stores an
// index into ptr->entries and operator++ decrements it, so iteration runs
// from the last entry towards index 0 and ends at the sentinel index -1 used
// by end(). It converts implicitly to const_iterator.
- class const_iterator
+ class iterator : public std::iterator<std::forward_iterator_tag, K>
{
- const pool<K, OPS> *ptr;
+ friend class pool;
+ protected:
+ pool *ptr;
int index;
+ iterator(pool *ptr, int index) : ptr(ptr), index(index) { }
public:
- const_iterator() { }
- const_iterator(const pool<K, OPS> *ptr, int index) : ptr(ptr), index(index) { }
- const_iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; }
- bool operator==(const const_iterator &other) const { return index == other.index; }
- bool operator!=(const const_iterator &other) const { return index != other.index; }
- const K &operator*() const { return ptr->entries[index].key; }
- const K *operator->() const { return &ptr->entries[index].key; }
// A default-constructed iterator gets the same state as end() (null pool,
// index -1) so that comparing it never reads an uninitialized index.
+ iterator() : ptr(nullptr), index(-1) { }
+ iterator operator++() { index--; return *this; }
+ bool operator==(const iterator &other) const { return index == other.index; }
+ bool operator!=(const iterator &other) const { return index != other.index; }
+ K &operator*() { return ptr->entries[index].udata; }
+ K *operator->() { return &ptr->entries[index].udata; }
+ const K &operator*() const { return ptr->entries[index].udata; }
+ const K *operator->() const { return &ptr->entries[index].udata; }
+ operator const_iterator() const { return const_iterator(ptr, index); }
};
// Default constructor. The body performs no explicit initialization; the
// former init() call is dropped by this change.
pool()
{
- init();
}
// Copy constructor: copies the entry list of other and then calls
// do_rehash() (presumably to build the hash table for the copied entries).
- pool(const pool<K, OPS> &other)
+ pool(const pool &other)
{
- init_from(other);
+ entries = other.entries;
+ do_rehash();
}
// Move constructor: takes over the contents of other by swapping them into
// this newly constructed pool.
- pool(pool<K, OPS> &&other)
+ pool(pool &&other)
{
- init();
swap(other);
}
// Copy assignment: replaces the entry list with a copy of other's and calls
// do_rehash() afterwards. The new version has no explicit self-assignment
// check. Returns *this.
- pool<K, OPS> &operator=(const pool<K, OPS> &other) {
- if (this != &other)
- init_from(other);
+ pool &operator=(const pool &other) {
+ entries = other.entries;
+ do_rehash();
return *this;
}
- pool<K, OPS> &operator=(pool<K, OPS> &&other) {
+ pool &operator=(pool &&other) {
clear();
swap(other);
return *this;
// Constructs a pool from an initializer list by passing each element, in
// list order, to insert().
pool(const std::initializer_list<K> &list)
{
- init();
for (auto &it : list)
insert(it);
}
// Constructs a pool from the iterator range [first, last) by delegating to
// the range overload of insert().
template<class InputIterator>
pool(InputIterator first, InputIterator last)
{
- init();
insert(first, last);
}
insert(*first);
}
- std::pair<iterator, bool> insert(const K &key)
// Inserts a copy of value unless do_lookup() already finds it.
// Returns an iterator to the existing or newly added element, paired with a
// flag that is true only when a new element was added.
+ std::pair<iterator, bool> insert(const K &value)
+ {
+ int hash = do_hash(value);
+ int i = do_lookup(value, hash);
+ if (i >= 0)
+ return std::pair<iterator, bool>(iterator(this, i), false);
+ i = do_insert(value, hash);
+ return std::pair<iterator, bool>(iterator(this, i), true);
+ }
+
// Rvalue overload of insert(): same lookup-then-insert logic and same
// return value as the const-reference overload, but the new element is
// handed to do_insert() as an rvalue so it can be moved into the pool.
// The lookup happens before the forward, while rvalue is still intact.
+ std::pair<iterator, bool> insert(K &&rvalue)
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(rvalue);
+ int i = do_lookup(rvalue, hash);
if (i >= 0)
return std::pair<iterator, bool>(iterator(this, i), false);
- i = insert_at(key, hash);
+ i = do_insert(std::forward<K>(rvalue), hash);
return std::pair<iterator, bool>(iterator(this, i), true);
}
// Constructs a temporary K from args and passes it to insert(); the
// returned pair has the same meaning as for insert().
+ template<typename... Args>
+ std::pair<iterator, bool> emplace(Args&&... args)
+ {
+ return insert(K(std::forward<Args>(args)...));
+ }
+
// Removes key from the pool. The key is looked up first and the resulting
// index is handed to do_erase() together with the hash; the return value of
// do_erase() is passed through unchanged.
// NOTE(review): when the key is absent the index passed on is negative
// (other callers test `i < 0` for "not found"); do_erase() is presumably
// expected to handle that case - confirm.
int erase(const K &key)
{
- int hash = mkhash(key);
- return do_erase(key, hash);
+ int hash = do_hash(key);
+ int index = do_lookup(key, hash);
+ return do_erase(index, hash);
}
// Removes the element that it refers to and returns the iterator advanced
// by one step (operator++ moves to the next lower index).
iterator erase(iterator it)
{
- int hash = mkhash(*it);
- do_erase(*it, hash);
+ int hash = do_hash(*it);
+ do_erase(it.index, hash);
return ++it;
}
// Returns 1 if the lookup finds key (non-negative index), otherwise 0.
int count(const K &key) const
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
return i < 0 ? 0 : 1;
}
// Position-limited count: returns 1 only if key is found at an entry index
// that is not greater than the index of it; returns 0 if the key is absent
// or stored at a higher index.
+ int count(const K &key, const_iterator it) const
+ {
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
+ return i < 0 || i > it.index ? 0 : 1;
+ }
+
iterator find(const K &key)
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
if (i < 0)
return end();
return iterator(this, i);
// Const lookup: returns a const_iterator to key, or end() when the lookup
// yields a negative index.
const_iterator find(const K &key) const
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
if (i < 0)
return end();
return const_iterator(this, i);
}
// Membership test: returns true when the lookup finds key in the pool.
// The function stays const (the signature is unchanged by this patch): it
// performs the same read-only lookup as count(), and dropping the qualifier
// would make it unusable on const pools.
bool operator[](const K &key) const
{
- int hash = mkhash(key);
- int i = lookup_index(key, hash);
+ int hash = do_hash(key);
+ int i = do_lookup(key, hash);
return i >= 0;
}
- void swap(pool<K, OPS> &other)
// Sorts the stored elements with comp (std::less<K> by default) and then
// calls do_rehash(), since the entries change position.
// The comparator arguments are swapped when sorting the entry list because
// iteration starts at the last entry (see begin()), so the elements appear
// in comp order when the pool is iterated.
+ template<typename Compare = std::less<K>>
+ void sort(Compare comp = Compare())
+ {
+ std::sort(entries.begin(), entries.end(), [comp](const entry_t &a, const entry_t &b){ return comp(b.udata, a.udata); });
+ do_rehash();
+ }
+
// Removes the element at begin() and returns a copy of it.
// Precondition: the pool must not be empty. On an empty pool begin() has
// index -1 and the dereference below would read entries[-1].
+ K pop()
+ {
+ iterator it = begin();
+ K ret = *it;
+ erase(it);
+ return ret;
+ }
+
// Exchanges the contents of this pool and other by swapping the hash table
// and the entry list; the former bookkeeping fields are no longer swapped.
+ void swap(pool &other)
{
hashtable.swap(other.hashtable);
entries.swap(other.entries);
- std::swap(free_list, other.free_list);
- std::swap(counter, other.counter);
- std::swap(begin_n, other.begin_n);
- std::swap(begin_seek_count, other.begin_seek_count);
}
// Set equality: two pools are equal when they have the same size and every
// element of this pool is also counted in other. Entry order is irrelevant.
- bool operator==(const pool<K, OPS> &other) const {
- if (counter != other.counter)
+ bool operator==(const pool &other) const {
+ if (size() != other.size())
return false;
- if (counter == 0)
- return true;
- if (entries.size() < other.entries.size())
- for (auto &it : *this) {
- auto oit = other.find(it.first);
- if (oit == other.end() || oit->second != it.second)
- return false;
- }
- else
- for (auto &oit : other) {
- auto it = find(oit.first);
- if (it == end() || it->second != oit.second)
- return false;
- }
+ for (auto &it : entries)
+ if (!other.count(it.udata))
+ return false;
return true;
}
// Inequality: the negation of operator==.
- bool operator!=(const pool<K, OPS> &other) const {
- return !(*this == other);
+ bool operator!=(const pool &other) const {
+ return !operator==(other);
+ }
+
// Hash of the whole pool: starts from mkhash_init and XORs in the hash of
// every stored element. XOR is commutative, so the result does not depend
// on the order of the entries.
// Returns the full unsigned hash value; a bool return type would collapse
// every hash to 0 or 1.
+ unsigned int hash() const {
+ unsigned int hashval = mkhash_init;
+ for (auto &it : entries)
+ hashval ^= ops.hash(it.udata);
+ return hashval;
}
// Capacity and size helpers. With this change they operate directly on the
// entry list instead of a separate element counter:
//   reserve(n) - reserves storage for n entries
//   size()     - number of stored entries
//   empty()    - true when no entries are stored
//   clear()    - drops the hash table and all entries
- size_t size() const { return counter; }
- bool empty() const { return counter == 0; }
- void clear() { hashtable.clear(); entries.clear(); init(); }
+ void reserve(size_t n) { entries.reserve(n); }
+ size_t size() const { return entries.size(); }
+ bool empty() const { return entries.empty(); }
+ void clear() { hashtable.clear(); entries.clear(); }
// Iteration entry points. begin() refers to the last entry (index
// size()-1), element(n) to the entry n steps after begin() (index
// size()-1-n), and end() is the sentinel with a null pool pointer and
// index -1. On an empty pool begin() therefore has index -1 and compares
// equal to end().
- iterator begin() { upd_begin_n(); return iterator(this, begin_n); }
+ iterator begin() { return iterator(this, int(entries.size())-1); }
+ iterator element(int n) { return iterator(this, int(entries.size())-1-n); }
iterator end() { return iterator(nullptr, -1); }
- const_iterator begin() const { ((pool*)this)->upd_begin_n(); return const_iterator(this, begin_n); }
+ const_iterator begin() const { return const_iterator(this, int(entries.size())-1); }
+ const_iterator element(int n) const { return const_iterator(this, int(entries.size())-1-n); }
const_iterator end() const { return const_iterator(nullptr, -1); }
};
// idict maps keys to integer indices. Keys are stored in a pool and each
// key is identified by its entry index in that pool plus the compile-time
// constant offset. operator[] maps an index back to its key.
+template<typename K, int offset, typename OPS>
+class idict
+{
+ pool<K, OPS> database;
+
+public:
// Read-only forward iterator that walks the indices upwards, starting at
// offset. The container is held by pointer (not by reference) so that the
// iterator can be default-constructed and copy-assigned.
+ class const_iterator : public std::iterator<std::forward_iterator_tag, K>
+ {
+ friend class idict;
+ protected:
+ const idict *container;
+ int index;
+ const_iterator(const idict &container, int index) : container(&container), index(index) { }
+ public:
// A default-constructed iterator refers to no container and must not be
// dereferenced; its index is set to offset so comparisons are defined.
+ const_iterator() : container(nullptr), index(offset) { }
+ const_iterator operator++() { index++; return *this; }
+ bool operator==(const const_iterator &other) const { return index == other.index; }
+ bool operator!=(const const_iterator &other) const { return index != other.index; }
+ const K &operator*() const { return (*container)[index]; }
+ const K *operator->() const { return &(*container)[index]; }
+ };
+
// Returns the index of key, inserting the key first if it is not stored yet.
+ int operator()(const K &key)
+ {
+ int hash = database.do_hash(key);
+ int i = database.do_lookup(key, hash);
+ if (i < 0)
+ i = database.do_insert(key, hash);
+ return i + offset;
+ }
+
// Returns the index of key; throws std::out_of_range if the key is unknown.
+ int at(const K &key) const
+ {
+ int hash = database.do_hash(key);
+ int i = database.do_lookup(key, hash);
+ if (i < 0)
+ throw std::out_of_range("idict::at()");
+ return i + offset;
+ }
+
// Returns the index of key, or defval if the key is unknown.
+ int at(const K &key, int defval) const
+ {
+ int hash = database.do_hash(key);
+ int i = database.do_lookup(key, hash);
+ if (i < 0)
+ return defval;
+ return i + offset;
+ }
+
// Returns 1 if key is stored, otherwise 0.
+ int count(const K &key) const
+ {
+ int hash = database.do_hash(key);
+ int i = database.do_lookup(key, hash);
+ return i < 0 ? 0 : 1;
+ }
+
// Registers key (inserting it if necessary) and throws std::out_of_range
// if the index it ends up with differs from i.
+ void expect(const K &key, int i)
+ {
+ int j = (*this)(key);
+ if (i != j)
+ throw std::out_of_range("idict::expect()");
+ }
+
// Returns the key stored under index. The entry access is range-checked
// via at(), which throws std::out_of_range for an invalid index.
+ const K &operator[](int index) const
+ {
+ return database.entries.at(index - offset).udata;
+ }
+
// Exchanges the contents of this idict and other.
+ void swap(idict &other)
+ {
+ database.swap(other.database);
+ }
+
+ void reserve(size_t n) { database.reserve(n); }
+ size_t size() const { return database.size(); }
+ bool empty() const { return database.empty(); }
+ void clear() { database.clear(); }
+
// begin() starts at index offset and end() is one past the last index.
// NOTE(review): element(n) passes n through without adding offset, unlike
// begin(); confirm whether n is meant to be an already-offset index.
+ const_iterator begin() const { return const_iterator(*this, offset); }
+ const_iterator element(int n) const { return const_iterator(*this, n); }
+ const_iterator end() const { return const_iterator(*this, offset + int(size())); }
+};
+
// mfp groups keys into disjoint sets. Every key gets an integer index from
// an idict (offset 0); parents[i] holds the index of the parent of i within
// its set, or -1 when i is the representative (root) of its set.
// Both members are mutable so that const lookups can register new keys and
// shorten parent chains.
+template<typename K, typename OPS>
+class mfp
+{
+ mutable idict<K, 0, OPS> database;
+ mutable std::vector<int> parents;
+
+public:
+ typedef typename idict<K, 0, OPS>::const_iterator const_iterator;
+
// Returns the index of key, registering the key if it is new. parents is
// grown to the database size, so a new key starts as its own root (-1).
+ int operator()(const K &key) const
+ {
+ int i = database(key);
+ parents.resize(database.size(), -1);
+ return i;
+ }
+
// Returns the key stored under index.
+ const K &operator[](int index) const
+ {
+ return database[index];
+ }
+
// Returns the root index of the set containing index i. After the root has
// been found, every node on the path from i to the root is re-pointed
// directly at the root (path compression).
+ int ifind(int i) const
+ {
+ int p = i, k = i;
+
+ while (parents[p] != -1)
+ p = parents[p];
+
+ while (k != p) {
+ int next_k = parents[k];
+ parents[k] = p;
+ k = next_k;
+ }
+
+ return p;
+ }
+
// Merges the sets containing indices i and j: if their roots differ, the
// root of i is attached below the root of j.
+ void imerge(int i, int j)
+ {
+ i = ifind(i);
+ j = ifind(j);
+
+ if (i != j)
+ parents[i] = j;
+ }
+
// Makes index i the root of its set: every node on the chain from i up to
// the old root is re-pointed at i, and finally i itself is marked as root.
+ void ipromote(int i)
+ {
+ int k = i;
+
+ while (k != -1) {
+ int next_k = parents[k];
+ parents[k] = i;
+ k = next_k;
+ }
+
+ parents[i] = -1;
+ }
+
// Returns the root index of the set containing key a, registering a first
// if it is not known yet.
+ int lookup(const K &a) const
+ {
+ return ifind((*this)(a));
+ }
+
// Returns the key of the root of the set containing a. An unknown key is
// not registered; in that case the argument itself is returned by
// reference, so the caller must keep a alive while using the result.
+ const K &find(const K &a) const
+ {
+ int i = database.at(a, -1);
+ if (i < 0)
+ return a;
+ return (*this)[ifind(i)];
+ }
+
// Merges the sets containing keys a and b, registering either key if new.
+ void merge(const K &a, const K &b)
+ {
+ imerge((*this)(a), (*this)(b));
+ }
+
// Makes key a the root of its set; does nothing if a is not registered.
+ void promote(const K &a)
+ {
+ int i = database.at(a, -1);
+ if (i >= 0)
+ ipromote(i);
+ }
+
// Exchanges the contents of this mfp and other.
+ void swap(mfp &other)
+ {
+ database.swap(other.database);
+ parents.swap(other.parents);
+ }
+
+ void reserve(size_t n) { database.reserve(n); }
+ size_t size() const { return database.size(); }
+ bool empty() const { return database.empty(); }
+ void clear() { database.clear(); parents.clear(); }
+
// Iteration is delegated to the underlying idict and visits all keys.
+ const_iterator begin() const { return database.begin(); }
+ const_iterator element(int n) const { return database.element(n); }
+ const_iterator end() const { return database.end(); }
+};
+
} /* namespace hashlib */
#endif