new hashlib::pool<> (derived from new dict)
authorClifford Wolf <clifford@clifford.at>
Wed, 31 Dec 2014 03:19:04 +0000 (04:19 +0100)
committerClifford Wolf <clifford@clifford.at>
Wed, 31 Dec 2014 03:19:04 +0000 (04:19 +0100)
kernel/hashlib.h

index a34d194f1b6621fbda36201795d5de8c03c8ed81..6330841f3d4991200595d364d230ac380f4f3cad 100644 (file)
@@ -289,13 +289,13 @@ class dict
 public:
        class iterator
        {
-               friend dict<K, T, OPS>;
+               friend dict;
        protected:
-               dict<K, T, OPS> *ptr;
+               dict *ptr;
                int index;
        public:
                iterator() { }
-               iterator(dict<K, T, OPS> *ptr, int index) : ptr(ptr), index(index) { }
+               iterator(dict *ptr, int index) : ptr(ptr), index(index) { }
                iterator operator++() { index--; return *this; }
                bool operator==(const iterator &other) const { return index == other.index; }
                bool operator!=(const iterator &other) const { return index != other.index; }
@@ -307,13 +307,13 @@ public:
 
        class const_iterator
        {
-               friend dict<K, T, OPS>;
+               friend dict;
        protected:
-               const dict<K, T, OPS> *ptr;
+               const dict *ptr;
                int index;
        public:
                const_iterator() { }
-               const_iterator(const dict<K, T, OPS> *ptr, int index) : ptr(ptr), index(index) { }
+               const_iterator(const dict *ptr, int index) : ptr(ptr), index(index) { }
                const_iterator operator++() { index--; return *this; }
                bool operator==(const const_iterator &other) const { return index == other.index; }
                bool operator!=(const const_iterator &other) const { return index != other.index; }
@@ -325,24 +325,24 @@ public:
        {
        }
 
-       dict(const dict<K, T, OPS> &other)
+       dict(const dict &other)
        {
                entries = other.entries;
                do_rehash();
        }
 
-       dict(dict<K, T, OPS> &&other)
+       dict(dict &&other)
        {
                swap(other);
        }
 
-       dict<K, T, OPS> &operator=(const dict<K, T, OPS> &other) {
+       dict &operator=(const dict &other) {
                entries = other.entries;
                do_rehash();
                return *this;
        }
 
-       dict<K, T, OPS> &operator=(dict<K, T, OPS> &&other) {
+       dict &operator=(dict &&other) {
                clear();
                swap(other);
                return *this;
@@ -443,13 +443,13 @@ public:
                return entries[i].udata.second;
        }
 
-       void swap(dict<K, T, OPS> &other)
+       void swap(dict &other)
        {
                hashtable.swap(other.hashtable);
                entries.swap(other.entries);
        }
 
-       bool operator==(const dict<K, T, OPS> &other) const {
+       bool operator==(const dict &other) const {
                if (size() != other.size())
                        return false;
                for (auto &it : entries) {
@@ -460,7 +460,7 @@ public:
                return true;
        }
 
-       bool operator!=(const dict<K, T, OPS> &other) const {
+       bool operator!=(const dict &other) const {
                return !(*this == other);
        }
 
@@ -475,54 +475,38 @@ public:
        const_iterator end() const { return const_iterator(nullptr, -1); }
 };
 
+// ********************************************************************************
+// ********************************************************************************
+// ********************************************************************************
+// ********************************************************************************
+// ********************************************************************************
+
+
 template<typename K, typename OPS = hash_ops<K>>
 class pool
 {
        struct entry_t
        {
-               int link;
-               K key;
-
-               entry_t() : link(-1) { }
-               entry_t(const K &key) : link(1), key(key) { }
+               K udata;
+               int next;
 
-               bool is_free() const { return link < 0; }
-               int get_next() const { return (link > 0 ? link : -link) - 2; }
-               bool get_last() const { return get_next() == -1; }
-               void set_next_used(int next) { link = next + 2; }
-               void set_next_free(int next) { link = -(next + 2); }
+               entry_t() { }
+               entry_t(const K &udata, int next) : udata(udata), next(next) { }
        };
 
        std::vector<int> hashtable;
        std::vector<entry_t> entries;
-       int free_list, counter, begin_n;
-       int begin_seek_count;
        OPS ops;
 
-       void init()
-       {
-               free_list = -1;
-               counter = 0;
-               begin_n = -1;
-               begin_seek_count = 0;
-       }
-
-       void init_from(const pool<K, OPS> &other)
-       {
-               hashtable.clear();
-               entries.clear();
-
-               counter = other.size();
-               begin_n = counter - 1;
-               entries.reserve(counter);
-
-               for (auto &it : other)
-                       entries.push_back(entry_t(it));
-
-               rehash();
+#if 0
+       static inline void do_assert(bool cond) {
+               if (!cond) throw std::runtime_error("pool<> assert failed.");
        }
+#else
+       static inline void do_assert(bool) { }
+#endif
 
-       int mkhash(const K &key) const
+       int do_hash(const K &key) const
        {
                unsigned int hash = 0;
                if (!hashtable.empty())
@@ -530,190 +514,152 @@ class pool
                return hash;
        }
 
-       void upd_begin_n(bool do_refree = true)
+       void do_rehash()
        {
-               if (begin_n < -1) {
-                       begin_n = -(begin_n+2);
-                       while (begin_n >= 0 && entries[begin_n].is_free()) { begin_seek_count++; begin_n--; }
-                       if (do_refree && begin_seek_count > int(entries.size() / 2)) refree();
+               hashtable.clear();
+               hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1);
+
+               for (int i = 0; i < int(entries.size()); i++) {
+                       do_assert(-1 <= entries[i].next && entries[i].next < int(entries.size()));
+                       int hash = do_hash(entries[i].udata);
+                       entries[i].next = hashtable[hash];
+                       hashtable[hash] = i;
                }
        }
 
-       void refree()
+       int do_erase(int index, int hash)
        {
-               free_list = -1;
-               begin_n = -1;
-
-               int last_free = -1;
-               for (int i = 0; i < int(entries.size()); i++)
-                       if (entries[i].is_free()) {
-                               if (last_free != -1)
-                                       entries[last_free].set_next_free(i);
-                               else
-                                       free_list = i;
-                               last_free = i;
-                       } else
-                               begin_n = i;
-
-               if (last_free != -1)
-                       entries[last_free].set_next_free(-1);
-
-               begin_seek_count = 0;
-       }
+               do_assert(index < int(entries.size()));
+               if (hashtable.empty() || index < 0)
+                       return 0;
 
-       void rehash()
-       {
-               upd_begin_n(false);
-               entries.resize(begin_n + 1);
+               int k = hashtable[hash];
+               if (k == index) {
+                       hashtable[hash] = entries[index].next;
+               } else {
+                       while (entries[k].next != index) {
+                               k = entries[k].next;
+                               do_assert(0 <= k && k < int(entries.size()));
+                       }
+                       entries[k].next = entries[index].next;
+               }
 
-               free_list = -1;
-               begin_n = -1;
+               int back_idx = entries.size()-1;
 
-               hashtable.clear();
-               hashtable.resize(hashtable_size(entries.size() * hashtable_size_factor), -1);
+               if (index != back_idx)
+               {
+                       int back_hash = do_hash(entries[back_idx].udata);
 
-               int last_free = -1;
-               for (int i = 0; i < int(entries.size()); i++)
-                       if (entries[i].is_free()) {
-                               if (last_free != -1)
-                                       entries[last_free].set_next_free(i);
-                               else
-                                       free_list = i;
-                               last_free = i;
+                       k = hashtable[back_hash];
+                       if (k == back_idx) {
+                               hashtable[back_hash] = index;
                        } else {
-                               int hash = mkhash(entries[i].key);
-                               entries[i].set_next_used(hashtable[hash]);
-                               hashtable[hash] = i;
-                               begin_n = i;
+                               while (entries[k].next != back_idx) {
+                                       k = entries[k].next;
+                                       do_assert(0 <= k && k < int(entries.size()));
+                               }
+                               entries[k].next = index;
                        }
 
-               if (last_free != -1)
-                       entries[last_free].set_next_free(-1);
-
-               begin_seek_count = 0;
-       }
-
-       int do_erase(const K &key, int hash)
-       {
-               int last_index = -1;
-               int index = hashtable.empty() ? -1 : hashtable[hash];
-               while (1) {
-                       if (index < 0)
-                               return 0;
-                       if (ops.cmp(entries[index].key, key)) {
-                               if (last_index < 0)
-                                       hashtable[hash] = entries[index].get_next();
-                               else
-                                       entries[last_index].set_next_used(entries[index].get_next());
-                               entries[index].key = K();
-                               entries[index].set_next_free(free_list);
-                               free_list = index;
-                               if (--counter == 0)
-                                       clear();
-                               else if (index == begin_n)
-                                       begin_n = -(begin_n+2);
-                               return 1;
-                       }
-                       last_index = index;
-                       index = entries[index].get_next();
+                       entries[index] = std::move(entries[back_idx]);
                }
+
+               entries.pop_back();
+
+               if (entries.empty())
+                       hashtable.clear();
+
+               return 1;
        }
 
-       int lookup_index(const K &key, int hash) const
+       int do_lookup(const K &key, int &hash) const
        {
-               int index = hashtable.empty() ? -1 : hashtable[hash];
-               while (1) {
-                       if (index < 0)
-                               return -1;
-                       if (ops.cmp(entries[index].key, key))
-                               return index;
-                       index = entries[index].get_next();
+               if (hashtable.empty())
+                       return -1;
+
+               if (entries.size() * hashtable_size_trigger > hashtable.size()) {
+                       ((pool*)this)->do_rehash();
+                       hash = do_hash(key);
                }
+
+               int index = hashtable[hash];
+
+               while (index >= 0 && !ops.cmp(entries[index].udata, key)) {
+                       index = entries[index].next;
+                       do_assert(-1 <= index && index < int(entries.size()));
+               }
+
+               return index;
        }
 
-       int insert_at(const K &key, int hash)
+       int do_insert(const K &value, int &hash)
        {
-               if (free_list < 0)
-               {
-                       free_list = entries.size();
-                       entries.push_back(entry_t());
-
-                       if (entries.size() * hashtable_size_trigger > hashtable.size()) {
-                               int i = free_list;
-                               entries[i].key = key;
-                               entries[i].set_next_used(0);
-                               begin_n = i;
-                               counter++;
-                               rehash();
-                               return i;
-                       }
+               if (hashtable.empty()) {
+                       entries.push_back(entry_t(value, -1));
+                       do_rehash();
+                       hash = do_hash(value);
+               } else {
+                       entries.push_back(entry_t(value, hashtable[hash]));
+                       hashtable[hash] = entries.size() - 1;
                }
-
-               int i = free_list;
-               free_list = entries[i].get_next();
-               entries[i].key = key;
-               entries[i].set_next_used(hashtable[hash]);
-               hashtable[hash] = i;
-               if ((begin_n < -1 && -(begin_n+2) <= i) || (begin_n >= -1 && begin_n <= i))
-                       begin_n = i;
-               counter++;
-               return i;
+               return entries.size() - 1;
        }
 
 public:
        class iterator
        {
-               pool<K, OPS> *ptr;
+               friend pool;
+       protected:
+               pool *ptr;
                int index;
        public:
                iterator() { }
-               iterator(pool<K, OPS> *ptr, int index) : ptr(ptr), index(index) { }
-               iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; }
+               iterator(pool *ptr, int index) : ptr(ptr), index(index) { }
+               iterator operator++() { index--; return *this; }
                bool operator==(const iterator &other) const { return index == other.index; }
                bool operator!=(const iterator &other) const { return index != other.index; }
-               K &operator*() { return ptr->entries[index].key; }
-               K *operator->() { return &ptr->entries[index].key; }
-               const K &operator*() const { return ptr->entries[index].key; }
-               const K *operator->() const { return &ptr->entries[index].key; }
+               const K &operator*() const { return ptr->entries[index].udata; }
+               const K *operator->() const { return &ptr->entries[index].udata; }
        };
 
        class const_iterator
        {
-               const pool<K, OPS> *ptr;
+               friend pool;
+       protected:
+               const pool *ptr;
                int index;
        public:
                const_iterator() { }
-               const_iterator(const pool<K, OPS> *ptr, int index) : ptr(ptr), index(index) { }
-               const_iterator operator++() { do index--; while (index >= 0 && ptr->entries[index].is_free()); return *this; }
+               const_iterator(const pool *ptr, int index) : ptr(ptr), index(index) { }
+               const_iterator operator++() { index--; return *this; }
                bool operator==(const const_iterator &other) const { return index == other.index; }
                bool operator!=(const const_iterator &other) const { return index != other.index; }
-               const K &operator*() const { return ptr->entries[index].key; }
-               const K *operator->() const { return &ptr->entries[index].key; }
+               const K &operator*() const { return ptr->entries[index].udata; }
+               const K *operator->() const { return &ptr->entries[index].udata; }
        };
 
        pool()
        {
-               init();
        }
 
-       pool(const pool<K, OPS> &other)
+       pool(const pool &other)
        {
-               init_from(other);
+               entries = other.entries;
+               do_rehash();
        }
 
-       pool(pool<K, OPS> &&other)
+       pool(pool &&other)
        {
-               init();
                swap(other);
        }
 
-       pool<K, OPS> &operator=(const pool<K, OPS> &other) {
-               if (this != &other)
-                       init_from(other);
+       pool &operator=(const pool &other) {
+               entries = other.entries;
+               do_rehash();
                return *this;
        }
 
-       pool<K, OPS> &operator=(pool<K, OPS> &&other) {
+       pool &operator=(pool &&other) {
                clear();
                swap(other);
                return *this;
@@ -721,7 +667,6 @@ public:
 
        pool(const std::initializer_list<K> &list)
        {
-               init();
                for (auto &it : list)
                        insert(it);
        }
@@ -729,7 +674,6 @@ public:
        template<class InputIterator>
        pool(InputIterator first, InputIterator last)
        {
-               init();
                insert(first, last);
        }
 
@@ -740,40 +684,41 @@ public:
                        insert(*first);
        }
 
-       std::pair<iterator, bool> insert(const K &key)
+       std::pair<iterator, bool> insert(const K &value)
        {
-               int hash = mkhash(key);
-               int i = lookup_index(key, hash);
+               int hash = do_hash(value);
+               int i = do_lookup(value, hash);
                if (i >= 0)
                        return std::pair<iterator, bool>(iterator(this, i), false);
-               i = insert_at(key, hash);
+               i = do_insert(value, hash);
                return std::pair<iterator, bool>(iterator(this, i), true);
        }
 
        int erase(const K &key)
        {
-               int hash = mkhash(key);
-               return do_erase(key, hash);
+               int hash = do_hash(key);
+               int index = do_lookup(key, hash);
+               return do_erase(index, hash);
        }
 
        iterator erase(iterator it)
        {
-               int hash = mkhash(*it);
-               do_erase(*it, hash);
+               int hash = do_hash(it->first);
+               do_erase(it.index, hash);
                return ++it;
        }
 
        int count(const K &key) const
        {
-               int hash = mkhash(key);
-               int i = lookup_index(key, hash);
+               int hash = do_hash(key);
+               int i = do_lookup(key, hash);
                return i < 0 ? 0 : 1;
        }
 
        iterator find(const K &key)
        {
-               int hash = mkhash(key);
-               int i = lookup_index(key, hash);
+               int hash = do_hash(key);
+               int i = do_lookup(key, hash);
                if (i < 0)
                        return end();
                return iterator(this, i);
@@ -781,62 +726,47 @@ public:
 
        const_iterator find(const K &key) const
        {
-               int hash = mkhash(key);
-               int i = lookup_index(key, hash);
+               int hash = do_hash(key);
+               int i = do_lookup(key, hash);
                if (i < 0)
                        return end();
                return const_iterator(this, i);
        }
 
-       bool operator[](const K &key) const
+       bool operator[](const K &key)
        {
-               int hash = mkhash(key);
-               int i = lookup_index(key, hash);
+               int hash = do_hash(key);
+               int i = do_lookup(key, hash);
                return i >= 0;
        }
 
-       void swap(pool<K, OPS> &other)
+       void swap(pool &other)
        {
                hashtable.swap(other.hashtable);
                entries.swap(other.entries);
-               std::swap(free_list, other.free_list);
-               std::swap(counter, other.counter);
-               std::swap(begin_n, other.begin_n);
-               std::swap(begin_seek_count, other.begin_seek_count);
        }
 
-       bool operator==(const pool<K, OPS> &other) const {
-               if (counter != other.counter)
+       bool operator==(const pool &other) const {
+               if (size() != other.size())
                        return false;
-               if (counter == 0)
-                       return true;
-               if (entries.size() < other.entries.size())
-                       for (auto &it : *this) {
-                               auto oit = other.find(it.first);
-                               if (oit == other.end() || oit->second != it.second)
-                                       return false;
-                       }
-               else
-                       for (auto &oit : other) {
-                               auto it = find(oit.first);
-                               if (it == end() || it->second != oit.second)
-                                       return false;
-                       }
+               for (auto &it : entries)
+                       if (!other.count(it.udata))
+                               return false;
                return true;
        }
 
-       bool operator!=(const pool<K, OPS> &other) const {
+       bool operator!=(const pool &other) const {
                return !(*this == other);
        }
 
-       size_t size() const { return counter; }
-       bool empty() const { return counter == 0; }
-       void clear() { hashtable.clear(); entries.clear(); init(); }
+       size_t size() const { return entries.size(); }
+       bool empty() const { return entries.empty(); }
+       void clear() { hashtable.clear(); entries.clear(); }
 
-       iterator begin() { upd_begin_n(); return iterator(this, begin_n); }
+       iterator begin() { return iterator(this, int(entries.size())-1); }
        iterator end() { return iterator(nullptr, -1); }
 
-       const_iterator begin() const { ((pool*)this)->upd_begin_n(); return const_iterator(this, begin_n); }
+       const_iterator begin() const { return const_iterator(this, int(entries.size())-1); }
        const_iterator end() const { return const_iterator(nullptr, -1); }
 };