From 6dcf042368012e2d7ce1626ee5d378bf3ad0ccfc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fran=C3=A7ois=20Dumont?= Date: Mon, 20 Jan 2020 19:01:18 +0100 Subject: [PATCH] libstdc++: Do not over-size hashtable buckets on range insertion We used to consider range size on insertion but on unique keys container not all range values might be inserted resulting in over-sizing. In this case we just consider user reservation and if none then the container will adapt to actually inserted elements. libstdc++-v3/ChangeLog: * include/bits/hashtable.h (_Hashtable<>(_InputIterator, _InputIterator, size_t, const _H1&, const _H2&, const _Hash&, const _Equal&, const _ExtractKey&, const allocator_type&, true_type)): New. (_Hashtable<>(_InputIterator, _InputIterator, size_t, const _H1&, const _H2&, const _Hash&, const _Equal&, const _ExtractKey&, const allocator_type&, false_type)): New. (_Hashtable<>(_InputIterator, _InputIterator, size_t, const _H1&, const _H2&, const _Hash&, const _Equal&, const _ExtractKey&, const allocator_type&)): Delegate to latters. (operator=(initializer_list)): Rehash if too small. (_M_insert(_Arg&&, const _NodeGenerator&, true_type)): Remove size_t len parameter. * include/bits/hashtable_policy.h (_Insert_base<>::_M_insert_range): Do not try to get input range distance. * testsuite/23_containers/unordered_set/cons/bucket_hint.cc: New test. * testsuite/23_containers/unordered_set/modifiers/insert.cc: New test. --- libstdc++-v3/include/bits/hashtable.h | 90 ++++++++++++++----- libstdc++-v3/include/bits/hashtable_policy.h | 36 +++----- .../unordered_set/cons/bucket_hint.cc | 63 +++++++++++++ .../unordered_set/modifiers/insert.cc | 66 ++++++++++++++ 4 files changed, 210 insertions(+), 45 deletions(-) create mode 100644 libstdc++-v3/testsuite/23_containers/unordered_set/cons/bucket_hint.cc create mode 100644 libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/insert.cc diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index 248dd62589b..9d1ad592553 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -460,6 +460,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __hashtable_alloc(__node_alloc_type(__a)) { } + template + _Hashtable(_InputIterator __first, _InputIterator __last, + size_type __bkt_count_hint, + const _H1&, const _H2&, const _Hash&, + const _Equal&, const _ExtractKey&, + const allocator_type&, + true_type __uks); + + template + _Hashtable(_InputIterator __first, _InputIterator __last, + size_type __bkt_count_hint, + const _H1&, const _H2&, const _Hash&, + const _Equal&, const _ExtractKey&, + const allocator_type&, + false_type __uks); + public: // Constructor, destructor, assignment, swap _Hashtable() = default; @@ -468,13 +484,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const _Equal&, const _ExtractKey&, const allocator_type&); - template - _Hashtable(_InputIterator __first, _InputIterator __last, - size_type __bkt_count_hint, - const _H1&, const _H2&, const _Hash&, - const _Equal&, const _ExtractKey&, - const allocator_type&); - _Hashtable(const _Hashtable&); _Hashtable(_Hashtable&&) noexcept; @@ -484,6 +493,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Hashtable(_Hashtable&&, const allocator_type&); // Use delegating constructors. + template + _Hashtable(_InputIterator __first, _InputIterator __last, + size_type __bkt_count_hint, + const _H1& __h1, const _H2& __h2, const _Hash& __h, + const _Equal& __eq, const _ExtractKey& __exk, + const allocator_type& __a) + : _Hashtable(__first, __last, __bkt_count_hint, + __h1, __h2, __h, __eq, __exk, __a, __unique_keys{}) + { } + explicit _Hashtable(const allocator_type& __a) : __hashtable_alloc(__node_alloc_type(__a)) @@ -540,7 +559,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __reuse_or_alloc_node_gen_t __roan(_M_begin(), *this); _M_before_begin._M_nxt = nullptr; clear(); - this->_M_insert_range(__l.begin(), __l.end(), __roan, __unique_keys()); + + // We consider that all elements of __l are going to be inserted. + auto __l_bkt_count = _M_rehash_policy._M_bkt_for_elements(__l.size()); + + // Do not shrink to keep potential user reservation. + if (_M_bucket_count < __l_bkt_count) + rehash(__l_bkt_count); + + this->_M_insert_range(__l.begin(), __l.end(), __roan, __unique_keys{}); return *this; } @@ -749,41 +776,41 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Emplace with hint, useless when keys are unique. template iterator - _M_emplace(const_iterator, true_type __uk, _Args&&... __args) - { return _M_emplace(__uk, std::forward<_Args>(__args)...).first; } + _M_emplace(const_iterator, true_type __uks, _Args&&... __args) + { return _M_emplace(__uks, std::forward<_Args>(__args)...).first; } template iterator - _M_emplace(const_iterator, false_type, _Args&&... __args); + _M_emplace(const_iterator, false_type __uks, _Args&&... __args); template std::pair - _M_insert(_Arg&&, const _NodeGenerator&, true_type, size_type = 1); + _M_insert(_Arg&&, const _NodeGenerator&, true_type __uks); template iterator _M_insert(_Arg&& __arg, const _NodeGenerator& __node_gen, - false_type __uk) + false_type __uks) { return _M_insert(cend(), std::forward<_Arg>(__arg), __node_gen, - __uk); + __uks); } // Insert with hint, not used when keys are unique. template iterator _M_insert(const_iterator, _Arg&& __arg, - const _NodeGenerator& __node_gen, true_type __uk) + const _NodeGenerator& __node_gen, true_type __uks) { return - _M_insert(std::forward<_Arg>(__arg), __node_gen, __uk).first; + _M_insert(std::forward<_Arg>(__arg), __node_gen, __uks).first; } // Insert with hint when keys are not unique. template iterator _M_insert(const_iterator, _Arg&&, - const _NodeGenerator&, false_type); + const _NodeGenerator&, false_type __uks); size_type _M_erase(true_type, const key_type&); @@ -1032,7 +1059,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION size_type __bkt_count_hint, const _H1& __h1, const _H2& __h2, const _Hash& __h, const _Equal& __eq, const _ExtractKey& __exk, - const allocator_type& __a) + const allocator_type& __a, true_type /* __uks */) + : _Hashtable(__bkt_count_hint, __h1, __h2, __h, __eq, __exk, __a) + { + for (; __f != __l; ++__f) + this->insert(*__f); + } + + template + template + _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, + _H1, _H2, _Hash, _RehashPolicy, _Traits>:: + _Hashtable(_InputIterator __f, _InputIterator __l, + size_type __bkt_count_hint, + const _H1& __h1, const _H2& __h2, const _Hash& __h, + const _Equal& __eq, const _ExtractKey& __exk, + const allocator_type& __a, false_type /* __uks */) : _Hashtable(__h1, __h2, __h, __eq, __exk, __a) { auto __nb_elems = __detail::__distance_fw(__f, __l); @@ -1802,8 +1847,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>:: - _M_insert(_Arg&& __v, const _NodeGenerator& __node_gen, true_type, - size_type __n_elt) + _M_insert(_Arg&& __v, const _NodeGenerator& __node_gen, + true_type /* __uks */) -> pair { const key_type& __k = this->_M_extract()(__v); @@ -1815,7 +1860,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Scoped_node __node{ __node_gen(std::forward<_Arg>(__v)), this }; auto __pos - = _M_insert_unique_node(__k, __bkt, __code, __node._M_node, __n_elt); + = _M_insert_unique_node(__k, __bkt, __code, __node._M_node); __node._M_node = nullptr; return { __pos, true }; } @@ -1830,7 +1875,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>:: _M_insert(const_iterator __hint, _Arg&& __v, - const _NodeGenerator& __node_gen, false_type) + const _NodeGenerator& __node_gen, + false_type /* __uks */) -> iterator { // First compute the hash code so that we don't do anything if it diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index a91ae21a906..26ea9bf29ce 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -820,12 +820,12 @@ namespace __detail template void _M_insert_range(_InputIterator __first, _InputIterator __last, - const _NodeGetter&, true_type); + const _NodeGetter&, true_type __uks); template void _M_insert_range(_InputIterator __first, _InputIterator __last, - const _NodeGetter&, false_type); + const _NodeGetter&, false_type __uks); public: __ireturn_type @@ -833,7 +833,7 @@ namespace __detail { __hashtable& __h = _M_conjure_hashtable(); __node_gen_type __node_gen(__h); - return __h._M_insert(__v, __node_gen, __unique_keys()); + return __h._M_insert(__v, __node_gen, __unique_keys{}); } iterator @@ -841,7 +841,7 @@ namespace __detail { __hashtable& __h = _M_conjure_hashtable(); __node_gen_type __node_gen(__h); - return __h._M_insert(__hint, __v, __node_gen, __unique_keys()); + return __h._M_insert(__hint, __v, __node_gen, __unique_keys{}); } template @@ -876,7 +876,7 @@ namespace __detail { __hashtable& __h = _M_conjure_hashtable(); __node_gen_type __node_gen(__h); - return _M_insert_range(__first, __last, __node_gen, __unique_keys()); + return _M_insert_range(__first, __last, __node_gen, __unique_keys{}); } }; @@ -889,21 +889,11 @@ namespace __detail _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>:: _M_insert_range(_InputIterator __first, _InputIterator __last, - const _NodeGetter& __node_gen, true_type) + const _NodeGetter& __node_gen, true_type __uks) { - size_type __n_elt = __detail::__distance_fw(__first, __last); - if (__n_elt == 0) - return; - __hashtable& __h = _M_conjure_hashtable(); for (; __first != __last; ++__first) - { - if (__h._M_insert(*__first, __node_gen, __unique_keys(), - __n_elt).second) - __n_elt = 1; - else if (__n_elt != 1) - --__n_elt; - } + __h._M_insert(*__first, __node_gen, __uks); } template:: _M_insert_range(_InputIterator __first, _InputIterator __last, - const _NodeGetter& __node_gen, false_type) + const _NodeGetter& __node_gen, false_type __uks) { using __rehash_type = typename __hashtable::__rehash_type; using __rehash_state = typename __hashtable::__rehash_state; @@ -936,7 +926,7 @@ namespace __detail __h._M_rehash(__do_rehash.second, __saved_state); for (; __first != __last; ++__first) - __h._M_insert(*__first, __node_gen, __unique_keys()); + __h._M_insert(*__first, __node_gen, __uks); } /** @@ -986,7 +976,7 @@ namespace __detail { __hashtable& __h = this->_M_conjure_hashtable(); __node_gen_type __node_gen(__h); - return __h._M_insert(std::move(__v), __node_gen, __unique_keys()); + return __h._M_insert(std::move(__v), __node_gen, __unique_keys{}); } iterator @@ -995,7 +985,7 @@ namespace __detail __hashtable& __h = this->_M_conjure_hashtable(); __node_gen_type __node_gen(__h); return __h._M_insert(__hint, std::move(__v), __node_gen, - __unique_keys()); + __unique_keys{}); } }; @@ -1036,7 +1026,7 @@ namespace __detail insert(_Pair&& __v) { __hashtable& __h = this->_M_conjure_hashtable(); - return __h._M_emplace(__unique_keys(), std::forward<_Pair>(__v)); + return __h._M_emplace(__unique_keys{}, std::forward<_Pair>(__v)); } template> @@ -1044,7 +1034,7 @@ namespace __detail insert(const_iterator __hint, _Pair&& __v) { __hashtable& __h = this->_M_conjure_hashtable(); - return __h._M_emplace(__hint, __unique_keys(), + return __h._M_emplace(__hint, __unique_keys{}, std::forward<_Pair>(__v)); } }; diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/cons/bucket_hint.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/cons/bucket_hint.cc new file mode 100644 index 00000000000..100eb5a27df --- /dev/null +++ b/libstdc++-v3/testsuite/23_containers/unordered_set/cons/bucket_hint.cc @@ -0,0 +1,63 @@ +// { dg-do run { target c++11 } } + +// Copyright (C) 2020 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +#include +#include +#include + +#include + +void test01() +{ + std::unordered_set a; + a.reserve(2); + + std::unordered_set b({ 0, 1, 0, 1, 0, 1, 0, 1 }, a.bucket_count()); + VERIFY( b.bucket_count() == a.bucket_count() ); +} + +void test02() +{ + std::unordered_set a; + a.reserve(2); + + std::vector v { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }; + + std::unordered_set b(v.begin(), v.end(), a.bucket_count()); + VERIFY( b.bucket_count() == a.bucket_count() ); +} + +void test03() +{ + std::unordered_set a; + a.reserve(2); + + std::forward_list fl { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }; + + std::unordered_set b(fl.begin(), fl.end(), a.bucket_count()); + VERIFY( b.bucket_count() == a.bucket_count() ); +} + +int main() +{ + test01(); + test02(); + test03(); + return 0; +} diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/insert.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/insert.cc new file mode 100644 index 00000000000..acf01c73b1b --- /dev/null +++ b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/insert.cc @@ -0,0 +1,66 @@ +// { dg-do run { target c++11 } } + +// Copyright (C) 2020 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +#include +#include +#include + +#include + +void test01() +{ + std::unordered_set a; + a.reserve(2); + + auto bkt_count = a.bucket_count(); + a.insert({ 0, 1, 0, 1, 0, 1, 0, 1 }); + VERIFY( a.bucket_count() == bkt_count ); +} + +void test02() +{ + std::unordered_set a; + a.reserve(2); + + std::vector v { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }; + + auto bkt_count = a.bucket_count(); + a.insert(v.begin(), v.end()); + VERIFY( a.bucket_count() == bkt_count ); +} + +void test03() +{ + std::unordered_set a; + a.reserve(2); + + std::forward_list fl { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }; + + auto bkt_count = a.bucket_count(); + a.insert(fl.begin(), fl.end()); + VERIFY( a.bucket_count() == bkt_count ); +} + +int main() +{ + test01(); + test02(); + test03(); + return 0; +} -- 2.30.2