From f9985df51b32cb265f429d024e955e71fd23a4f1 Mon Sep 17 00:00:00 2001 From: Johannes Singler Date: Mon, 7 Apr 2008 08:27:34 +0000 Subject: [PATCH] re PR libstdc++/35588 ([parallel mode] parallel std::sort and bind()) 2008-04-07 Johannes Singler * include/parallel/multiway_merge.h: Moved decisions to compiletime instead of runtime. * include/parallel/losertree.h: Removed obsolete variants, added variant that uses pointers in the loser tree. * include/parallel/types.h: Remove obsolete settings options from enum. * include/parallel/features.h: Remove obsolete compile-time switches. * include/parallel/compiletime_settings.h: Remove obsolete variant that copies back *after* sorting. * include/parallel/tags.h: Add one new tag for compile-time switch. * include/parallel/merge.h: Adapt to changes in multiway_merge.h. * include/parallel/multiway_mergesort.h: Adapt to changes in multiway_merge.h. Factor out splitting variants. Remove obsolete variant that copies back *after* sorting. * include/parallel/sort.h: Adapt to changes in multiway_mergesort.h. * testsuite/25_algorithms/sort/35588.cc: Added test case from / for PR 35588. 
From-SVN: r133975 --- libstdc++-v3/ChangeLog | 26 + .../include/parallel/compiletime_settings.h | 10 +- libstdc++-v3/include/parallel/features.h | 60 - libstdc++-v3/include/parallel/losertree.h | 1883 ++++++------- libstdc++-v3/include/parallel/merge.h | 17 +- .../include/parallel/multiway_merge.h | 2421 +++++++++-------- .../include/parallel/multiway_mergesort.h | 420 +-- libstdc++-v3/include/parallel/sort.h | 44 +- libstdc++-v3/include/parallel/tags.h | 3 + libstdc++-v3/include/parallel/types.h | 11 +- .../testsuite/25_algorithms/sort/35588.cc | 32 + 11 files changed, 2538 insertions(+), 2389 deletions(-) create mode 100644 libstdc++-v3/testsuite/25_algorithms/sort/35588.cc diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index f5900740560..e452aaab6a8 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,29 @@ +2008-04-07 Johannes Singler + + * include/parallel/multiway_merge.h: + Moved decisions to compiletime instead of runtime. + * include/parallel/losertree.h: + Removed obsolete variants, added variant that uses pointers + in the loser tree. + * include/parallel/types.h: + Remove obsolete settings options from enum. + * include/parallel/features.h: + Remove obsolete compile-time switches. + * include/parallel/compiletime_settings.h: + Remove obsolete variant that copies back *after* sorting. + * include/parallel/tags.h: + Add one new tag for compile-time switch. + * include/parallel/merge.h: + Adapt to changes in multiway_merge.h. + * include/parallel/multiway_mergesort.h: + Adapt to changes in multiway_merge.h. + Factor out splitting variants. + Remove obsolete variant that copies back *after* sorting. + * include/parallel/sort.h: + Adapt to changes in multiway_mergesort.h. + * testsuite/25_algorithms/sort/35588.cc: + Added test case from / for PR 35588. 
+ 2008-03-29 Paolo Carlini PR libstdc++/35725 diff --git a/libstdc++-v3/include/parallel/compiletime_settings.h b/libstdc++-v3/include/parallel/compiletime_settings.h index edaea3856ad..8ab89aa8ee9 100644 --- a/libstdc++-v3/include/parallel/compiletime_settings.h +++ b/libstdc++-v3/include/parallel/compiletime_settings.h @@ -73,17 +73,9 @@ * __gnu_parallel::parallel_random_shuffle(). */ #define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0 #endif -#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB +#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB /** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code. * Consider the size of the TLB for * __gnu_parallel::parallel_random_shuffle(). */ #define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0 #endif - -#ifndef _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST -/** @brief First copy the data, sort it locally, and merge it back - * (0); or copy it back after everything is done (1). - * - * Recommendation: 0 */ -#define _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST 0 -#endif diff --git a/libstdc++-v3/include/parallel/features.h b/libstdc++-v3/include/parallel/features.h index 2e09980405e..7150c20affc 100644 --- a/libstdc++-v3/include/parallel/features.h +++ b/libstdc++-v3/include/parallel/features.h @@ -61,66 +61,6 @@ #define _GLIBCXX_BAL_QUICKSORT 1 #endif -#ifndef _GLIBCXX_LOSER_TREE -/** @def _GLIBCXX_LOSER_TREE - * @brief Include guarded (sequences may run empty) loser tree, - * moving objects. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE 1 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_EXPLICIT -/** @def _GLIBCXX_LOSER_TREE_EXPLICIT - * @brief Include standard loser tree, storing two flags for infimum - * and supremum. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_EXPLICIT 0 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_REFERENCE -/** @def _GLIBCXX_LOSER_TREE_REFERENCE - * @brief Include some loser tree variant. 
- * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_REFERENCE 0 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_POINTER -/** @def _GLIBCXX_LOSER_TREE_POINTER - * @brief Include some loser tree variant. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_POINTER 1 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_UNGUARDED -/** @def _GLIBCXX_LOSER_TREE_UNGUARDED - * @brief Include unguarded (sequences must not run empty) loser - * tree, moving objects. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_UNGUARDED 0 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED -/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED - * @brief Include some loser tree variant. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_COMBINED -/** @def _GLIBCXX_LOSER_TREE_COMBINED - * @brief Include some loser tree variant. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_COMBINED 0 -#endif - -#ifndef _GLIBCXX_LOSER_TREE_SENTINEL -/** @def _GLIBCXX_LOSER_TREE_SENTINEL - * @brief Include some loser tree variant. - * @see __gnu_parallel::_Settings multiway_merge_algorithm */ -#define _GLIBCXX_LOSER_TREE_SENTINEL 0 -#endif - - #ifndef _GLIBCXX_FIND_GROWING_BLOCKS /** @brief Include the growing blocks variant for std::find. * @see __gnu_parallel::_Settings::find_algorithm */ diff --git a/libstdc++-v3/include/parallel/losertree.h b/libstdc++-v3/include/parallel/losertree.h index ddeb0d36d6c..cae15c0826e 100644 --- a/libstdc++-v3/include/parallel/losertree.h +++ b/libstdc++-v3/include/parallel/losertree.h @@ -47,878 +47,725 @@ namespace __gnu_parallel { -#if _GLIBCXX_LOSER_TREE_EXPLICIT - -/** @brief Guarded loser tree, copying the whole element into the -* tree structure. 
-* -* Guarding is done explicitly through two flags per element, inf -* and sup This is a quite slow variant. -*/ -template > - class LoserTreeExplicit +/** + * @brief Guarded loser/tournament tree. + * + * The smallest element is at the top. + * + * Guarding is done explicitly through one flag sup per element, + * inf is not needed due to a better initialization routine. This + * is a well-performing variant. + * + * @param T the element type + * @param Comparator the comparator to use, defaults to std::less + */ +template +class LoserTreeBase +{ +protected: + /** @brief Internal representation of a LoserTree element. */ + struct Loser { - private: - struct Loser - { - // The relevant element. - T key; - - // Is this an infimum or supremum element? - bool inf, sup; - - // Number of the sequence the element comes from. - int source; - }; - - unsigned int size, offset; - Loser* losers; - Comparator comp; - - public: - LoserTreeExplicit(unsigned int _size, Comparator _comp = std::less()) - : comp(_comp) - { - size = _size; - offset = size; - losers = new Loser[size]; - for (unsigned int l = 0; l < size; ++l) - { - //losers[l].key = ... stays unset - losers[l].inf = true; - losers[l].sup = false; - //losers[l].source = -1; //sentinel - } - } - - ~LoserTreeExplicit() - { delete[] losers; } + /** @brief flag, true iff this is a "maximum" sentinel. */ + bool sup; + /** @brief index of the source sequence. */ + int source; + /** @brief key of the element in the LoserTree. */ + T key; + }; - int - get_min_source() - { return losers[0].source; } + unsigned int ik, k, offset; + + /** log_2{k} */ + unsigned int _M_log_k; + + /** @brief LoserTree elements. */ + Loser* losers; + + /** @brief Comparator to use. */ + Comparator comp; + + /** + * @brief State flag that determines whether the LoserTree is empty. + * + * Only used for building the LoserTree. + */ + bool first_insert; + +public: + /** + * @brief The constructor. + * + * @param _k The number of sequences to merge. 
+ * @param _comp The comparator to use. + */ + LoserTreeBase(unsigned int _k, Comparator _comp) + : comp(_comp) + { + ik = _k; + + // Compute log_2{k} for the Loser Tree + _M_log_k = log2(ik - 1) + 1; + + // Next greater power of 2. + k = 1 << _M_log_k; + offset = k; + + // Avoid default-constructing losers[].key + losers = static_cast(::operator new(2 * k * sizeof(Loser))); + for (unsigned int i = ik - 1; i < k; ++i) + losers[i + k].sup = true; + + first_insert = true; + } + + /** + * @brief The destructor. + */ + ~LoserTreeBase() + { ::operator delete(losers); } + + /** + * @brief Initializes the sequence "source" with the element "key". + * + * @param key the element to insert + * @param source index of the source sequence + * @param sup flag that determines whether the value to insert is an + * explicit supremum. + */ + inline void + insert_start(const T& key, int source, bool sup) + { + unsigned int pos = k + source; + + if(first_insert) + { + // Construct all keys, so we can easily deconstruct them. + for (unsigned int i = 0; i < (2 * k); ++i) + new(&(losers[i].key)) T(key); + first_insert = false; + } + else + new(&(losers[pos].key)) T(key); + + losers[pos].sup = sup; + losers[pos].source = source; + } + + /** + * @return the index of the sequence with the smallest element. + */ + int get_min_source() + { return losers[0].source; } +}; + +/** + * @brief Stable LoserTree variant. + * + * Provides the stable implementations of insert_start, init_winner, + * init and delete_min_insert. + * + * Unstable variant is done using partial specialisation below. 
+ */ +template +class LoserTree : public LoserTreeBase +{ + typedef LoserTreeBase Base; + using Base::k; + using Base::losers; + using Base::first_insert; + +public: + LoserTree(unsigned int _k, Comparator _comp) + : Base::LoserTreeBase(_k, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup + || (!losers[left].sup + && !comp(losers[right].key, losers[left].key))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + void init() + { losers[0] = losers[init_winner(1)]; } + + /** + * @brief Delete the smallest element and insert a new element from + * the previously smallest element's sequence. + * + * This implementation is stable. + */ + // Do not pass a const reference since key will be used as local variable. + void delete_min_insert(T key, bool sup) + { + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if ((sup && (!losers[pos].sup || losers[pos].source < source)) + || (!sup && !losers[pos].sup + && ((comp(losers[pos].key, key)) + || (!comp(key, losers[pos].key) + && losers[pos].source < source)))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].key = key; + } +}; + +/** + * @brief Unstable LoserTree variant. + * + * Stability (non-stable here) is selected with partial specialization. 
+ */ +template +class LoserTree : + public LoserTreeBase +{ + typedef LoserTreeBase Base; + using Base::_M_log_k; + using Base::k; + using Base::losers; + using Base::first_insert; + +public: + LoserTree(unsigned int _k, Comparator _comp) + : Base::LoserTreeBase(_k, _comp) + {} + + /** + * Computes the winner of the competition at position "root". + * + * Called recursively (starting at 0) to build the initial tree. + * + * @param root index of the "game" to start. + */ + unsigned int + init_winner (unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup || + (!losers[left].sup + && !comp(losers[right].key, losers[left].key))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { losers[0] = losers[init_winner(1)]; } + + /** + * Delete the key smallest element and insert the element key instead. + * + * @param key the key to insert + * @param sup true iff key is an explicitly marked supremum + */ + // Do not pass a const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool sup) + { +#if _GLIBCXX_ASSERTIONS + // loser trees are only used for at least 2 sequences + _GLIBCXX_PARALLEL_ASSERT(_M_log_k > 1); +#endif - void - insert_start(T key, int source, bool sup) + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { - bool inf = false; - for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) - { - if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup - && comp(losers[pos].key, key)) || losers[pos].inf || sup) - { - // The other one is smaller. 
- std::swap(losers[pos].key, key); - std::swap(losers[pos].inf, inf); - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - } - } - - losers[0].key = key; - losers[0].inf = inf; - losers[0].sup = sup; - losers[0].source = source; + // The smaller one gets promoted. + if (sup || (!losers[pos].sup && comp(losers[pos].key, key))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } } - void - init() { } + losers[0].sup = sup; + losers[0].source = source; + losers[0].key = key; + } +}; - void - delete_min_insert(T key, bool sup) - { - bool inf = false; - int source = losers[0].source; - for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup - && comp(losers[pos].key, key)) - || losers[pos].inf || sup) - { - // The other one is smaller. - std::swap(losers[pos].key, key); - std::swap(losers[pos].inf, inf); - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - } - } - - losers[0].key = key; - losers[0].inf = inf; - losers[0].sup = sup; - losers[0].source = source; - } - - void - insert_start_stable(T key, int source, bool sup) - { - bool inf = false; - for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) - { - if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup - && ((comp(losers[pos].key, key)) - || (!comp(key, losers[pos].key) - && losers[pos].source < source))) - || losers[pos].inf || sup) - { - // Take next key. 
- std::swap(losers[pos].key, key); - std::swap(losers[pos].inf, inf); - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - } - } - - losers[0].key = key; - losers[0].inf = inf; - losers[0].sup = sup; - losers[0].source = source; - } - void - init_stable() { } - - void - delete_min_insert_stable(T key, bool sup) - { - bool inf = false; - int source = losers[0].source; - for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) - { - if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup - && ((comp(losers[pos].key, key)) - || (!comp(key, losers[pos].key) - && losers[pos].source < source))) - || losers[pos].inf || sup) - { - std::swap(losers[pos].key, key); - std::swap(losers[pos].inf, inf); - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - } - } - - losers[0].key = key; - losers[0].inf = inf; - losers[0].sup = sup; - losers[0].source = source; - } +/** + * @brief Base class of Loser Tree implementation using pointers. + */ +template +class LoserTreePointerBase +{ +protected: + /** @brief Internal representation of LoserTree elements. */ + struct Loser + { + bool sup; + int source; + const T* keyp; }; -#endif - -#if _GLIBCXX_LOSER_TREE + unsigned int ik, k, offset; + Loser* losers; + Comparator comp; -/** @brief Guarded loser tree, either copying the whole element into -* the tree structure, or looking up the element via the index. -* -* Guarding is done explicitly through one flag sup per element, -* inf is not needed due to a better initialization routine. This -* is a well-performing variant. -*/ -template > - class LoserTree - { - private: - struct Loser - { - bool sup; - int source; - T key; - }; - - unsigned int ik, k, offset; - Loser* losers; - Comparator comp; - bool first_insert; - - public: - LoserTree(unsigned int _k, Comparator _comp = std::less()) +public: + LoserTreePointerBase(unsigned int _k, Comparator _comp = std::less()) : comp(_comp) - { - ik = _k; - - // Next greater power of 2. 
- k = 1 << (log2(ik - 1) + 1); - offset = k; - // Avoid default-constructing losers[].key - losers = static_cast(::operator new(2 * k * sizeof(Loser))); - for (unsigned int i = ik - 1; i < k; ++i) - losers[i + k].sup = true; - - first_insert = true; - } + { + ik = _k; - ~LoserTree() - { ::operator delete(losers); } + // Next greater power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + losers = new Loser[k * 2]; + for (unsigned int i = ik - 1; i < k; i++) + losers[i + k].sup = true; + } - int - get_min_source() - { return losers[0].source; } + ~LoserTreePointerBase() + { delete[] losers; } - void - insert_start(const T& key, int source, bool sup) - { - unsigned int pos = k + source; - - if(first_insert) - { - // Construct all keys, so we can easily deconstruct them. - for (unsigned int i = 0; i < (2 * k); ++i) - ::new(&(losers[i].key)) T(key); - first_insert = false; - } - else - ::new(&(losers[pos].key)) T(key); - - losers[pos].sup = sup; - losers[pos].source = source; - } + int get_min_source() + { return losers[0].source; } - unsigned int - init_winner (unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup - || (!losers[left].sup - && !comp(losers[right].key, losers[left].key))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } + void insert_start(const T& key, int source, bool sup) + { + unsigned int pos = k + source; + + losers[pos].sup = sup; + losers[pos].source = source; + losers[pos].keyp = &key; + } +}; + +/** + * @brief Stable LoserTree implementation. + * + * The unstable variant is implemented using partial specialization below. 
+ */ +template +class LoserTreePointer : public LoserTreePointerBase +{ + typedef LoserTreePointerBase Base; + using Base::k; + using Base::losers; - void - init() - { losers[0] = losers[init_winner(1)]; } +public: + LoserTreePointer(unsigned int _k, Comparator _comp = std::less()) + : Base::LoserTreePointerBase(_k, _comp) + {} - // Do not pass const reference since key will be used as local variable. - void - delete_min_insert(T key, bool sup) - { - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (sup || (!losers[pos].sup && comp(losers[pos].key, key))) - { - // The other one is smaller. - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - std::swap(losers[pos].key, key); - } - } - - losers[0].sup = sup; - losers[0].source = source; - losers[0].key = key; - } + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup + || (!losers[left].sup && !comp(*losers[right].keyp, + *losers[left].keyp))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + void init() + { losers[0] = losers[init_winner(1)]; } + + void delete_min_insert(const T& key, bool sup) + { + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if ((sup && (!losers[pos].sup || losers[pos].source < source)) || + (!sup && !losers[pos].sup && + ((comp(*losers[pos].keyp, *keyp)) || + (!comp(*keyp, *losers[pos].keyp) + && losers[pos].source < source)))) + { + // The other one is smaller. 
+ std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +/** + * @brief Unstable LoserTree implementation. + * + * The stable variant is above. + */ +template +class LoserTreePointer : + public LoserTreePointerBase +{ + typedef LoserTreePointerBase Base; + using Base::k; + using Base::losers; - void - insert_start_stable(const T& key, int source, bool sup) - { return insert_start(key, source, sup); } +public: + LoserTreePointer(unsigned int _k, Comparator _comp = std::less()) + : Base::LoserTreePointerBase(_k, _comp) + {} - unsigned int - init_winner_stable (unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (losers[right].sup || (!losers[left].sup - && !comp(losers[right].key, losers[left].key))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init_stable() - { losers[0] = losers[init_winner_stable(1)]; } - - // Do not pass const reference since key will be used as local variable. - void - delete_min_insert_stable(T key, bool sup) - { - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if ( (sup && (!losers[pos].sup || losers[pos].source < source)) - || (!sup && !losers[pos].sup - && ((comp(losers[pos].key, key)) - || (!comp(key, losers[pos].key) - && losers[pos].source < source)))) - { - // The other one is smaller. 
- std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - std::swap(losers[pos].key, key); - } - } - - losers[0].sup = sup; - losers[0].source = source; - losers[0].key = key; - } - }; - -#endif - -#if _GLIBCXX_LOSER_TREE_REFERENCE - -/** @brief Guarded loser tree, either copying the whole element into -* the tree structure, or looking up the element via the index. -* -* Guarding is done explicitly through one flag sup per element, -* inf is not needed due to a better initialization routine. This -* is a well-performing variant. -*/ -template > - class LoserTreeReference + && !comp(*losers[right].keyp, *losers[left].keyp))) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + void init() + { losers[0] = losers[init_winner(1)]; } + + void delete_min_insert(const T& key, bool sup) { -#undef COPY -#ifdef COPY -#define KEY(i) losers[i].key -#define KEY_SOURCE(i) key -#else -#define KEY(i) keys[losers[i].source] -#define KEY_SOURCE(i) keys[i] -#endif - private: - struct Loser - { - bool sup; - int source; -#ifdef COPY - T key; -#endif - }; + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp))) + { + // The other one is smaller. + std::swap(losers[pos].sup, sup); + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].sup = sup; + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +/** @brief Base class for unguarded LoserTree implementation. + * + * The whole element is copied into the tree structure. + * + * No guarding is done, therefore not a single input sequence must + * run empty. Unused sequence heads are marked with a sentinel which + * is > all elements that are to be merged. 
+ * + * This is a very fast variant. + */ +template +class LoserTreeUnguardedBase +{ +protected: + struct Loser + { + int source; + T key; + }; - unsigned int ik, k, offset; - Loser* losers; -#ifndef COPY - T* keys; -#endif - Comparator comp; + unsigned int ik, k, offset; + Loser* losers; + Comparator comp; - public: - LoserTreeReference(unsigned int _k, Comparator _comp = std::less()) +public: + inline + LoserTreeUnguardedBase(unsigned int _k, const T _sentinel, + Comparator _comp = std::less()) : comp(_comp) - { - ik = _k; - - // Next greater power of 2. - k = 1 << (log2(ik - 1) + 1); - offset = k; - losers = new Loser[k * 2]; -#ifndef COPY - keys = new T[ik]; -#endif - for (unsigned int i = ik - 1; i < k; ++i) - losers[i + k].sup = true; - } - - ~LoserTreeReference() - { - delete[] losers; -#ifndef COPY - delete[] keys; -#endif - } - - int - get_min_source() - { return losers[0].source; } - - void - insert_start(T key, int source, bool sup) - { - unsigned int pos = k + source; - - losers[pos].sup = sup; - losers[pos].source = source; - KEY(pos) = key; - } - - unsigned int - init_winner(unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if ( losers[right].sup || - (!losers[left].sup && !comp(KEY(right), KEY(left)))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init() - { - losers[0] = losers[init_winner(1)]; - } - - void - delete_min_insert(T key, bool sup) - { - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (sup || (!losers[pos].sup && comp(KEY(pos), KEY_SOURCE(source)))) - { - // The other one is smaller. 
- std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); -#ifdef COPY - std::swap(KEY(pos), KEY_SOURCE(source)); -#endif - } - } - - losers[0].sup = sup; - losers[0].source = source; -#ifdef COPY - KEY(0) = KEY_SOURCE(source); + { + ik = _k; + + // Next greater power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + // Avoid default-constructing losers[].key + losers = static_cast(::operator new(2 * k * sizeof(Loser))); + + for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i) + { + losers[i].key = _sentinel; + losers[i].source = -1; + } + } + + inline ~LoserTreeUnguardedBase() + { ::operator delete(losers); } + + inline int + get_min_source() + { + // no dummy sequence can ever be at the top! +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif - } + return losers[0].source; + } - void - insert_start_stable(T key, int source, bool sup) - { return insert_start(key, source, sup); } - - unsigned int - init_winner_stable(unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup - || (!losers[left].sup && !comp(KEY(right), KEY(left)))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init_stable() - { losers[0] = losers[init_winner_stable(1)]; } - - void - delete_min_insert_stable(T key, bool sup) - { - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if ((sup && (!losers[pos].sup || losers[pos].source < source)) - || (!sup && !losers[pos].sup - && ((comp(KEY(pos), KEY_SOURCE(source))) - || (!comp(KEY_SOURCE(source), KEY(pos)) - && losers[pos].source < source)))) - { - // The other one is smaller. 
- std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); -#ifdef COPY - std::swap(KEY(pos), KEY_SOURCE(source)); -#endif - } - } + inline void + insert_start(const T& key, int source, bool) + { + unsigned int pos = k + source; + + new(&(losers[pos].key)) T(key); + losers[pos].source = source; + } +}; + +/** + * @brief Stable implementation of unguarded LoserTree. + * + * Unstable variant is selected below with partial specialization. + */ +template +class LoserTreeUnguarded : public LoserTreeUnguardedBase +{ + typedef LoserTreeUnguardedBase Base; + using Base::k; + using Base::losers; + +public: + LoserTreeUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less()) + : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (!comp(losers[right].key, losers[left].key)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1)]; - losers[0].sup = sup; - losers[0].source = source; -#ifdef COPY - KEY(0) = KEY_SOURCE(source); + // no dummy sequence can ever be at the top at the beginning (0 sequences!) +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif - } - }; -#undef KEY -#undef KEY_SOURCE + } + // Do not pass a const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool) + { + // No dummy sequence can ever be at the top and be retrieved! 
+#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif -#if _GLIBCXX_LOSER_TREE_POINTER - -/** @brief Guarded loser tree, either copying the whole element into - the tree structure, or looking up the element via the index. -* Guarding is done explicitly through one flag sup per element, -* inf is not needed due to a better initialization routine. -* This is a well-performing variant. -*/ -template > - class LoserTreePointer + int source = losers[0].source; + // Replay the games from the previous winner's leaf up to the root. + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if (comp(losers[pos].key, key) + || (!comp(key, losers[pos].key) && losers[pos].source < source)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].source = source; + losers[0].key = key; + } +}; + +/** + * @brief Non-Stable implementation of unguarded LoserTree. + * + * Stable implementation is above. + */ +template +class LoserTreeUnguarded : + public LoserTreeUnguardedBase +{ + typedef LoserTreeUnguardedBase Base; + using Base::k; + using Base::losers; + +public: + LoserTreeUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less()) + : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner (unsigned int root) { - private: - struct Loser - { - bool sup; - int source; - const T* keyp; - }; - - unsigned int ik, k, offset; - Loser* losers; - Comparator comp; - - public: - LoserTreePointer(unsigned int _k, Comparator _comp = std::less()) - : comp(_comp) - { - ik = _k; - - // Next greater power of 2. 
- k = 1 << (log2(ik - 1) + 1); - offset = k; - losers = new Loser[k * 2]; - for (unsigned int i = ik - 1; i < k; ++i) - losers[i + k].sup = true; - } - - ~LoserTreePointer() - { delete[] losers; } - - int - get_min_source() - { return losers[0].source; } - - void - insert_start(const T& key, int source, bool sup) - { - unsigned int pos = k + source; - - losers[pos].sup = sup; - losers[pos].source = source; - losers[pos].keyp = &key; - } - - unsigned int - init_winner(unsigned int root) - { - if (root >= k) - return root; - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup - || (!losers[left].sup - && !comp(*losers[right].keyp, *losers[left].keyp))) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init() - { losers[0] = losers[init_winner(1)]; } - - void - delete_min_insert(const T& key, bool sup) - { - const T* keyp = &key; - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp))) - { - // The other one is smaller. - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - std::swap(losers[pos].keyp, keyp); - } - } - - losers[0].sup = sup; - losers[0].source = source; - losers[0].keyp = keyp; - } - - void - insert_start_stable(const T& key, int source, bool sup) - { return insert_start(key, source, sup); } - - unsigned int - init_winner_stable(unsigned int root) - { - if (root >= k) - { - return root; - } - else - { - unsigned int left = init_winner (2 * root); - unsigned int right = init_winner (2 * root + 1); - if (losers[right].sup - || (!losers[left].sup && !comp(*losers[right].keyp, - *losers[left].keyp))) - { - // Left one is less or equal. 
- losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init_stable() - { losers[0] = losers[init_winner_stable(1)]; } - - void - delete_min_insert_stable(const T& key, bool sup) - { - const T* keyp = &key; - int source = losers[0].source; - for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if ( (sup && (!losers[pos].sup || losers[pos].source < source)) - || (!sup && !losers[pos].sup && - ((comp(*losers[pos].keyp, *keyp)) - || (!comp(*keyp, *losers[pos].keyp) - && losers[pos].source < source)))) - { - // The other one is smaller. - std::swap(losers[pos].sup, sup); - std::swap(losers[pos].source, source); - std::swap(losers[pos].keyp, keyp); - } - } - - losers[0].sup = sup; - losers[0].source = source; - losers[0].keyp = keyp; - } - }; - + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + +#if _GLIBCXX_ASSERTIONS + // If left one is sentinel then right one must be, too. + if (losers[left].source == -1) + _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1); #endif -#if _GLIBCXX_LOSER_TREE_UNGUARDED - -/** @brief Unguarded loser tree, copying the whole element into the -* tree structure. -* -* No guarding is done, therefore not a single input sequence must -* run empty. This is a very fast variant. -*/ -template > - class LoserTreeUnguarded + if (!comp(losers[right].key, losers[left].key)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. 
+ losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() { - private: - struct Loser - { - int source; - T key; - }; - - unsigned int ik, k, offset; - unsigned int* mapping; - Loser* losers; - Comparator comp; - - void - map(unsigned int root, unsigned int begin, unsigned int end) - { - if (begin + 1 == end) - mapping[begin] = root; - else - { - // Next greater or equal power of 2. - unsigned int left = 1 << (log2(end - begin - 1)); - map(root * 2, begin, begin + left); - map(root * 2 + 1, begin + left, end); - } - } - - public: - LoserTreeUnguarded(unsigned int _k, Comparator _comp = std::less()) - : comp(_comp) - { - ik = _k; - // Next greater or equal power of 2. - k = 1 << (log2(ik - 1) + 1); - offset = k; - losers = new Loser[k + ik]; - mapping = new unsigned int[ik]; - map(1, 0, ik); - } - - ~LoserTreeUnguarded() - { - delete[] losers; - delete[] mapping; - } - - int - get_min_source() - { return losers[0].source; } - - void - insert_start(const T& key, int source, bool) - { - unsigned int pos = mapping[source]; - losers[pos].source = source; - losers[pos].key = key; - } - - unsigned int - init_winner(unsigned int root, unsigned int begin, unsigned int end) - { - if (begin + 1 == end) - return mapping[begin]; - else - { - // Next greater or equal power of 2. - unsigned int division = 1 << (log2(end - begin - 1)); - unsigned int left = init_winner(2 * root, begin, begin + division); - unsigned int right = - init_winner(2 * root + 1, begin + division, end); - if (!comp(losers[right].key, losers[left].key)) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. - losers[root] = losers[left]; - return right; - } - } - } - - void - init() - { losers[0] = losers[init_winner(1, 0, ik)]; } - - // Do not pass const reference since key will be used as local variable. 
- void - delete_min_insert(const T& key, bool) - { - losers[0].key = key; - T& keyr = losers[0].key; - int& source = losers[0].source; - for (int pos = mapping[source] / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (comp(losers[pos].key, keyr)) - { - // The other one is smaller. - std::swap(losers[pos].source, source); - std::swap(losers[pos].key, keyr); - } - } - } - - void - insert_start_stable(const T& key, int source, bool) - { return insert_start(key, source, false); } - - void - init_stable() - { init(); } - - void - delete_min_insert_stable(const T& key, bool) - { - losers[0].key = key; - T& keyr = losers[0].key; - int& source = losers[0].source; - for (int pos = mapping[source] / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if (comp(losers[pos].key, keyr) - || (!comp(keyr, losers[pos].key) - && losers[pos].source < source)) - { - // The other one is smaller. - std::swap(losers[pos].source, source); - std::swap(losers[pos].key, keyr); - } - } - } - }; + losers[0] = losers[init_winner(1)]; + // no dummy sequence can ever be at the top at the beginning (0 sequences!) +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif + } -#if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED + // Do not pass a const reference since key will be used as local variable. + inline void + delete_min_insert(T key, bool) + { + // Replay the matches from the old winner's leaf (k + source) up to the root. + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (comp(losers[pos].key, key)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].key, key); + } + } + + losers[0].source = source; + losers[0].key = key; + } +}; /** @brief Unguarded loser tree, keeping only pointers to the * elements in the tree structure. @@ -926,175 +773,233 @@ template > * No guarding is done, therefore not a single input sequence must * run empty. 
This is a very fast variant. */ -template > - class LoserTreePointerUnguarded +template +class LoserTreePointerUnguardedBase +{ +protected: + struct Loser { - private: - struct Loser - { - int source; - const T* keyp; - }; - - unsigned int ik, k, offset; - unsigned int* mapping; - Loser* losers; - Comparator comp; - - void map(unsigned int root, unsigned int begin, unsigned int end) - { - if (begin + 1 == end) - mapping[begin] = root; - else - { - // Next greater or equal power of 2. - unsigned int left = 1 << (log2(end - begin - 1)); - map(root * 2, begin, begin + left); - map(root * 2 + 1, begin + left, end); - } - } - - public: - LoserTreePointerUnguarded(unsigned int _k, - Comparator _comp = std::less()) - : comp(_comp) - { - ik = _k; - - // Next greater power of 2. - k = 1 << (log2(ik - 1) + 1); - offset = k; - losers = new Loser[k + ik]; - mapping = new unsigned int[ik]; - map(1, 0, ik); - } - - ~LoserTreePointerUnguarded() - { - delete[] losers; - delete[] mapping; - } - - int - get_min_source() - { return losers[0].source; } - - void - insert_start(const T& key, int source, bool) - { - unsigned int pos = mapping[source]; - losers[pos].source = source; - losers[pos].keyp = &key; - } - - unsigned int - init_winner(unsigned int root, unsigned int begin, unsigned int end) - { - if (begin + 1 == end) - return mapping[begin]; - else - { - // Next greater or equal power of 2. - unsigned int division = 1 << (log2(end - begin - 1)); - unsigned int left = init_winner(2 * root, begin, begin + division); - unsigned int right = init_winner(2 * root + 1, - begin + division, end); - if (!comp(*losers[right].keyp, *losers[left].keyp)) - { - // Left one is less or equal. - losers[root] = losers[right]; - return left; - } - else - { - // Right one is less. 
- losers[root] = losers[left]; - return right; - } - } - } + int source; + const T* keyp; + }; - void - init() - { losers[0] = losers[init_winner(1, 0, ik)]; } + unsigned int ik, k, offset; + Loser* losers; + const T sentinel; + Comparator comp; - void - delete_min_insert(const T& key, bool) - { - const T* keyp = &key; - int& source = losers[0].source; - for (int pos = mapping[source] / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted. - if (comp(*losers[pos].keyp, *keyp)) - { - // The other one is smaller. - std::swap(losers[pos].source, source); - std::swap(losers[pos].keyp, keyp); - } - } - - losers[0].keyp = keyp; - } +public: - void - insert_start_stable(const T& key, int source, bool) - { return insert_start(key, source, false); } + inline + LoserTreePointerUnguardedBase(unsigned int _k, const T _sentinel, + Comparator _comp = std::less()) + : sentinel(_sentinel), comp(_comp) + { + ik = _k; + + // Next greater power of 2. + k = 1 << (log2(ik - 1) + 1); + offset = k; + // Avoid default-constructing losers[].key + losers = new Loser[2 * k]; + + for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i) + { + losers[i].keyp = &sentinel; + losers[i].source = -1; + } + } + + inline ~LoserTreePointerUnguardedBase() + { delete[] losers; } + + inline int + get_min_source() + { + // no dummy sequence can ever be at the top! +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + return losers[0].source; + } - void - init_stable() - { init(); } + inline void + insert_start(const T& key, int source, bool) + { + unsigned int pos = k + source; + + losers[pos].keyp = &key; + losers[pos].source = source; + } +}; + +/** + * @brief Stable unguarded LoserTree variant storing pointers. + * + * Unstable variant is implemented below using partial specialization. 
+ */ +template +class LoserTreePointerUnguarded : + public LoserTreePointerUnguardedBase +{ + typedef LoserTreePointerUnguardedBase Base; + using Base::k; + using Base::losers; + +public: + LoserTreePointerUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less()) + : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + if (!comp(*losers[right].keyp, *losers[left].keyp)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. + losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() + { + losers[0] = losers[init_winner(1)]; - void - delete_min_insert_stable(const T& key, bool) - { - int& source = losers[0].source; - const T* keyp = &key; - for (int pos = mapping[source] / 2; pos > 0; pos /= 2) - { - // The smaller one gets promoted, ties are broken by source. - if (comp(*losers[pos].keyp, *keyp) - || (!comp(*keyp, *losers[pos].keyp) - && losers[pos].source < source)) - { - // The other one is smaller. - std::swap(losers[pos].source, source); - std::swap(losers[pos].keyp, keyp); - } - } - losers[0].keyp = keyp; - } - }; + // no dummy sequence can ever be at the top at the beginning (0 sequences!) +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif + } -template - struct loser_tree_traits + inline void + delete_min_insert(const T& key, bool sup) { -#if _GLIBCXX_LOSER_TREE - typedef LoserTree<_ValueTp, Comparator> LT; -#else -# if _GLIBCXX_LOSER_TREE_POINTER - typedef LoserTreePointer<_ValueTp, Comparator> LT; -# else -# error Must define some type in losertree.h. 
-# endif + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted, ties are broken by source. + if (comp(*losers[pos].keyp, *keyp) + || (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].source = source; + losers[0].keyp = keyp; + + // no dummy sequence can ever be at the top! +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); +#endif + } +}; + +/** + * @brief Unstable unguarded LoserTree variant storing pointers. + * + * Stable variant is above. + */ +template +class LoserTreePointerUnguarded : + public LoserTreePointerUnguardedBase +{ + typedef LoserTreePointerUnguardedBase Base; + using Base::k; + using Base::losers; + +public: + LoserTreePointerUnguarded(unsigned int _k, const T _sentinel, + Comparator _comp = std::less()) + : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp) + {} + + unsigned int + init_winner(unsigned int root) + { + if (root >= k) + { + return root; + } + else + { + unsigned int left = init_winner (2 * root); + unsigned int right = init_winner (2 * root + 1); + +#if _GLIBCXX_ASSERTIONS + // If left one is sentinel then right one must be, too. + if (losers[left].source == -1) + _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1); #endif - }; -template - struct loser_tree_unguarded_traits + if (!comp(*losers[right].keyp, *losers[left].keyp)) + { + // Left one is less or equal. + losers[root] = losers[right]; + return left; + } + else + { + // Right one is less. 
+ losers[root] = losers[left]; + return right; + } + } + } + + inline void + init() { -#if _GLIBCXX_LOSER_TREE_UNGUARDED - typedef LoserTreeUnguarded<_ValueTp, Comparator> LT; -#else -# if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED - typedef LoserTreePointerUnguarded<_ValueTp, Comparator> LT; -# else -# error Must define some unguarded type in losertree.h. -# endif + losers[0] = losers[init_winner(1)]; + + // no dummy sequence can ever be at the top at the beginning (0 sequences!) +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif - }; + } -} + inline void + delete_min_insert(const T& key, bool sup) + { + const T* keyp = &key; + int source = losers[0].source; + for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) + { + // The smaller one gets promoted. + if (comp(*(losers[pos].keyp), *keyp)) + { + // The other one is smaller. + std::swap(losers[pos].source, source); + std::swap(losers[pos].keyp, keyp); + } + } + + losers[0].source = source; + losers[0].keyp = keyp; + } +}; + +} // namespace __gnu_parallel #endif diff --git a/libstdc++-v3/include/parallel/merge.h b/libstdc++-v3/include/parallel/merge.h index f12f3110871..6e0f2e382c3 100644 --- a/libstdc++-v3/include/parallel/merge.h +++ b/libstdc++-v3/include/parallel/merge.h @@ -239,19 +239,26 @@ namespace __gnu_parallel std::iterator_traits:: difference_type max_length, Comparator comp) { - typedef typename std::iterator_traits::value_type - value_type; + typedef typename + std::iterator_traits::value_type value_type; typedef typename std::iterator_traits:: difference_type difference_type1 /* == difference_type2 */; typedef typename std::iterator_traits:: difference_type difference_type3; + typedef typename std::pair + iterator_pair; std::pair seqs[2] = { std::make_pair(begin1, end1), std::make_pair(begin2, end2) }; - RandomAccessIterator3 - target_end = parallel_multiway_merge(seqs, seqs + 2, target, - comp, max_length, true, false); + RandomAccessIterator3 + target_end = 
parallel_multiway_merge + < /* stable = */ true, /* sentinels = */ false>( + seqs, seqs + 2, target, comp, + multiway_merge_exact_splitting + < /* stable = */ true, iterator_pair*, + Comparator, difference_type1>, + max_length); return target_end; } diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h index 6cc724b6015..40a2f1bc6af 100644 --- a/libstdc++-v3/include/parallel/multiway_merge.h +++ b/libstdc++-v3/include/parallel/multiway_merge.h @@ -40,7 +40,7 @@ * This file is a GNU parallel extension to the Standard C++ Library. */ -// Written by Johannes Singler. +// Written by Johannes Singler and Manuel Holtgrewe. #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H @@ -50,7 +50,6 @@ #include #include #include -#include #include #if _GLIBCXX_ASSERTIONS #include @@ -59,27 +58,34 @@ /** @brief Length of a sequence described by a pair of iterators. */ #define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first) -// XXX need iterator typedefs namespace __gnu_parallel { + +// Announce guarded and unguarded iterator. + template class guarded_iterator; +// Making the arguments const references seems too dangerous, +// the user-defined comparator might not be const. template inline bool operator<(guarded_iterator& bi1, - guarded_iterator& bi2); + guarded_iterator& bi2); template inline bool operator<=(guarded_iterator& bi1, - guarded_iterator& bi2); + guarded_iterator& bi2); - /** @brief Iterator wrapper supporting an implicit supremum at the end - of the sequence, dominating all comparisons. - * Deriving from RandomAccessIterator is not possible since - * RandomAccessIterator need not be a class. - */ +/** @brief Iterator wrapper supporting an implicit supremum at the end + * of the sequence, dominating all comparisons. + * + * The implicit supremum comes with a performance cost. 
+ * + * Deriving from RandomAccessIterator is not possible since + * RandomAccessIterator need not be a class. + */ template class guarded_iterator { @@ -100,7 +106,7 @@ template * @param comp Comparator provided for associated overloaded * compare operators. */ guarded_iterator(RandomAccessIterator begin, - RandomAccessIterator end, Comparator& comp) + RandomAccessIterator end, Comparator& comp) : current(begin), end(end), comp(comp) { } @@ -115,7 +121,7 @@ template /** @brief Dereference operator. * @return Referenced element. */ - typename std::iterator_traits::value_type + typename std::iterator_traits::value_type& operator*() { return *current; } @@ -158,7 +164,7 @@ template template inline bool operator<=(guarded_iterator& bi1, - guarded_iterator& bi2) + guarded_iterator& bi2) { if (bi2.current == bi2.end) //bi1 is sup return bi1.current != bi1.end; //bi2 is not sup @@ -185,7 +191,7 @@ template { private: /** @brief Current iterator position. */ - RandomAccessIterator& current; + RandomAccessIterator current; /** @brief Comparator. */ mutable Comparator& comp; @@ -195,7 +201,7 @@ template * @param end Unused, only for compatibility. * @param comp Unused, only for compatibility. */ unguarded_iterator(RandomAccessIterator begin, - RandomAccessIterator end, Comparator& comp) + RandomAccessIterator end, Comparator& comp) : current(begin), comp(comp) { } @@ -210,7 +216,7 @@ template /** @brief Dereference operator. * @return Referenced element. 
*/ - typename std::iterator_traits::value_type + typename std::iterator_traits::value_type& operator*() { return *current; } @@ -256,159 +262,41 @@ template return !(bi1.comp)(*bi2, *bi1); } -/** Prepare a set of sequences to be merged without a (end) guard - * @param seqs_begin - * @param seqs_end - * @param comp - * @param min_sequence - * @param stable - * @pre (seqs_end - seqs_begin > 0) */ -template - typename std::iterator_traits< - typename std::iterator_traits::value_type - ::first_type>::difference_type - prepare_unguarded(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, Comparator comp, - int& min_sequence, bool stable) - { - _GLIBCXX_CALL(seqs_end - seqs_begin) - - typedef typename std::iterator_traits - ::value_type::first_type - RandomAccessIterator1; - typedef typename std::iterator_traits::value_type - value_type; - typedef typename std::iterator_traits - ::difference_type - difference_type; - - if ((*seqs_begin).first == (*seqs_begin).second) - { - // Empty sequence found, it's the first one. - min_sequence = 0; - return -1; - } - - // Last element in sequence. - value_type min = *((*seqs_begin).second - 1); - min_sequence = 0; - for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s) - { - if ((*s).first == (*s).second) - { - // Empty sequence found. - min_sequence = static_cast(s - seqs_begin); - return -1; - } - - // Last element in sequence. 
- const value_type& v = *((*s).second - 1); - if (comp(v, min)) //strictly smaller - { - min = v; - min_sequence = static_cast(s - seqs_begin); - } - } - - difference_type overhang_size = 0; - - int s = 0; - for (s = 0; s <= min_sequence; ++s) - { - RandomAccessIterator1 split; - if (stable) - split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second, - min, comp); - else - split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, - min, comp); - - overhang_size += seqs_begin[s].second - split; - } - - for (; s < (seqs_end - seqs_begin); ++s) - { - RandomAccessIterator1 split = std::lower_bound( - seqs_begin[s].first, seqs_begin[s].second, min, comp); - overhang_size += seqs_begin[s].second - split; - } - - // So many elements will be left over afterwards. - return overhang_size; - } - -/** Prepare a set of sequences to be merged with a (end) guard (sentinel) - * @param seqs_begin - * @param seqs_end - * @param comp */ -template - typename std::iterator_traits::value_type::first_type>::difference_type - prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - Comparator comp) - { - _GLIBCXX_CALL(seqs_end - seqs_begin) - - typedef typename std::iterator_traits - ::value_type::first_type - RandomAccessIterator1; - typedef typename std::iterator_traits - ::value_type - value_type; - typedef typename std::iterator_traits - ::difference_type - difference_type; - - // Last element in sequence. - value_type* max = NULL; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - { - if ((*s).first == (*s).second) - continue; - - // Last element in sequence. - value_type& v = *((*s).second - 1); - - // Strictly greater. 
- if (!max || comp(*max, v)) - max = &v; - } - - difference_type overhang_size = 0; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - { - RandomAccessIterator1 split = - std::lower_bound((*s).first, (*s).second, *max, comp); - overhang_size += (*s).second - split; - - // Set sentinel. - *((*s).second) = *max; - } - - // So many elements will be left over afterwards. - return overhang_size; - } - /** @brief Highly efficient 3-way merging procedure. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Unused, stable anyway. - * @return End iterator of output sequence. */ + * + * Merging is done with the algorithm implementation described by Peter + * Sanders. Basically, the idea is to minimize the number of necessary + * comparisons after merging out an element. The implementation trick + * that makes this fast is that the order of the sequences is stored + * in the instruction pointer (translated into labels in C++). + * + * This works well for merging up to 4 sequences. + * + * Note that making the merging stable does not incur a + * performance hit. + * + * Whether the merging is done guarded or unguarded is selected by the + * used iterator class. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator of output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence. 
+ */ template class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator> RandomAccessIterator3 - multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, _DifferenceTp length, - bool stable) + multiway_merge_3_variant( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) { _GLIBCXX_CALL(length); @@ -423,6 +311,10 @@ template class iterator, if (length == 0) return target; +#if _GLIBCXX_ASSERTIONS + _DifferenceTp orig_length = length; +#endif + iterator seq0(seqs_begin[0].first, seqs_begin[0].second, comp), seq1(seqs_begin[1].first, seqs_begin[1].second, comp), @@ -450,17 +342,16 @@ template class iterator, else goto s210; } - -#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\ +#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1) \ s ## a ## b ## c : \ *target = *seq ## a; \ - ++target; \ - --length; \ - ++seq ## a; \ - if (length == 0) goto finish; \ - if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \ - if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \ - goto s ## b ## c ## a; + ++target; \ + --length; \ + ++seq ## a; \ + if (length == 0) goto finish; \ + if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \ + if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \ + goto s ## b ## c ## a; _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=); _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < ); @@ -474,6 +365,14 @@ template class iterator, finish: ; +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT( + ((RandomAccessIterator1)seq0 - seqs_begin[0].first) + + ((RandomAccessIterator1)seq1 - seqs_begin[1].first) + + ((RandomAccessIterator1)seq2 - seqs_begin[2].first) + == orig_length); +#endif + seqs_begin[0].first = seq0; seqs_begin[1].first = seq1; seqs_begin[2].first = seq2; @@ -481,95 +380,31 @@ template 
class iterator, return target; } -template - RandomAccessIterator3 - multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable) - { - _GLIBCXX_CALL(length); - - typedef _DifferenceTp difference_type; - typedef typename std::iterator_traits - ::value_type::first_type - RandomAccessIterator1; - typedef typename std::iterator_traits::value_type - value_type; - - int min_seq; - RandomAccessIterator3 target_end; - - // Stable anyway. - difference_type overhang = - prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true); - - difference_type total_length = 0; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - total_length += _GLIBCXX_PARALLEL_LENGTH(*s); - - if (overhang != -1) - { - difference_type unguarded_length = - std::min(length, total_length - overhang); - target_end = multiway_merge_3_variant - (seqs_begin, seqs_end, target, comp, unguarded_length, stable); - overhang = length - unguarded_length; - } - else - { - // Empty sequence found. - overhang = length; - target_end = target; - } - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - switch (min_seq) - { - case 0: - // Iterators will be advanced accordingly. 
- target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second, - seqs_begin[2].first, seqs_begin[2].second, - target_end, overhang, comp); - break; - case 1: - target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second, - seqs_begin[2].first, seqs_begin[2].second, - target_end, overhang, comp); - break; - case 2: - target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second, - seqs_begin[1].first, seqs_begin[1].second, - target_end, overhang, comp); - break; - default: - _GLIBCXX_PARALLEL_ASSERT(false); - } - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - return target_end; - } - -/** @brief Highly efficient 4-way merging procedure. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Unused, stable anyway. - * @return End iterator of output sequence. */ +/** + * @brief Highly efficient 4-way merging procedure. + * + * Merging is done with the algorithm implementation described by Peter + * Sanders. Basically, the idea is to minimize the number of necessary + * comparisons after merging out an element. The implementation trick + * that makes this fast is that the order of the sequences is stored + * in the instruction pointer (translated into goto labels in C++). + * + * This works well for merging up to 4 sequences. + * + * Note that making the merging stable does not come at a + * performance hit. + * + * Whether the merging is done guarded or unguarded is selected by the + * used iterator class. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator of output sequence.
+ * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence. + */ template class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, @@ -579,7 +414,7 @@ template class iterator, multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, - Comparator comp, _DifferenceTp length, bool stable) + Comparator comp, _DifferenceTp length) { _GLIBCXX_CALL(length); typedef _DifferenceTp difference_type; @@ -676,651 +511,467 @@ template class iterator, return target; } -templateLT. + * + * Stability is selected through the used LoserTree class LT. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence. + */ +template RandomAccessIterator3 - multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, + multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, - _DifferenceTp length, bool stable) + _DifferenceTp length) { - _GLIBCXX_CALL(length); - typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(length) + typedef _DifferenceTp difference_type; typedef typename std::iterator_traits ::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; - int min_seq; - RandomAccessIterator3 target_end; + int k = static_cast(seqs_end - seqs_begin); - // Stable anyway. 
- difference_type overhang = - prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true); + LT lt(k, comp); difference_type total_length = 0; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - total_length += _GLIBCXX_PARALLEL_LENGTH(*s); - if (overhang != -1) + // Default value for potentially non-default-constructible types. + value_type* arbitrary_element = NULL; + + for (int t = 0; t < k; ++t) { - difference_type unguarded_length = - std::min(length, total_length - overhang); - target_end = multiway_merge_4_variant - (seqs_begin, seqs_end, target, comp, unguarded_length, stable); - overhang = length - unguarded_length; + if(arbitrary_element == NULL + && _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0) + arbitrary_element = &(*seqs_begin[t].first); + total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]); } - else + + if(total_length == 0) + return target; + + for (int t = 0; t < k; ++t) { - // Empty sequence found. - overhang = length; - target_end = target; + if (seqs_begin[t].first == seqs_begin[t].second) + lt.insert_start(*arbitrary_element, t, true); + else + lt.insert_start(*seqs_begin[t].first, t, false); } -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif + lt.init(); - std::vector > - one_missing(seqs_begin, seqs_end); - one_missing.erase(one_missing.begin() + min_seq); //remove + const difference_type const_total_length(std::min(total_length, length)); - target_end = multiway_merge_3_variant( - one_missing.begin(), one_missing.end(), - target_end, comp, overhang, stable); + int source; - // Insert back again. - one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]); - // Write back modified iterators. 
- copy(one_missing.begin(), one_missing.end(), seqs_begin); + for (difference_type i = 0; i < const_total_length; ++i) + { + //take out + source = lt.get_min_source(); -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif + *(target++) = *(seqs_begin[source].first++); - return target_end; + // Feed. + if (seqs_begin[source].first == seqs_begin[source].second) + lt.delete_min_insert(*arbitrary_element, true); + else + // Replace from same source. + lt.delete_min_insert(*seqs_begin[source].first, false); + } + + return target; } -/** @brief Basic multi-way merging procedure. +/** @brief Multi-way merging procedure for a high branching factor, + * unguarded case. * - * The head elements are kept in a sorted array, new heads are - * inserted linearly. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @return End iterator of output sequence. + * Merging is done using the LoserTree class LT. + * + * Stability is selected by the used LoserTrees. + * + * @pre No input will run out of elements during the merge. + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence. 
*/ -template + typename _DifferenceTp, typename Comparator> RandomAccessIterator3 - multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, _DifferenceTp length, bool stable) + multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + int min_seq, Comparator comp, + _DifferenceTp length) { _GLIBCXX_CALL(length) - typedef _DifferenceTp difference_type; + typedef typename std::iterator_traits ::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; - int k = static_cast(seqs_end - seqs_begin); - int nrs; // Number of remaining sequences. + int k = seqs_end - seqs_begin; + + // Determine the sentinel. The sentinel is largest/last element of the + // sequences with the smallest largest/last element. + value_type sentinel = *(seqs_begin[min_seq].second - 1); + + LT lt(k, sentinel, comp); - // Avoid default constructor. - value_type* fe = static_cast( - ::operator new(sizeof(value_type) * k)); // Front elements. - int* source = new int[k]; difference_type total_length = 0; - // Write entries into queue. - nrs = 0; - for (int pi = 0; pi < k; ++pi) + for (int t = 0; t < k; ++t) { - if (seqs_begin[pi].first != seqs_begin[pi].second) - { - ::new(&(fe[nrs])) value_type(*(seqs_begin[pi].first)); - source[nrs] = pi; - ++nrs; - total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]); - } - } +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second); +#endif + lt.insert_start(*seqs_begin[t].first, t, false); - if (stable) - { - // Bubble sort fe and source by fe. 
- for (int k = 0; k < nrs - 1; ++k) - for (int pi = nrs - 1; pi > k; --pi) - if (comp(fe[pi], fe[pi - 1]) || - (!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1])) - { - std::swap(fe[pi - 1], fe[pi]); - std::swap(source[pi - 1], source[pi]); - } - } - else - { - for (int k = 0; k < nrs - 1; ++k) - for (int pi = nrs - 1; pi > k; --pi) - if (comp(fe[pi], fe[pi-1])) - { - std::swap(fe[pi-1], fe[pi]); - std::swap(source[pi-1], source[pi]); - } + total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]); } - // Iterate. - if (stable) - { - int j; - while (nrs > 0 && length > 0) - { - if (source[0] < source[1]) - { - // fe[0] <= fe[1] - while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0) - { - *target = fe[0]; - ++target; - ++(seqs_begin[source[0]].first); - --length; - if (seqs_begin[source[0]].first - == seqs_begin[source[0]].second) - { - // Move everything to the left. - for (int s = 0; s < nrs - 1; ++s) - { - fe[s] = fe[s + 1]; - source[s] = source[s + 1]; - } - fe[nrs - 1].~value_type(); //Destruct explicitly. - --nrs; - break; - } - else - fe[0] = *(seqs_begin[source[0]].first); - } - } - else - { - // fe[0] < fe[1] - while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0) - { - *target = fe[0]; - ++target; - ++(seqs_begin[source[0]].first); - --length; - if (seqs_begin[source[0]].first - == seqs_begin[source[0]].second) - { - for (int s = 0; s < nrs - 1; ++s) - { - fe[s] = fe[s + 1]; - source[s] = source[s + 1]; - } - fe[nrs - 1].~value_type(); //Destruct explicitly. - --nrs; - break; - } - else - fe[0] = *(seqs_begin[source[0]].first); - } - } - - // Sink down. - j = 1; - while ((j < nrs) && (comp(fe[j], fe[j - 1]) - || (!comp(fe[j - 1], fe[j]) - && (source[j] < source[j - 1])))) - { - std::swap(fe[j - 1], fe[j]); - std::swap(source[j - 1], source[j]); - ++j; - } - } - } - else + lt.init(); + + // Do not go past end. 
+ length = std::min(total_length, length); + + int source; + +#if _GLIBCXX_ASSERTIONS + difference_type i = 0; +#endif + + RandomAccessIterator3 target_end = target + length; + while (target < target_end) { - int j; - while (nrs > 0 && length > 0) - { - // fe[0] <= fe[1] - while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0) - { - *target = fe[0]; - ++target; - ++seqs_begin[source[0]].first; - --length; - if (seqs_begin[source[0]].first - == seqs_begin[source[0]].second) - { - for (int s = 0; s < (nrs - 1); ++s) - { - fe[s] = fe[s + 1]; - source[s] = source[s + 1]; - } - fe[nrs - 1].~value_type(); //Destruct explicitly. - --nrs; - break; - } - else - fe[0] = *(seqs_begin[source[0]].first); - } - - // Sink down. - j = 1; - while ((j < nrs) && comp(fe[j], fe[j - 1])) - { - std::swap(fe[j - 1], fe[j]); - std::swap(source[j - 1], source[j]); - ++j; - } - } - } + // Take out. + source = lt.get_min_source(); - ::operator delete(fe); //Destructors already called. - delete[] source; +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(0 <= source && source < k); + _GLIBCXX_PARALLEL_ASSERT(i == 0 + || !comp(*(seqs_begin[source].first), *(target - 1))); +#endif - return target; - } + // Feed. + *(target++) = *(seqs_begin[source].first++); -/** @brief Multi-way merging procedure for a high branching factor, - * guarded case. - * - * The head elements are kept in a loser tree. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @return End iterator of output sequence. 
- */ -template - RandomAccessIterator3 - multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable) - { - _GLIBCXX_CALL(length) - - typedef _DifferenceTp difference_type; - typedef typename std::iterator_traits - ::value_type::first_type - RandomAccessIterator1; - typedef typename std::iterator_traits::value_type - value_type; - - int k = static_cast(seqs_end - seqs_begin); - - LT lt(k, comp); - - difference_type total_length = 0; - - // Default value for potentially non-default-constructible types. - value_type* arbitrary_element = NULL; - - for (int t = 0; t < k; ++t) - { - if(arbitrary_element == NULL - && _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0) - arbitrary_element = &(*seqs_begin[t].first); - total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]); - } - - if(total_length == 0) - return target; - - for (int t = 0; t < k; ++t) - { - if (stable) - { - if (seqs_begin[t].first == seqs_begin[t].second) - lt.insert_start_stable(*arbitrary_element, t, true); - else - lt.insert_start_stable(*seqs_begin[t].first, t, false); - } - else - { - if (seqs_begin[t].first == seqs_begin[t].second) - lt.insert_start(*arbitrary_element, t, true); - else - lt.insert_start(*seqs_begin[t].first, t, false); - } - } - - if (stable) - lt.init_stable(); - else - lt.init(); - - total_length = std::min(total_length, length); - - int source; - - if (stable) - { - for (difference_type i = 0; i < total_length; ++i) - { - // Take out. - source = lt.get_min_source(); - - *(target++) = *(seqs_begin[source].first++); - - // Feed. - if (seqs_begin[source].first == seqs_begin[source].second) - lt.delete_min_insert_stable(*arbitrary_element, true); - else - // Replace from same source. 
- lt.delete_min_insert_stable(*seqs_begin[source].first, false); - - } - } - else - { - for (difference_type i = 0; i < total_length; ++i) - { - //take out - source = lt.get_min_source(); - - *(target++) = *(seqs_begin[source].first++); - - // Feed. - if (seqs_begin[source].first == seqs_begin[source].second) - lt.delete_min_insert(*arbitrary_element, true); - else - // Replace from same source. - lt.delete_min_insert(*seqs_begin[source].first, false); - } +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT( + (seqs_begin[source].first != seqs_begin[source].second) + || (i >= length - 1)); + ++i; +#endif + // Replace from same source. + lt.delete_min_insert(*seqs_begin[source].first, false); } return target; } + /** @brief Multi-way merging procedure for a high branching factor, - * unguarded case. + * requiring sentinels to exist. + * @param stable The value must be the same as for the used LoserTrees. + * @param UnguardedLoserTree Loser Tree variant to use for the unguarded + * merging. + * @param GuardedLoserTree Loser Tree variant to use for the guarded + * merging. * - * The head elements are kept in a loser tree. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @return End iterator of output sequence. - * @pre No input will run out of elements during the merge. + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator of output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * + * @return End iterator of output sequence.
*/ -template +template< + typename UnguardedLoserTree, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> RandomAccessIterator3 - multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable) + multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, + _DifferenceTp length) { _GLIBCXX_CALL(length) - typedef _DifferenceTp difference_type; + typedef _DifferenceTp difference_type; + typedef std::iterator_traits traits_type; typedef typename std::iterator_traits ::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; - int k = seqs_end - seqs_begin; - - LT lt(k, comp); + RandomAccessIterator3 target_end; difference_type total_length = 0; - - for (int t = 0; t < k; ++t) + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) { -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second); -#endif - if (stable) - lt.insert_start_stable(*seqs_begin[t].first, t, false); - else - lt.insert_start(*seqs_begin[t].first, t, false); + total_length += _GLIBCXX_PARALLEL_LENGTH(*s); - total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]); + // Move the sequence's end behind the sentinel spots. This has the + // effect that the sentinel appears to be within the sequence. Then, + // we can use the unguarded variant if we merge out as many + // non-sentinel elements as we have. + ++((*s).second); } - if (stable) - lt.init_stable(); - else - lt.init(); - - // Do not go past end.
- length = std::min(total_length, length); - - int source; - -#if _GLIBCXX_ASSERTIONS - difference_type i = 0; -#endif - - if (stable) - { - RandomAccessIterator3 target_end = target + length; - while (target < target_end) - { - // Take out. - source = lt.get_min_source(); - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(i == 0 - || !comp(*(seqs_begin[source].first), *(target - 1))); -#endif - - *(target++) = *(seqs_begin[source].first++); + difference_type unguarded_length = + std::min(length, total_length); + target_end = multiway_merge_loser_tree_unguarded + + (seqs_begin, seqs_end, target, 0, comp, unguarded_length); #if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT( - (seqs_begin[source].first != seqs_begin[source].second) - || (i == length - 1)); - ++i; + _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); + _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif - // Feed. - // Replace from same source. - lt.delete_min_insert_stable(*seqs_begin[source].first, false); - } - } - else - { - RandomAccessIterator3 target_end = target + length; - while (target < target_end) - { - // Take out. - source = lt.get_min_source(); + // Restore the sequence ends so the sentinels are not contained in the + // sequence any more (see comment in loop above). + for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) + { --((*s).second); } -#if _GLIBCXX_ASSERTIONS - if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1))) - printf(" %i %i %i\n", length, i, source); - _GLIBCXX_PARALLEL_ASSERT(i == 0 - || !comp(*(seqs_begin[source].first), *(target - 1))); -#endif + return target_end; + } - *(target++) = *(seqs_begin[source].first++); +/** + * @brief Traits for determining whether the loser tree should + * use pointers or copies. + * + * The field "use_pointer" is used to determine whether to use pointers in + * the loser trees or whether to copy the values into the loser tree. 
+ * + * The default behavior is to use pointers if the data type is 4 times as + * big as the pointer to it. + * + * Specialize for your data type to customize the behavior. + * + * Example: + * + * template<> + * struct loser_tree_traits + * { static const bool use_pointer = false; }; + * + * template<> + * struct loser_tree_traits + * { static const bool use_pointer = true; }; + * + * @param T type to give the loser tree traits for. + */ +template +struct loser_tree_traits +{ + /** + * @brief True iff to use pointers instead of values in loser trees. + * + * The default behavior is to use pointers if the data type is four + * times as big as the pointer to it. + */ + static const bool use_pointer = (sizeof(T) > 4 * sizeof(T*)); +}; -#if _GLIBCXX_ASSERTIONS - if (!((seqs_begin[source].first != seqs_begin[source].second) - || (i >= length - 1))) - printf(" %i %i %i\n", length, i, source); - _GLIBCXX_PARALLEL_ASSERT( - (seqs_begin[source].first != seqs_begin[source].second) - || (i >= length - 1)); - ++i; -#endif - // Feed. - // Replace from same source. - lt.delete_min_insert(*seqs_begin[source].first, false); - } - } +/** + * @brief Switch for 3-way merging with sentinels turned off. + * + * Note that 3-way merging is always stable! + */ +template< + bool sentinels /*default == false*/, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_3_variant_sentinel_switch +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) + { + return multiway_merge_3_variant( + seqs_begin, seqs_end, target, comp, length); + } +}; - return target; +/** + * @brief Switch for 3-way merging with sentinels turned on. + * + * Note that 3-way merging is always stable! 
+ */ +template< + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_3_variant_sentinel_switch + +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) + { + return multiway_merge_3_variant( + seqs_begin, seqs_end, target, comp, length); } +}; -template - RandomAccessIterator3 - multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable) +/** + * @brief Switch for 4-way merging with sentinels turned off. + * + * Note that 4-way merging is always stable! + */ +template< + bool sentinels /*default == false*/, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_4_variant_sentinel_switch +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) { - _GLIBCXX_CALL(length) + return multiway_merge_4_variant( + seqs_begin, seqs_end, target, comp, length); + } +}; - typedef _DifferenceTp difference_type; +/** + * @brief Switch for 4-way merging with sentinels turned on. + * + * Note that 4-way merging is always stable! 
+ */ +template< + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_4_variant_sentinel_switch + +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) + { + return multiway_merge_4_variant( + seqs_begin, seqs_end, target, comp, length); + } +}; +/** + * @brief Switch for k-way merging with sentinels turned on. + */ +template< + bool sentinels, + bool stable, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_k_variant_sentinel_switch +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) + { typedef typename std::iterator_traits ::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; - int min_seq; - RandomAccessIterator3 target_end; - difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, - comp, min_seq, stable); - - difference_type total_length = 0; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - total_length += _GLIBCXX_PARALLEL_LENGTH(*s); - - if (overhang != -1) - { - difference_type unguarded_length = - std::min(length, total_length - overhang); - target_end = multiway_merge_loser_tree_unguarded - ::LT> - (seqs_begin, seqs_end, target, comp, unguarded_length, stable); - overhang = length - unguarded_length; - } - else - { - // Empty sequence found. 
- overhang = length; - target_end = target; - } - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - target_end = multiway_merge_loser_tree - ::LT> - (seqs_begin, seqs_end, target_end, comp, overhang, stable); - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - return target_end; + return multiway_merge_loser_tree_sentinel< + typename __gnu_cxx::__conditional_type< + loser_tree_traits::use_pointer + , LoserTreePointerUnguarded + , LoserTreeUnguarded + >::__type>(seqs_begin, seqs_end, target, comp, length); } +}; -template - RandomAccessIterator3 - multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, - RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable) +/** + * @brief Switch for k-way merging with sentinels turned off. 
+ */ +template< + bool stable, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> +struct multiway_merge_k_variant_sentinel_switch + +{ + RandomAccessIterator3 operator()( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + RandomAccessIterator3 target, + Comparator comp, _DifferenceTp length) { - _GLIBCXX_CALL(length) - - typedef _DifferenceTp difference_type; - typedef std::iterator_traits traits_type; typedef typename std::iterator_traits ::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; - RandomAccessIterator3 target_end; - difference_type overhang = - prepare_unguarded_sentinel(seqs_begin, seqs_end, comp); - - difference_type total_length = 0; - for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) - { - total_length += _GLIBCXX_PARALLEL_LENGTH(*s); - - // Sentinel spot. - ++((*s).second); - } - - difference_type unguarded_length = - std::min(length, total_length - overhang); - target_end = multiway_merge_loser_tree_unguarded - ::LT> - (seqs_begin, seqs_end, target, comp, unguarded_length, stable); - overhang = length - unguarded_length; - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - // Copy rest stable. - for (RandomAccessIteratorIterator s = seqs_begin; - s != seqs_end && overhang > 0; ++s) - { - // Restore. 
- --((*s).second); - difference_type local_length = - std::min(overhang, _GLIBCXX_PARALLEL_LENGTH(*s)); - target_end = std::copy((*s).first, (*s).first + local_length, - target_end); - (*s).first += local_length; - overhang -= local_length; - } - -#if _GLIBCXX_ASSERTIONS - _GLIBCXX_PARALLEL_ASSERT(overhang == 0); - _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); - _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); -#endif - - return target_end; + return multiway_merge_loser_tree< + typename __gnu_cxx::__conditional_type< + loser_tree_traits::use_pointer + , LoserTreePointer + , LoserTree + >::__type >(seqs_begin, seqs_end, target, comp, length); } +}; /** @brief Sequential multi-way merging switch. * - * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and + * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and * runtime settings. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. 
@@ -1330,17 +981,18 @@ template +template< + bool stable, + bool sentinels, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Comparator> RandomAccessIterator3 - multiway_merge(RandomAccessIteratorIterator seqs_begin, + sequential_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, - Comparator comp, _DifferenceTp length, - bool stable, bool sentinel, - sequential_tag) + Comparator comp, _DifferenceTp length) { _GLIBCXX_CALL(length) @@ -1353,17 +1005,14 @@ template(seqs_end - seqs_begin); - _MultiwayMergeAlgorithm mwma = _Settings::get().multiway_merge_algorithm; - - if (!sentinel && mwma == LOSER_TREE_SENTINEL) - mwma = LOSER_TREE_COMBINED; - switch (k) { case 0: @@ -1382,113 +1031,30 @@ template(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; - default: - return_target = - multiway_merge_3_variant(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; - } + return_target = multiway_merge_3_variant_sentinel_switch< + sentinels + , RandomAccessIteratorIterator + , RandomAccessIterator3 + , _DifferenceTp + , Comparator>()(seqs_begin, seqs_end, target, comp, length); break; case 4: - switch (mwma) - { - case LOSER_TREE_COMBINED: - return_target = multiway_merge_4_combined(seqs_begin, - seqs_end, - target, - comp, length, stable); - break; - case LOSER_TREE_SENTINEL: - return_target = - multiway_merge_4_variant(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; - default: - return_target = multiway_merge_4_variant( - seqs_begin, - seqs_end, - target, - comp, length, stable); - break; - } + return_target = multiway_merge_4_variant_sentinel_switch< + sentinels + , RandomAccessIteratorIterator + , RandomAccessIterator3 + , _DifferenceTp + , Comparator>()(seqs_begin, seqs_end, target, comp, length); break; default: - { - switch (mwma) - { - case BUBBLE: - return_target = 
multiway_merge_bubble(seqs_begin, - seqs_end, - target, - comp, length, stable); - break; -#if _GLIBCXX_LOSER_TREE_EXPLICIT - case LOSER_TREE_EXPLICIT: - return_target = multiway_merge_loser_tree< - LoserTreeExplicit >(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; -#endif -#if _GLIBCXX_LOSER_TREE - case LOSER_TREE: - return_target = multiway_merge_loser_tree< - LoserTree >(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; -#endif -#if _GLIBCXX_LOSER_TREE_COMBINED - case LOSER_TREE_COMBINED: - return_target = multiway_merge_loser_tree_combined(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; -#endif -#if _GLIBCXX_LOSER_TREE_SENTINEL - case LOSER_TREE_SENTINEL: - return_target = multiway_merge_loser_tree_sentinel(seqs_begin, - seqs_end, - target, - comp, length, - stable); - break; -#endif - default: - // multiway_merge algorithm not implemented. - _GLIBCXX_PARALLEL_ASSERT(0); - break; - } - } + return_target = multiway_merge_k_variant_sentinel_switch< + sentinels + , stable + , RandomAccessIteratorIterator + , RandomAccessIterator3 + , _DifferenceTp + , Comparator>()(seqs_begin, seqs_end, target, comp, length); + break; } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); @@ -1497,38 +1063,246 @@ template +struct sampling_sorter +{ + void operator()(RandomAccessIterator first, RandomAccessIterator last, + StrictWeakOrdering comp) + { __gnu_sequential::stable_sort(first, last, comp); } +}; + +/** + * @brief Non-stable sorting functor. + * + * Used to reduce code instanciation in multiway_merge_sampling_splitting. + */ +template +struct sampling_sorter +{ + void operator()(RandomAccessIterator first, RandomAccessIterator last, + StrictWeakOrdering comp) + { __gnu_sequential::sort(first, last, comp); } +}; + +/** + * @brief Sampling based splitting for parallel multiway-merge routine. 
+ */ +template< + bool stable + , typename RandomAccessIteratorIterator + , typename Comparator + , typename difference_type> +void multiway_merge_sampling_splitting( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + Comparator comp, difference_type length, + difference_type total_length, + std::vector > *pieces) +{ + typedef typename std::iterator_traits + ::value_type::first_type + RandomAccessIterator1; + typedef typename std::iterator_traits::value_type + value_type; + + // k sequences. + int k = static_cast(seqs_end - seqs_begin); + + int num_threads = omp_get_num_threads(); + + difference_type num_samples = + __gnu_parallel::_Settings::get().merge_oversampling * num_threads; + + value_type* samples = static_cast( + ::operator new(sizeof(value_type) * k * num_samples)); + // Sample. + for (int s = 0; s < k; ++s) + for (difference_type i = 0; i < num_samples; ++i) + { + difference_type sample_index = + static_cast( + _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) * (double(i + 1) / + (num_samples + 1)) * (double(length) + / total_length)); + new(&(samples[s * num_samples + i])) value_type( + seqs_begin[s].first[sample_index]); + } + + // Sort stable or non-stable, depending on value of template parameter + // "stable". + sampling_sorter()( + samples, samples + (num_samples * k), comp); + + for (int slab = 0; slab < num_threads; ++slab) + // For each slab / processor. + for (int seq = 0; seq < k; ++seq) + { + // For each sequence. + if (slab > 0) + pieces[slab][seq].first = + std::upper_bound( + seqs_begin[seq].first, + seqs_begin[seq].second, + samples[num_samples * k * slab / num_threads], + comp) + - seqs_begin[seq].first; + else + { + // Absolute beginning. 
+ pieces[slab][seq].first = 0; + } + if ((slab + 1) < num_threads) + pieces[slab][seq].second = + std::upper_bound( + seqs_begin[seq].first, + seqs_begin[seq].second, + samples[num_samples * k * (slab + 1) / + num_threads], comp) + - seqs_begin[seq].first; + else + pieces[slab][seq].second = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); + } + ::operator delete(samples); +} + +/** + * @brief Exact splitting for parallel multiway-merge routine. + */ +template< + bool stable + , typename RandomAccessIteratorIterator + , typename Comparator + , typename difference_type> +void multiway_merge_exact_splitting( + RandomAccessIteratorIterator seqs_begin, + RandomAccessIteratorIterator seqs_end, + Comparator comp, + difference_type length, + difference_type total_length, + std::vector > *pieces) +{ + typedef typename std::iterator_traits + ::value_type::first_type + RandomAccessIterator1; + + const bool tight = (total_length == length); + + // k sequences. + const int k = static_cast(seqs_end - seqs_begin); + + const int num_threads = omp_get_num_threads(); + + // (Settings::multiway_merge_splitting == __gnu_parallel::_Settings::EXACT). + std::vector* offsets = + new std::vector[num_threads]; + std::vector< + std::pair + > se(k); + + copy(seqs_begin, seqs_end, se.begin()); + + difference_type* borders = + new difference_type[num_threads + 1]; + equally_split(length, num_threads, borders); + + for (int s = 0; s < (num_threads - 1); ++s) + { + offsets[s].resize(k); + multiseq_partition( + se.begin(), se.end(), borders[s + 1], + offsets[s].begin(), comp); + + // Last one also needed and available. + if (!tight) + { + offsets[num_threads - 1].resize(k); + multiseq_partition(se.begin(), se.end(), + difference_type(length), + offsets[num_threads - 1].begin(), comp); + } + } + + + for (int slab = 0; slab < num_threads; ++slab) + { + // For each slab / processor. + for (int seq = 0; seq < k; ++seq) + { + // For each sequence. + if (slab == 0) + { + // Absolute beginning. 
+ pieces[slab][seq].first = 0; + } + else + pieces[slab][seq].first = + pieces[slab - 1][seq].second; + if (!tight || slab < (num_threads - 1)) + pieces[slab][seq].second = + offsets[slab][seq] - seqs_begin[seq].first; + else + { + // slab == num_threads - 1 + pieces[slab][seq].second = + _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); + } + } + } + delete[] offsets; +} + /** @brief Parallel multi-way merge routine. * - * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor - * and runtime settings. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. - * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @param sentinel Ignored. - * @return End iterator of output sequence. + * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor + * and runtime settings. + * + * Must not be called if the number of sequences is 1. + * + * @param Splitter functor to split input (either exact or sampling based) + * + * @param seqs_begin Begin iterator of iterator pair input sequence. + * @param seqs_end End iterator of iterator pair input sequence. + * @param target Begin iterator out output sequence. + * @param comp Comparator. + * @param length Maximum length to merge. + * @param stable Stable merging incurs a performance penalty. + * @param sentinel Ignored. + * @return End iterator of output sequence. 
*/ -template +template< + bool stable, + bool sentinels, + typename RandomAccessIteratorIterator, + typename RandomAccessIterator3, + typename _DifferenceTp, + typename Splitter, + typename Comparator + > RandomAccessIterator3 parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, bool stable, bool sentinel) + RandomAccessIterator3 target, + Comparator comp, + Splitter splitter, + _DifferenceTp length) { +#if _GLIBCXX_ASSERTIONS + _GLIBCXX_PARALLEL_ASSERT(seqs_end - seqs_begin > 1); +#endif + _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; typedef typename std::iterator_traits ::value_type::first_type RandomAccessIterator1; - typedef typename std::iterator_traits::value_type - value_type; + typedef typename + std::iterator_traits::value_type value_type; // k sequences. int k = static_cast(seqs_end - seqs_begin); @@ -1543,13 +1317,10 @@ template >* pieces; thread_index_t num_threads = static_cast( - std::min(get_max_threads(), total_length)); - const _Settings& __s = _Settings::get(); + std::min(get_max_threads(), total_length)); # pragma omp parallel num_threads (num_threads) { @@ -1562,126 +1333,12 @@ template( - ::operator new(sizeof(value_type) * k * num_samples)); - // Sample. - for (int s = 0; s < k; ++s) - for (difference_type i = 0; i < num_samples; ++i) - { - difference_type sample_index = - static_cast( - _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) - * (double(i + 1) / (num_samples + 1)) - * (double(length) / total_length)); - ::new(&(samples[s * num_samples + i])) - value_type(seqs_begin[s].first[sample_index]); - } - - if (stable) - __gnu_sequential::stable_sort(samples, samples - + (num_samples * k), comp); - else - __gnu_sequential::sort(samples, samples - + (num_samples * k), comp); - - for (int slab = 0; slab < num_threads; ++slab) - // For each slab / processor. 
- for (int seq = 0; seq < k; ++seq) - { - // For each sequence. - if (slab > 0) - pieces[slab][seq].first = - std::upper_bound(seqs_begin[seq].first, - seqs_begin[seq].second, - samples[num_samples * k - * slab / num_threads], - comp) - - seqs_begin[seq].first; - else - { - // Absolute beginning. - pieces[slab][seq].first = 0; - } - if ((slab + 1) < num_threads) - pieces[slab][seq].second = - std::upper_bound(seqs_begin[seq].first, - seqs_begin[seq].second, - samples[num_samples * k - * (slab + 1) - / num_threads], comp) - - seqs_begin[seq].first; - else - pieces[slab][seq].second - = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); - } - ::operator delete(samples); - } - else - { - // (_Settings::multiway_merge_splitting == _Settings::EXACT). - std::vector* offsets = - new std::vector[num_threads]; - std::vector< - std::pair - > se(k); - - copy(seqs_begin, seqs_end, se.begin()); - - difference_type* borders = - new difference_type[num_threads + 1]; - equally_split(length, num_threads, borders); - - for (int s = 0; s < (num_threads - 1); ++s) - { - offsets[s].resize(k); - multiseq_partition( - se.begin(), se.end(), borders[s + 1], - offsets[s].begin(), comp); - - // Last one also needed and available. - if (!tight) - { - offsets[num_threads - 1].resize(k); - multiseq_partition(se.begin(), se.end(), - difference_type(length), - offsets[num_threads - 1].begin(), - comp); - } - } - - - for (int slab = 0; slab < num_threads; ++slab) - { - // For each slab / processor. - for (int seq = 0; seq < k; ++seq) - { - // For each sequence. - if (slab == 0) - { - // Absolute beginning. 
- pieces[slab][seq].first = 0; - } - else - pieces[slab][seq].first = - pieces[slab - 1][seq].second; - if (!tight || slab < (num_threads - 1)) - pieces[slab][seq].second = - offsets[slab][seq] - seqs_begin[seq].first; - else - { - // slab == num_threads - 1 - pieces[slab][seq].second = - _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]); - } - } - } - delete[] offsets; - } + splitter(seqs_begin, seqs_end, comp, length, total_length, + pieces); } //single thread_index_t iam = omp_get_thread_num(); @@ -1701,15 +1358,14 @@ template( chunks, chunks + k, target + target_position, comp, - std::min(local_length, length - target_position), - stable, false, sequential_tag()); + std::min(local_length, length - target_position)); delete[] chunks; } @@ -1727,7 +1383,7 @@ template + * int sequences[10][10]; + * for (int i = 0; i < 10; ++i) + * for (int j = 0; i < 10; ++j) + * sequences[i][j] = j; + * + * int out[33]; + * std::vector > seqs; + * for (int i = 0; i < 10; ++i) + * { seqs.push(std::make_pair(sequences[i], sequences[i] + 10)) } + * + * multiway_merge(seqs.begin(), seqs.end(), target, std::less(), 33); + * + * + * @see stable_multiway_merge + * + * @pre All input sequences must be sorted. + * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * + * @post [target, return value) contains merged elements from the + * input sequences. + * @post return value - target = min(length, number of elements in all + * sequences). + * + * @param RandomAccessIteratorPairIterator iterator over sequence + * of pairs of iterators + * @param RandomAccessIteratorOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param Comparator strict weak ordering type to compare elements + * in sequences + * + * @param seqs_begin begin of sequence sequence + * @param seqs_end end of sequence sequence + * @param target target sequence to merge to. 
+ * @param comp strict weak ordering to use for element comparison. + * @param length the number of elements to merge into target. + * + * @return end iterator of output sequence */ -template - RandomAccessIterator3 - multiway_merge(RandomAccessIteratorPairIterator seqs_begin, - RandomAccessIteratorPairIterator seqs_end, - RandomAccessIterator3 target, Comparator comp, - _DifferenceTp length, bool stable) - { +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + + (seqs_begin, seqs_end, target, comp, + multiway_merge_sampling_splitting, + static_cast(length)); + else + target_end = sequential_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + return target; + + // Execute multiway merge *sequentially*. 
+ return sequential_multiway_merge + + (seqs_begin, seqs_end, target, comp, length); +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::exact_tag) +{ typedef _DifferenceTp difference_type; _GLIBCXX_CALL(seqs_end - seqs_begin) + // catch special case: no sequences if (seqs_begin == seqs_end) return target; - const _Settings& __s = _Settings::get(); - - RandomAccessIterator3 target_end; - if (_GLIBCXX_PARALLEL_CONDITION( - ((seqs_end - seqs_begin) >= __s.multiway_merge_minimal_k) - && ((sequence_index_t)length >= __s.multiway_merge_minimal_n))) - target_end = parallel_multiway_merge(seqs_begin, seqs_end, - target, comp, - static_cast(length), - stable, false); + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, + multiway_merge_exact_splitting, + static_cast(length)); else - target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, - stable, false, sequential_tag()); + target_end = sequential_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, length); return target_end; - } +} -/** @brief Multi-way merging front-end. - * @param seqs_begin Begin iterator of iterator pair input sequence. - * @param seqs_end End iterator of iterator pair input sequence. 
- * @param target Begin iterator out output sequence. - * @param comp Comparator. - * @param length Maximum length to merge. - * @param stable Stable merging incurs a performance penalty. - * @return End iterator of output sequence. - * @pre For each @c i, @c seqs_begin[i].second must be the end - * marker of the sequence, but also reference the one more sentinel - * element. */ -template - RandomAccessIterator3 - multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin, - RandomAccessIteratorPairIterator seqs_end, - RandomAccessIterator3 target, - Comparator comp, - _DifferenceTp length, - bool stable) - { +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length) +{ typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + // catch special case: no sequences if (seqs_begin == seqs_end) return target; + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, + multiway_merge_sampling_splitting, + static_cast(length)); + else + target_end = sequential_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute multiway merge *sequentially*. 
+ return sequential_multiway_merge + + (seqs_begin, seqs_end, target, comp, length); +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::exact_tag) +{ + typedef _DifferenceTp difference_type; _GLIBCXX_CALL(seqs_end - seqs_begin) - const _Settings& __s = _Settings::get(); - const bool cond1 = seqs_end - seqs_begin >= __s.multiway_merge_minimal_k; - const bool cond2 = sequence_index_t(length) >= __s.multiway_merge_minimal_n; - if (_GLIBCXX_PARALLEL_CONDITION(cond1 && cond2)) - return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, - length, stable, true); + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, + multiway_merge_exact_splitting + , + static_cast(length)); else - return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, - true, sequential_tag()); - } + target_end = sequential_multiway_merge( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; } +/** + * @brief Multiway Merge Frontend. + * + * Merge the sequences specified by seqs_begin and seqs_end into + * target. seqs_begin and seqs_end must point to a sequence of + * pairs. 
These pairs must contain an iterator to the beginning + * of a sequence in their first entry and an iterator to the end of + * the same sequence in their second entry. + * + * Ties are broken arbitrarily. See stable_multiway_merge for a variant + * that breaks ties by sequence number but is slower. + * + * The first entries of the pairs (i.e. the begin iterators) will be moved + * forward. + * + * The output sequence has to provide enough space for all elements + * that are written to it. + * + * This function will merge the input sequences: + * + * - not stable + * - parallel, depending on the input size and Settings + * - using sampling for splitting + * - using sentinels + * + * You have to take care that the element the end iterator points to is + * readable and contains a value that is greater than any other non-sentinel + * value in all sequences. + * + * Example: + * + *
+ *   int sequences[10][11];
+ *   for (int i = 0; i < 10; ++i)
+ *     for (int j = 0; j < 11; ++j)
+ *       sequences[i][j] = j; // last one is sentinel!
+ *
+ *   int out[33];
+ *   std::vector<std::pair<int*, int*> > seqs;
+ *   for (int i = 0; i < 10; ++i)
+ *     { seqs.push_back(std::make_pair(sequences[i], sequences[i] + 10)); }
+ *
+ *   multiway_merge_sentinels(seqs.begin(), seqs.end(), out, std::less<int>(), 33);
+ * 
+ * + * @pre All input sequences must be sorted. + * @pre Target must provide enough space to merge out length elements or + * the number of elements in all sequences, whichever is smaller. + * @pre For each @c i, @c seqs_begin[i].second must be the end + * marker of the sequence, but also reference the one more sentinel + * element. + * + * @post [target, return value) contains merged elements from the + * input sequences. + * @post return value - target = min(length, number of elements in all + * sequences). + * + * @see stable_multiway_merge_sentinels + * + * @param RandomAccessIteratorPairIterator iterator over sequence + * of pairs of iterators + * @param RandomAccessIteratorOut iterator over target sequence + * @param _DifferenceTp difference type for the sequence + * @param Comparator strict weak ordering type to compare elements + * in sequences + * + * @param seqs_begin begin of sequence sequence + * @param seqs_end end of sequence sequence + * @param target target sequence to merge to. + * @param comp strict weak ordering to use for element comparison. + * @param length the number of elements to merge into target. + * + * @return end iterator of output sequence + */ +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + + (seqs_begin, seqs_end, target, comp, + multiway_merge_sampling_splitting + , + static_cast(length)); + else + target_end = sequential_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute multiway merge *sequentially*. + return sequential_multiway_merge + + (seqs_begin, seqs_end, target, comp, length); +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::exact_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, + multiway_merge_exact_splitting + , + static_cast(length)); + else + target_end = sequential_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. 
+ RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, + multiway_merge_sampling_splitting + , + static_cast(length)); + else + target_end = sequential_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::sequential_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute multiway merge *sequentially*. 
+ return sequential_multiway_merge + + (seqs_begin, seqs_end, target, comp, length); +} + +template< + typename RandomAccessIteratorPairIterator + , typename RandomAccessIteratorOut + , typename _DifferenceTp + , typename Comparator> +RandomAccessIteratorOut +stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin + , RandomAccessIteratorPairIterator seqs_end + , RandomAccessIteratorOut target + , Comparator comp, _DifferenceTp length + , __gnu_parallel::exact_tag) +{ + typedef _DifferenceTp difference_type; + _GLIBCXX_CALL(seqs_end - seqs_begin) + + // catch special case: no sequences + if (seqs_begin == seqs_end) + { return target; } + + // Execute merge; maybe parallel, depending on the number of merged + // elements and the number of sequences and global thresholds in + // Settings. + RandomAccessIteratorOut target_end; + if ((seqs_end - seqs_begin > 1) && + _GLIBCXX_PARALLEL_CONDITION( + ((seqs_end - seqs_begin) >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_k) + && ((sequence_index_t)length >= + __gnu_parallel::_Settings::get().multiway_merge_minimal_n))) + target_end = parallel_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, + multiway_merge_exact_splitting + , + static_cast(length)); + else + target_end = sequential_multiway_merge + ( + seqs_begin, seqs_end, + target, comp, length); + + return target_end; +} + +}; // namespace __gnu_parallel + #endif diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h index c8ceb2f40b7..3791a144d53 100644 --- a/libstdc++-v3/include/parallel/multiway_mergesort.h +++ b/libstdc++-v3/include/parallel/multiway_mergesort.h @@ -80,26 +80,9 @@ template /** @brief Start indices, per thread. */ difference_type* starts; - /** @brief Temporary arrays for each thread. - * - * Indirection Allows using the temporary storage in different - * ways, without code duplication. 
- * @see _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST */ - value_type** temporaries; - -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST /** @brief Storage in which to sort. */ - RandomAccessIterator* sorting_places; + value_type** temporary; - /** @brief Storage into which to merge. */ - value_type** merging_places; -#else - /** @brief Storage in which to sort. */ - value_type** sorting_places; - - /** @brief Storage into which to merge. */ - RandomAccessIterator* merging_places; -#endif /** @brief Samples. */ value_type* samples; @@ -108,9 +91,6 @@ template /** @brief Pieces of data to merge @c [thread][sequence] */ std::vector >* pieces; - - /** @brief Stable sorting desired. */ - bool stable; }; /** @@ -122,7 +102,7 @@ template template void determine_samples(PMWMSSortingData* sd, - _DifferenceTp& num_samples) + _DifferenceTp num_samples) { typedef std::iterator_traits traits_type; typedef typename traits_type::value_type value_type; @@ -130,8 +110,6 @@ template thread_index_t iam = omp_get_thread_num(); - num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1; - difference_type* es = new difference_type[num_samples + 2]; equally_split(sd->starts[iam + 1] - sd->starts[iam], @@ -144,11 +122,201 @@ template delete[] es; } +/** @brief Split consistently. */ +template + struct split_consistently + { + }; + +/** @brief Split by exact splitting. 
*/ +template + struct split_consistently + + { + void operator()( + const thread_index_t iam, + PMWMSSortingData* sd, + Comparator& comp, + const typename + std::iterator_traits::difference_type + num_samples) + const + { +# pragma omp barrier + + std::vector > + seqs(sd->num_threads); + for (thread_index_t s = 0; s < sd->num_threads; s++) + seqs[s] = std::make_pair(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s])); + + std::vector offsets(sd->num_threads); + + // if not last thread + if (iam < sd->num_threads - 1) + multiseq_partition(seqs.begin(), seqs.end(), + sd->starts[iam + 1], offsets.begin(), comp); + + for (int seq = 0; seq < sd->num_threads; seq++) + { + // for each sequence + if (iam < (sd->num_threads - 1)) + sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first; + else + // very end of this sequence + sd->pieces[iam][seq].end = + sd->starts[seq + 1] - sd->starts[seq]; + } + +# pragma omp barrier + + for (thread_index_t seq = 0; seq < sd->num_threads; seq++) + { + // For each sequence. + if (iam > 0) + sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end; + else + // Absolute beginning. + sd->pieces[iam][seq].begin = 0; + } + } + }; + +/** @brief Split by sampling. */ +template + struct split_consistently + { + void operator()( + const thread_index_t iam, + PMWMSSortingData* sd, + Comparator& comp, + const typename + std::iterator_traits::difference_type + num_samples) + const + { + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + determine_samples(sd, num_samples); + +# pragma omp barrier + +# pragma omp single + __gnu_sequential::sort(sd->samples, + sd->samples + (num_samples * sd->num_threads), + comp); + +# pragma omp barrier + + for (thread_index_t s = 0; s < sd->num_threads; ++s) + { + // For each sequence. 
+ if (num_samples * iam > 0) + sd->pieces[iam][s].begin = + std::lower_bound(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s]), + sd->samples[num_samples * iam], + comp) + - sd->temporary[s]; + else + // Absolute beginning. + sd->pieces[iam][s].begin = 0; + + if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads)) + sd->pieces[iam][s].end = + std::lower_bound(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s]), + sd->samples[num_samples * (iam + 1)], + comp) + - sd->temporary[s]; + else + // Absolute end. + sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s]; + } + } + }; + +template + struct possibly_stable_sort + { + }; + +template + struct possibly_stable_sort + { + void operator()(const RandomAccessIterator& begin, + const RandomAccessIterator& end, Comparator& comp) const + { + __gnu_sequential::stable_sort(begin, end, comp); + } + }; + +template + struct possibly_stable_sort + { + void operator()(const RandomAccessIterator begin, + const RandomAccessIterator end, Comparator& comp) const + { + __gnu_sequential::sort(begin, end, comp); + } + }; + +template + struct possibly_stable_multiway_merge + { + }; + +template + struct possibly_stable_multiway_merge + + { + void operator()(const SeqRandomAccessIterator& seqs_begin, + const SeqRandomAccessIterator& seqs_end, + const RandomAccessIterator& target, + Comparator& comp, + DiffType length_am) const + { + stable_multiway_merge(seqs_begin, seqs_end, target, comp, + length_am, sequential_tag()); + } + }; + +template + struct possibly_stable_multiway_merge + + { + void operator()(const SeqRandomAccessIterator& seqs_begin, + const SeqRandomAccessIterator& seqs_end, + const RandomAccessIterator& target, + Comparator& comp, + DiffType length_am) const + { + multiway_merge(seqs_begin, seqs_end, target, comp, + length_am, sequential_tag()); + } + }; + /** @brief PMWMS code executed by each thread. * @param sd Pointer to algorithm data. 
* @param comp Comparator. */ -template +template void parallel_sort_mwms_pu(PMWMSSortingData* sd, Comparator& comp) @@ -162,165 +330,65 @@ template // Length of this thread's chunk, before merging. difference_type length_local = sd->starts[iam + 1] - sd->starts[iam]; -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST - typedef RandomAccessIterator SortingPlacesIterator; + // Sort in temporary storage, leave space for sentinel. - // Sort in input storage. - sd->sorting_places[iam] = sd->source + sd->starts[iam]; -#else typedef value_type* SortingPlacesIterator; - // Sort in temporary storage, leave space for sentinel. - sd->sorting_places[iam] = sd->temporaries[iam] = + sd->temporary[iam] = static_cast( ::operator new(sizeof(value_type) * (length_local + 1))); // Copy there. std::uninitialized_copy(sd->source + sd->starts[iam], sd->source + sd->starts[iam] + length_local, - sd->sorting_places[iam]); -#endif - - // Sort locally. - if (sd->stable) - __gnu_sequential::stable_sort(sd->sorting_places[iam], - sd->sorting_places[iam] + length_local, - comp); - else - __gnu_sequential::sort(sd->sorting_places[iam], - sd->sorting_places[iam] + length_local, - comp); - - // Invariant: locally sorted subsequence in sd->sorting_places[iam], - // sd->sorting_places[iam] + length_local. - const _Settings& __s = _Settings::get(); - if (__s.sort_splitting == SAMPLING) - { - difference_type num_samples; - determine_samples(sd, num_samples); - -# pragma omp barrier - -# pragma omp single - __gnu_sequential::sort(sd->samples, - sd->samples + (num_samples * sd->num_threads), - comp); - -# pragma omp barrier - - for (int s = 0; s < sd->num_threads; ++s) - { - // For each sequence. - if (num_samples * iam > 0) - sd->pieces[iam][s].begin = - std::lower_bound(sd->sorting_places[s], - sd->sorting_places[s] - + (sd->starts[s + 1] - sd->starts[s]), - sd->samples[num_samples * iam], - comp) - - sd->sorting_places[s]; - else - // Absolute beginning. 
- sd->pieces[iam][s].begin = 0; - - if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads)) - sd->pieces[iam][s].end = - std::lower_bound(sd->sorting_places[s], - sd->sorting_places[s] - + (sd->starts[s + 1] - sd->starts[s]), - sd->samples[num_samples * (iam + 1)], - comp) - - sd->sorting_places[s]; - else - // Absolute end. - sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s]; - } - } - else if (__s.sort_splitting == EXACT) - { -# pragma omp barrier + sd->temporary[iam]); - std::vector > - seqs(sd->num_threads); - for (int s = 0; s < sd->num_threads; ++s) - seqs[s] = std::make_pair(sd->sorting_places[s], - sd->sorting_places[s] - + (sd->starts[s + 1] - sd->starts[s])); + possibly_stable_sort() + (sd->temporary[iam], sd->temporary[iam] + length_local, comp); - std::vector offsets(sd->num_threads); + // Invariant: locally sorted subsequence in sd->temporary[iam], + // sd->temporary[iam] + length_local. - // if not last thread - if (iam < sd->num_threads - 1) - multiseq_partition(seqs.begin(), seqs.end(), - sd->starts[iam + 1], offsets.begin(), comp); + // No barrier here: Synchronization is done by the splitting routine. - for (int seq = 0; seq < sd->num_threads; ++seq) - { - // for each sequence - if (iam < (sd->num_threads - 1)) - sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first; - else - // very end of this sequence - sd->pieces[iam][seq].end = (sd->starts[seq + 1] - - sd->starts[seq]); - } - -# pragma omp barrier - - for (int seq = 0; seq < sd->num_threads; ++seq) - { - // For each sequence. - if (iam > 0) - sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end; - else - // Absolute beginning. - sd->pieces[iam][seq].begin = 0; - } - } + difference_type num_samples = + _Settings::get().sort_mwms_oversampling * sd->num_threads - 1; + split_consistently + () + (iam, sd, comp, num_samples); // Offset from target begin, length after merging. 
difference_type offset = 0, length_am = 0; - for (int s = 0; s < sd->num_threads; ++s) + for (thread_index_t s = 0; s < sd->num_threads; s++) { length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin; offset += sd->pieces[iam][s].begin; } -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST - // Merge to temporary storage, uninitialized creation not possible - // since there is no multiway_merge calling the placement new - // instead of the assignment operator. - // XXX incorrect (de)construction - sd->merging_places[iam] = sd->temporaries[iam] = - static_cast(::operator new(sizeof(value_type) - * length_am)); -#else - // Merge directly to target. - sd->merging_places[iam] = sd->source + offset; -#endif - std::vector > - seqs(sd->num_threads); + typedef std::vector< + std::pair > + seq_vector_type; + seq_vector_type seqs(sd->num_threads); for (int s = 0; s < sd->num_threads; ++s) { seqs[s] = - std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin, - sd->sorting_places[s] + sd->pieces[iam][s].end); + std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin, + sd->temporary[s] + sd->pieces[iam][s].end); } - multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, - length_am, sd->stable, false, sequential_tag()); + possibly_stable_multiway_merge< + stable, + typename seq_vector_type::iterator, + RandomAccessIterator, + Comparator, difference_type>() + (seqs.begin(), seqs.end(), + sd->source + offset, comp, + length_am); # pragma omp barrier -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST - // Write back. - std::copy(sd->merging_places[iam], - sd->merging_places[iam] + length_am, - sd->source + offset); -#endif - - ::operator delete(sd->temporaries[iam]); + ::operator delete(sd->temporary[iam]); } /** @brief PMWMS main call. @@ -329,21 +397,22 @@ template * @param comp Comparator. * @param n Length of sequence. * @param num_threads Number of threads to use. - * @param stable Stable sorting. 
*/ -template +template void parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, - Comparator comp, typename - std::iterator_traits:: - difference_type n, int num_threads, bool stable) + Comparator comp, + thread_index_t num_threads) { - _GLIBCXX_CALL(n) + _GLIBCXX_CALL(end - begin) typedef std::iterator_traits traits_type; typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type; + difference_type n = end - begin; + if (n <= 1) return; @@ -354,7 +423,6 @@ template // shared variables PMWMSSortingData sd; difference_type* starts; - const _Settings& __s = _Settings::get(); # pragma omp parallel num_threads(num_threads) { @@ -364,23 +432,16 @@ template { sd.num_threads = num_threads; sd.source = begin; - sd.temporaries = new value_type*[num_threads]; - -#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST - sd.sorting_places = new RandomAccessIterator[num_threads]; - sd.merging_places = new value_type*[num_threads]; -#else - sd.sorting_places = new value_type*[num_threads]; - sd.merging_places = new RandomAccessIterator[num_threads]; -#endif - if (__s.sort_splitting == SAMPLING) + sd.temporary = new value_type*[num_threads]; + + if (!exact) { - unsigned int size = - (__s.sort_mwms_oversampling * num_threads - 1) + difference_type size = + (_Settings::get().sort_mwms_oversampling * num_threads - 1) * num_threads; sd.samples = static_cast( - ::operator new(size * sizeof(value_type))); + ::operator new(size * sizeof(value_type))); } else sd.samples = NULL; @@ -390,7 +451,6 @@ template for (int s = 0; s < num_threads; ++s) sd.pieces[s].resize(num_threads); starts = sd.starts = new difference_type[num_threads + 1]; - sd.stable = stable; difference_type chunk_length = n / num_threads; difference_type split = n % num_threads; @@ -401,18 +461,16 @@ template pos += (i < split) ? (chunk_length + 1) : chunk_length; } starts[num_threads] = pos; - } + } //single // Now sort in parallel. 
- parallel_sort_mwms_pu(&sd, comp); + parallel_sort_mwms_pu(&sd, comp); } //parallel delete[] starts; - delete[] sd.temporaries; - delete[] sd.sorting_places; - delete[] sd.merging_places; + delete[] sd.temporary; - if (__s.sort_splitting == SAMPLING) + if (!exact) ::operator delete(sd.samples); delete[] sd.offsets; diff --git a/libstdc++-v3/include/parallel/sort.h b/libstdc++-v3/include/parallel/sort.h index edf4eea02d8..83aa2df1b11 100644 --- a/libstdc++-v3/include/parallel/sort.h +++ b/libstdc++-v3/include/parallel/sort.h @@ -71,7 +71,7 @@ namespace __gnu_parallel template inline void parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, - Comparator comp, bool stable) + Comparator comp, bool stable) { _GLIBCXX_CALL(end - begin) typedef std::iterator_traits traits_type; @@ -79,25 +79,43 @@ namespace __gnu_parallel typedef typename traits_type::difference_type difference_type; if (begin != end) - { - difference_type n = end - begin; + { + difference_type n = end - begin; - if (false) ; + if (false) ; #if _GLIBCXX_MERGESORT - else if (stable || _Settings::get().sort_algorithm == MWMS) - parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable); + else if (stable) + { + if(_Settings::get().sort_splitting == EXACT) + parallel_sort_mwms + (begin, end, comp, get_max_threads()); + else + parallel_sort_mwms + (begin, end, comp, get_max_threads()); + } + else if (_Settings::get().sort_algorithm == MWMS) + { + if(_Settings::get().sort_splitting == EXACT) + parallel_sort_mwms + (begin, end, comp, get_max_threads()); + else + parallel_sort_mwms + (begin, end, comp, get_max_threads()); + } #endif #if _GLIBCXX_QUICKSORT - else if (!stable && _Settings::get().sort_algorithm == QS) - parallel_sort_qs(begin, end, comp, n, get_max_threads()); + else if (!stable && _Settings::get().sort_algorithm == QS) + parallel_sort_qs(begin, end, comp, n, get_max_threads()); #endif #if _GLIBCXX_BAL_QUICKSORT - else if (!stable && _Settings::get().sort_algorithm == 
QS_BALANCED) - parallel_sort_qsb(begin, end, comp, n, get_max_threads()); + else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED) + parallel_sort_qsb(begin, end, comp, n, get_max_threads()); #endif - else - __gnu_sequential::sort(begin, end, comp); - } + else if(stable) + __gnu_sequential::stable_sort(begin, end, comp); + else + __gnu_sequential::sort(begin, end, comp); + } } } // end namespace __gnu_parallel diff --git a/libstdc++-v3/include/parallel/tags.h b/libstdc++-v3/include/parallel/tags.h index b3f2ec86912..f57add97c7b 100644 --- a/libstdc++-v3/include/parallel/tags.h +++ b/libstdc++-v3/include/parallel/tags.h @@ -44,6 +44,9 @@ namespace __gnu_parallel /** @brief Forces sequential execution at compile time. */ struct sequential_tag { }; + /** @brief Forces exact splitting in multiway merge at compile time. */ + struct exact_tag { }; + /** @brief Recommends parallel execution at compile time. */ struct parallel_tag { }; diff --git a/libstdc++-v3/include/parallel/types.h b/libstdc++-v3/include/parallel/types.h index ded617edb6d..1b646b02084 100644 --- a/libstdc++-v3/include/parallel/types.h +++ b/libstdc++-v3/include/parallel/types.h @@ -87,15 +87,10 @@ namespace __gnu_parallel /// Merging algorithms: // bubblesort-alike, loser-tree variants, enum sentinel. enum _MultiwayMergeAlgorithm - { - BUBBLE, - LOSER_TREE_EXPLICIT, - LOSER_TREE, - LOSER_TREE_COMBINED, - LOSER_TREE_SENTINEL, - ENUM_SENTINEL + { + LOSER_TREE }; - + /// Partial sum algorithms: recursive, linear. enum _PartialSumAlgorithm { diff --git a/libstdc++-v3/testsuite/25_algorithms/sort/35588.cc b/libstdc++-v3/testsuite/25_algorithms/sort/35588.cc new file mode 100644 index 00000000000..715fa3b93d2 --- /dev/null +++ b/libstdc++-v3/testsuite/25_algorithms/sort/35588.cc @@ -0,0 +1,32 @@ +// Copyright (C) 2008 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +// USA. + +#include +#include +#include + +// libstdc++/35588 +int main() +{ + using namespace std; + using namespace tr1; + using namespace placeholders; + + int t[10]; + sort(t, t+10, bind(less(), _1, _2)); +} -- 2.30.2