3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file parallel/multiway_mergesort.h
26 * @brief Parallel multiway merge sort.
27 * This file is a GNU parallel extension to the Standard C++ Library.
30 // Written by Johannes Singler.
32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
37 #include <parallel/basic_iterator.h>
38 #include <bits/stl_algo.h>
39 #include <parallel/parallel.h>
40 #include <parallel/multiway_merge.h>
42 namespace __gnu_parallel
44 /** @brief Subsequence description. */
45 template<typename _DifferenceTp
>
48 typedef _DifferenceTp _DifferenceType
;
50 /** @brief Begin of subsequence. */
51 _DifferenceType _M_begin
;
53 /** @brief End of subsequence. */
54 _DifferenceType _M_end
;
57 /** @brief Data accessed by all threads.
59 * PMWMS = parallel multiway mergesort */
60 template<typename _RAIter
>
61 struct _PMWMSSortingData
63 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
64 typedef typename
_TraitsType::value_type _ValueType
;
65 typedef typename
_TraitsType::difference_type _DifferenceType
;
67 /** @brief Number of threads involved. */
68 _ThreadIndex _M_num_threads
;
70 /** @brief Input __begin. */
73 /** @brief Start indices, per thread. */
74 _DifferenceType
* _M_starts
;
76 /** @brief Storage in which to sort. */
77 _ValueType
** _M_temporary
;
79 /** @brief Samples. */
80 _ValueType
* _M_samples
;
82 /** @brief Offsets to add to the found positions. */
83 _DifferenceType
* _M_offsets
;
85 /** @brief Pieces of data to merge @__c [thread][__sequence] */
86 std::vector
<_Piece
<_DifferenceType
> >* _M_pieces
;
90 * @brief Select _M_samples from a sequence.
91 * @param __sd Pointer to algorithm data. _Result will be placed in
92 * @__c __sd->_M_samples.
93 * @param __num_samples Number of _M_samples to select.
95 template<typename _RAIter
, typename _DifferenceTp
>
97 __determine_samples(_PMWMSSortingData
<_RAIter
>* __sd
,
98 _DifferenceTp __num_samples
)
100 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
101 typedef typename
_TraitsType::value_type _ValueType
;
102 typedef _DifferenceTp _DifferenceType
;
104 _ThreadIndex __iam
= omp_get_thread_num();
106 _DifferenceType
* __es
= new _DifferenceType
[__num_samples
+ 2];
108 equally_split(__sd
->_M_starts
[__iam
+ 1] - __sd
->_M_starts
[__iam
],
109 __num_samples
+ 1, __es
);
111 for (_DifferenceType __i
= 0; __i
< __num_samples
; ++__i
)
112 ::new(&(__sd
->_M_samples
[__iam
* __num_samples
+ __i
]))
113 _ValueType(__sd
->_M_source
[__sd
->_M_starts
[__iam
]
119 /** @brief Split consistently. */
120 template<bool __exact
, typename _RAIter
,
121 typename _Compare
, typename _SortingPlacesIterator
>
122 struct _SplitConsistently
125 /** @brief Split by exact splitting. */
126 template<typename _RAIter
, typename _Compare
,
127 typename _SortingPlacesIterator
>
128 struct _SplitConsistently
<true, _RAIter
,
129 _Compare
, _SortingPlacesIterator
>
132 operator()(const _ThreadIndex __iam
,
133 _PMWMSSortingData
<_RAIter
>* __sd
,
136 std::iterator_traits
<_RAIter
>::difference_type
141 std::vector
<std::pair
<_SortingPlacesIterator
,
142 _SortingPlacesIterator
> >
143 seqs(__sd
->_M_num_threads
);
144 for (_ThreadIndex __s
= 0; __s
< __sd
->_M_num_threads
; __s
++)
145 seqs
[__s
] = std::make_pair(__sd
->_M_temporary
[__s
],
146 __sd
->_M_temporary
[__s
]
147 + (__sd
->_M_starts
[__s
+ 1]
148 - __sd
->_M_starts
[__s
]));
150 std::vector
<_SortingPlacesIterator
> _M_offsets(__sd
->_M_num_threads
);
152 // if not last thread
153 if (__iam
< __sd
->_M_num_threads
- 1)
154 multiseq_partition(seqs
.begin(), seqs
.end(),
155 __sd
->_M_starts
[__iam
+ 1], _M_offsets
.begin(),
158 for (int __seq
= 0; __seq
< __sd
->_M_num_threads
; __seq
++)
161 if (__iam
< (__sd
->_M_num_threads
- 1))
162 __sd
->_M_pieces
[__iam
][__seq
]._M_end
163 = _M_offsets
[__seq
] - seqs
[__seq
].first
;
165 // very end of this sequence
166 __sd
->_M_pieces
[__iam
][__seq
]._M_end
=
167 __sd
->_M_starts
[__seq
+ 1] - __sd
->_M_starts
[__seq
];
172 for (_ThreadIndex __seq
= 0; __seq
< __sd
->_M_num_threads
; __seq
++)
174 // For each sequence.
176 __sd
->_M_pieces
[__iam
][__seq
]._M_begin
=
177 __sd
->_M_pieces
[__iam
- 1][__seq
]._M_end
;
179 // Absolute beginning.
180 __sd
->_M_pieces
[__iam
][__seq
]._M_begin
= 0;
185 /** @brief Split by sampling. */
186 template<typename _RAIter
, typename _Compare
,
187 typename _SortingPlacesIterator
>
188 struct _SplitConsistently
<false, _RAIter
, _Compare
,
189 _SortingPlacesIterator
>
192 operator()(const _ThreadIndex __iam
,
193 _PMWMSSortingData
<_RAIter
>* __sd
,
196 std::iterator_traits
<_RAIter
>::difference_type
199 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
200 typedef typename
_TraitsType::value_type _ValueType
;
201 typedef typename
_TraitsType::difference_type _DifferenceType
;
203 __determine_samples(__sd
, __num_samples
);
208 __gnu_sequential::sort(__sd
->_M_samples
,
210 + (__num_samples
* __sd
->_M_num_threads
),
215 for (_ThreadIndex __s
= 0; __s
< __sd
->_M_num_threads
; ++__s
)
217 // For each sequence.
218 if (__num_samples
* __iam
> 0)
219 __sd
->_M_pieces
[__iam
][__s
]._M_begin
=
220 std::lower_bound(__sd
->_M_temporary
[__s
],
221 __sd
->_M_temporary
[__s
]
222 + (__sd
->_M_starts
[__s
+ 1]
223 - __sd
->_M_starts
[__s
]),
224 __sd
->_M_samples
[__num_samples
* __iam
],
226 - __sd
->_M_temporary
[__s
];
228 // Absolute beginning.
229 __sd
->_M_pieces
[__iam
][__s
]._M_begin
= 0;
231 if ((__num_samples
* (__iam
+ 1)) <
232 (__num_samples
* __sd
->_M_num_threads
))
233 __sd
->_M_pieces
[__iam
][__s
]._M_end
=
234 std::lower_bound(__sd
->_M_temporary
[__s
],
235 __sd
->_M_temporary
[__s
]
236 + (__sd
->_M_starts
[__s
+ 1]
237 - __sd
->_M_starts
[__s
]),
238 __sd
->_M_samples
[__num_samples
* (__iam
+ 1)],
240 - __sd
->_M_temporary
[__s
];
243 __sd
->_M_pieces
[__iam
][__s
]._M_end
= (__sd
->_M_starts
[__s
+ 1]
244 - __sd
->_M_starts
[__s
]);
249 template<bool __stable
, typename _RAIter
, typename _Compare
>
250 struct __possibly_stable_sort
253 template<typename _RAIter
, typename _Compare
>
254 struct __possibly_stable_sort
<true, _RAIter
, _Compare
>
256 void operator()(const _RAIter
& __begin
,
257 const _RAIter
& __end
, _Compare
& __comp
) const
258 { __gnu_sequential::stable_sort(__begin
, __end
, __comp
); }
261 template<typename _RAIter
, typename _Compare
>
262 struct __possibly_stable_sort
<false, _RAIter
, _Compare
>
264 void operator()(const _RAIter __begin
,
265 const _RAIter __end
, _Compare
& __comp
) const
266 { __gnu_sequential::sort(__begin
, __end
, __comp
); }
269 template<bool __stable
, typename Seq_RAIter
,
270 typename _RAIter
, typename _Compare
,
272 struct __possibly_stable_multiway_merge
275 template<typename Seq_RAIter
, typename _RAIter
,
276 typename _Compare
, typename _DiffType
>
277 struct __possibly_stable_multiway_merge
<true, Seq_RAIter
,
278 _RAIter
, _Compare
, _DiffType
>
280 void operator()(const Seq_RAIter
& __seqs_begin
,
281 const Seq_RAIter
& __seqs_end
,
282 const _RAIter
& __target
,
284 _DiffType __length_am
) const
286 stable_multiway_merge(__seqs_begin
, __seqs_end
, __target
, __length_am
,
287 __comp
, sequential_tag());
291 template<typename Seq_RAIter
, typename _RAIter
,
292 typename _Compare
, typename _DiffType
>
293 struct __possibly_stable_multiway_merge
<false, Seq_RAIter
,
294 _RAIter
, _Compare
, _DiffType
>
296 void operator()(const Seq_RAIter
& __seqs_begin
,
297 const Seq_RAIter
& __seqs_end
,
298 const _RAIter
& __target
,
300 _DiffType __length_am
) const
302 multiway_merge(__seqs_begin
, __seqs_end
, __target
, __length_am
, __comp
,
307 /** @brief PMWMS code executed by each thread.
308 * @param __sd Pointer to algorithm data.
309 * @param __comp Comparator.
311 template<bool __stable
, bool __exact
, typename _RAIter
,
314 parallel_sort_mwms_pu(_PMWMSSortingData
<_RAIter
>* __sd
,
317 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
318 typedef typename
_TraitsType::value_type _ValueType
;
319 typedef typename
_TraitsType::difference_type _DifferenceType
;
321 _ThreadIndex __iam
= omp_get_thread_num();
323 // Length of this thread's chunk, before merging.
324 _DifferenceType __length_local
325 = __sd
->_M_starts
[__iam
+ 1] - __sd
->_M_starts
[__iam
];
327 // Sort in temporary storage, leave space for sentinel.
329 typedef _ValueType
* _SortingPlacesIterator
;
331 __sd
->_M_temporary
[__iam
] =
332 static_cast<_ValueType
*>(::operator new(sizeof(_ValueType
)
333 * (__length_local
+ 1)));
336 std::uninitialized_copy(__sd
->_M_source
+ __sd
->_M_starts
[__iam
],
337 __sd
->_M_source
+ __sd
->_M_starts
[__iam
]
339 __sd
->_M_temporary
[__iam
]);
341 __possibly_stable_sort
<__stable
, _SortingPlacesIterator
, _Compare
>()
342 (__sd
->_M_temporary
[__iam
],
343 __sd
->_M_temporary
[__iam
] + __length_local
,
346 // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
347 // __sd->_M_temporary[__iam] + __length_local.
349 // No barrier here: Synchronization is done by the splitting routine.
351 _DifferenceType __num_samples
=
352 _Settings::get().sort_mwms_oversampling
* __sd
->_M_num_threads
- 1;
354 <__exact
, _RAIter
, _Compare
, _SortingPlacesIterator
>()
355 (__iam
, __sd
, __comp
, __num_samples
);
357 // Offset from __target __begin, __length after merging.
358 _DifferenceType __offset
= 0, __length_am
= 0;
359 for (_ThreadIndex __s
= 0; __s
< __sd
->_M_num_threads
; __s
++)
361 __length_am
+= (__sd
->_M_pieces
[__iam
][__s
]._M_end
362 - __sd
->_M_pieces
[__iam
][__s
]._M_begin
);
363 __offset
+= __sd
->_M_pieces
[__iam
][__s
]._M_begin
;
367 std::pair
<_SortingPlacesIterator
, _SortingPlacesIterator
> >
369 _SeqVector
seqs(__sd
->_M_num_threads
);
371 for (int __s
= 0; __s
< __sd
->_M_num_threads
; ++__s
)
375 (__sd
->_M_temporary
[__s
] + __sd
->_M_pieces
[__iam
][__s
]._M_begin
,
376 __sd
->_M_temporary
[__s
] + __sd
->_M_pieces
[__iam
][__s
]._M_end
);
379 __possibly_stable_multiway_merge
<
381 typename
_SeqVector::iterator
,
383 _Compare
, _DifferenceType
>()
384 (seqs
.begin(), seqs
.end(),
385 __sd
->_M_source
+ __offset
, __comp
,
390 ::operator delete(__sd
->_M_temporary
[__iam
]);
393 /** @brief PMWMS main call.
394 * @param __begin Begin iterator of sequence.
395 * @param __end End iterator of sequence.
396 * @param __comp Comparator.
397 * @param __n Length of sequence.
398 * @param __num_threads Number of threads to use.
400 template<bool __stable
, bool __exact
, typename _RAIter
,
403 parallel_sort_mwms(_RAIter __begin
, _RAIter __end
,
405 _ThreadIndex __num_threads
)
407 _GLIBCXX_CALL(__end
- __begin
)
409 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
410 typedef typename
_TraitsType::value_type _ValueType
;
411 typedef typename
_TraitsType::difference_type _DifferenceType
;
413 _DifferenceType __n
= __end
- __begin
;
418 // at least one element per thread
419 if (__num_threads
> __n
)
420 __num_threads
= static_cast<_ThreadIndex
>(__n
);
423 _PMWMSSortingData
<_RAIter
> __sd
;
424 _DifferenceType
* _M_starts
;
426 # pragma omp parallel num_threads(__num_threads)
428 __num_threads
= omp_get_num_threads(); //no more threads than requested
432 __sd
._M_num_threads
= __num_threads
;
433 __sd
._M_source
= __begin
;
435 __sd
._M_temporary
= new _ValueType
*[__num_threads
];
439 _DifferenceType __size
=
440 (_Settings::get().sort_mwms_oversampling
* __num_threads
- 1)
442 __sd
._M_samples
= static_cast<_ValueType
*>
443 (::operator new(__size
* sizeof(_ValueType
)));
446 __sd
._M_samples
= NULL
;
448 __sd
._M_offsets
= new _DifferenceType
[__num_threads
- 1];
450 = new std::vector
<_Piece
<_DifferenceType
> >[__num_threads
];
451 for (int __s
= 0; __s
< __num_threads
; ++__s
)
452 __sd
._M_pieces
[__s
].resize(__num_threads
);
453 _M_starts
= __sd
._M_starts
454 = new _DifferenceType
[__num_threads
+ 1];
456 _DifferenceType __chunk_length
= __n
/ __num_threads
;
457 _DifferenceType __split
= __n
% __num_threads
;
458 _DifferenceType __pos
= 0;
459 for (int __i
= 0; __i
< __num_threads
; ++__i
)
461 _M_starts
[__i
] = __pos
;
462 __pos
+= (__i
< __split
)
463 ? (__chunk_length
+ 1) : __chunk_length
;
465 _M_starts
[__num_threads
] = __pos
;
468 // Now sort in parallel.
469 parallel_sort_mwms_pu
<__stable
, __exact
>(&__sd
, __comp
);
473 delete[] __sd
._M_temporary
;
476 ::operator delete(__sd
._M_samples
);
478 delete[] __sd
._M_offsets
;
479 delete[] __sd
._M_pieces
;
482 } //namespace __gnu_parallel
484 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */