multiway_merge.h: Simple formatting and uglification fixes.
[gcc.git] / libstdc++-v3 / include / parallel / multiway_mergesort.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /** @file parallel/multiway_mergesort.h
26 * @brief Parallel multiway merge sort.
27 * This file is a GNU parallel extension to the Standard C++ Library.
28 */
29
30 // Written by Johannes Singler.
31
32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
34
35 #include <vector>
36
37 #include <parallel/basic_iterator.h>
38 #include <bits/stl_algo.h>
39 #include <parallel/parallel.h>
40 #include <parallel/multiway_merge.h>
41
42 namespace __gnu_parallel
43 {
44 /** @brief Subsequence description. */
45 template<typename _DifferenceTp>
46 struct _Piece
47 {
48 typedef _DifferenceTp _DifferenceType;
49
50 /** @brief Begin of subsequence. */
51 _DifferenceType _M_begin;
52
53 /** @brief End of subsequence. */
54 _DifferenceType _M_end;
55 };
56
57 /** @brief Data accessed by all threads.
58 *
59 * PMWMS = parallel multiway mergesort */
60 template<typename _RAIter>
61 struct _PMWMSSortingData
62 {
63 typedef std::iterator_traits<_RAIter> _TraitsType;
64 typedef typename _TraitsType::value_type _ValueType;
65 typedef typename _TraitsType::difference_type _DifferenceType;
66
67 /** @brief Number of threads involved. */
68 _ThreadIndex _M_num_threads;
69
70 /** @brief Input __begin. */
71 _RAIter _M_source;
72
73 /** @brief Start indices, per thread. */
74 _DifferenceType* _M_starts;
75
76 /** @brief Storage in which to sort. */
77 _ValueType** _M_temporary;
78
79 /** @brief Samples. */
80 _ValueType* _M_samples;
81
82 /** @brief Offsets to add to the found positions. */
83 _DifferenceType* _M_offsets;
84
85 /** @brief Pieces of data to merge @__c [thread][__sequence] */
86 std::vector<_Piece<_DifferenceType> >* _M_pieces;
87 };
88
89 /**
90 * @brief Select _M_samples from a sequence.
91 * @param __sd Pointer to algorithm data. _Result will be placed in
92 * @__c __sd->_M_samples.
93 * @param __num_samples Number of _M_samples to select.
94 */
95 template<typename _RAIter, typename _DifferenceTp>
96 void
97 __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
98 _DifferenceTp __num_samples)
99 {
100 typedef std::iterator_traits<_RAIter> _TraitsType;
101 typedef typename _TraitsType::value_type _ValueType;
102 typedef _DifferenceTp _DifferenceType;
103
104 _ThreadIndex __iam = omp_get_thread_num();
105
106 _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
107
108 equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
109 __num_samples + 1, __es);
110
111 for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
112 ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
113 _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
114 + __es[__i + 1]]);
115
116 delete[] __es;
117 }
118
119 /** @brief Split consistently. */
120 template<bool __exact, typename _RAIter,
121 typename _Compare, typename _SortingPlacesIterator>
122 struct _SplitConsistently
123 { };
124
125 /** @brief Split by exact splitting. */
126 template<typename _RAIter, typename _Compare,
127 typename _SortingPlacesIterator>
128 struct _SplitConsistently<true, _RAIter,
129 _Compare, _SortingPlacesIterator>
130 {
131 void
132 operator()(const _ThreadIndex __iam,
133 _PMWMSSortingData<_RAIter>* __sd,
134 _Compare& __comp,
135 const typename
136 std::iterator_traits<_RAIter>::difference_type
137 __num_samples) const
138 {
139 # pragma omp barrier
140
141 std::vector<std::pair<_SortingPlacesIterator,
142 _SortingPlacesIterator> >
143 seqs(__sd->_M_num_threads);
144 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
145 seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
146 __sd->_M_temporary[__s]
147 + (__sd->_M_starts[__s + 1]
148 - __sd->_M_starts[__s]));
149
150 std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
151
152 // if not last thread
153 if (__iam < __sd->_M_num_threads - 1)
154 multiseq_partition(seqs.begin(), seqs.end(),
155 __sd->_M_starts[__iam + 1], _M_offsets.begin(),
156 __comp);
157
158 for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
159 {
160 // for each sequence
161 if (__iam < (__sd->_M_num_threads - 1))
162 __sd->_M_pieces[__iam][__seq]._M_end
163 = _M_offsets[__seq] - seqs[__seq].first;
164 else
165 // very end of this sequence
166 __sd->_M_pieces[__iam][__seq]._M_end =
167 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
168 }
169
170 # pragma omp barrier
171
172 for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
173 {
174 // For each sequence.
175 if (__iam > 0)
176 __sd->_M_pieces[__iam][__seq]._M_begin =
177 __sd->_M_pieces[__iam - 1][__seq]._M_end;
178 else
179 // Absolute beginning.
180 __sd->_M_pieces[__iam][__seq]._M_begin = 0;
181 }
182 }
183 };
184
185 /** @brief Split by sampling. */
186 template<typename _RAIter, typename _Compare,
187 typename _SortingPlacesIterator>
188 struct _SplitConsistently<false, _RAIter, _Compare,
189 _SortingPlacesIterator>
190 {
191 void
192 operator()(const _ThreadIndex __iam,
193 _PMWMSSortingData<_RAIter>* __sd,
194 _Compare& __comp,
195 const typename
196 std::iterator_traits<_RAIter>::difference_type
197 __num_samples) const
198 {
199 typedef std::iterator_traits<_RAIter> _TraitsType;
200 typedef typename _TraitsType::value_type _ValueType;
201 typedef typename _TraitsType::difference_type _DifferenceType;
202
203 __determine_samples(__sd, __num_samples);
204
205 # pragma omp barrier
206
207 # pragma omp single
208 __gnu_sequential::sort(__sd->_M_samples,
209 __sd->_M_samples
210 + (__num_samples * __sd->_M_num_threads),
211 __comp);
212
213 # pragma omp barrier
214
215 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
216 {
217 // For each sequence.
218 if (__num_samples * __iam > 0)
219 __sd->_M_pieces[__iam][__s]._M_begin =
220 std::lower_bound(__sd->_M_temporary[__s],
221 __sd->_M_temporary[__s]
222 + (__sd->_M_starts[__s + 1]
223 - __sd->_M_starts[__s]),
224 __sd->_M_samples[__num_samples * __iam],
225 __comp)
226 - __sd->_M_temporary[__s];
227 else
228 // Absolute beginning.
229 __sd->_M_pieces[__iam][__s]._M_begin = 0;
230
231 if ((__num_samples * (__iam + 1)) <
232 (__num_samples * __sd->_M_num_threads))
233 __sd->_M_pieces[__iam][__s]._M_end =
234 std::lower_bound(__sd->_M_temporary[__s],
235 __sd->_M_temporary[__s]
236 + (__sd->_M_starts[__s + 1]
237 - __sd->_M_starts[__s]),
238 __sd->_M_samples[__num_samples * (__iam + 1)],
239 __comp)
240 - __sd->_M_temporary[__s];
241 else
242 // Absolute end.
243 __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
244 - __sd->_M_starts[__s]);
245 }
246 }
247 };
248
249 template<bool __stable, typename _RAIter, typename _Compare>
250 struct __possibly_stable_sort
251 { };
252
253 template<typename _RAIter, typename _Compare>
254 struct __possibly_stable_sort<true, _RAIter, _Compare>
255 {
256 void operator()(const _RAIter& __begin,
257 const _RAIter& __end, _Compare& __comp) const
258 { __gnu_sequential::stable_sort(__begin, __end, __comp); }
259 };
260
261 template<typename _RAIter, typename _Compare>
262 struct __possibly_stable_sort<false, _RAIter, _Compare>
263 {
264 void operator()(const _RAIter __begin,
265 const _RAIter __end, _Compare& __comp) const
266 { __gnu_sequential::sort(__begin, __end, __comp); }
267 };
268
269 template<bool __stable, typename Seq_RAIter,
270 typename _RAIter, typename _Compare,
271 typename DiffType>
272 struct __possibly_stable_multiway_merge
273 { };
274
275 template<typename Seq_RAIter, typename _RAIter,
276 typename _Compare, typename _DiffType>
277 struct __possibly_stable_multiway_merge<true, Seq_RAIter,
278 _RAIter, _Compare, _DiffType>
279 {
280 void operator()(const Seq_RAIter& __seqs_begin,
281 const Seq_RAIter& __seqs_end,
282 const _RAIter& __target,
283 _Compare& __comp,
284 _DiffType __length_am) const
285 {
286 stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
287 __comp, sequential_tag());
288 }
289 };
290
291 template<typename Seq_RAIter, typename _RAIter,
292 typename _Compare, typename _DiffType>
293 struct __possibly_stable_multiway_merge<false, Seq_RAIter,
294 _RAIter, _Compare, _DiffType>
295 {
296 void operator()(const Seq_RAIter& __seqs_begin,
297 const Seq_RAIter& __seqs_end,
298 const _RAIter& __target,
299 _Compare& __comp,
300 _DiffType __length_am) const
301 {
302 multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
303 sequential_tag());
304 }
305 };
306
307 /** @brief PMWMS code executed by each thread.
308 * @param __sd Pointer to algorithm data.
309 * @param __comp Comparator.
310 */
311 template<bool __stable, bool __exact, typename _RAIter,
312 typename _Compare>
313 void
314 parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
315 _Compare& __comp)
316 {
317 typedef std::iterator_traits<_RAIter> _TraitsType;
318 typedef typename _TraitsType::value_type _ValueType;
319 typedef typename _TraitsType::difference_type _DifferenceType;
320
321 _ThreadIndex __iam = omp_get_thread_num();
322
323 // Length of this thread's chunk, before merging.
324 _DifferenceType __length_local
325 = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
326
327 // Sort in temporary storage, leave space for sentinel.
328
329 typedef _ValueType* _SortingPlacesIterator;
330
331 __sd->_M_temporary[__iam] =
332 static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
333 * (__length_local + 1)));
334
335 // Copy there.
336 std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
337 __sd->_M_source + __sd->_M_starts[__iam]
338 + __length_local,
339 __sd->_M_temporary[__iam]);
340
341 __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
342 (__sd->_M_temporary[__iam],
343 __sd->_M_temporary[__iam] + __length_local,
344 __comp);
345
346 // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
347 // __sd->_M_temporary[__iam] + __length_local.
348
349 // No barrier here: Synchronization is done by the splitting routine.
350
351 _DifferenceType __num_samples =
352 _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
353 _SplitConsistently
354 <__exact, _RAIter, _Compare, _SortingPlacesIterator>()
355 (__iam, __sd, __comp, __num_samples);
356
357 // Offset from __target __begin, __length after merging.
358 _DifferenceType __offset = 0, __length_am = 0;
359 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
360 {
361 __length_am += (__sd->_M_pieces[__iam][__s]._M_end
362 - __sd->_M_pieces[__iam][__s]._M_begin);
363 __offset += __sd->_M_pieces[__iam][__s]._M_begin;
364 }
365
366 typedef std::vector<
367 std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
368 _SeqVector;
369 _SeqVector seqs(__sd->_M_num_threads);
370
371 for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
372 {
373 seqs[__s] =
374 std::make_pair
375 (__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
376 __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
377 }
378
379 __possibly_stable_multiway_merge<
380 __stable,
381 typename _SeqVector::iterator,
382 _RAIter,
383 _Compare, _DifferenceType>()
384 (seqs.begin(), seqs.end(),
385 __sd->_M_source + __offset, __comp,
386 __length_am);
387
388 # pragma omp barrier
389
390 ::operator delete(__sd->_M_temporary[__iam]);
391 }
392
393 /** @brief PMWMS main call.
394 * @param __begin Begin iterator of sequence.
395 * @param __end End iterator of sequence.
396 * @param __comp Comparator.
397 * @param __n Length of sequence.
398 * @param __num_threads Number of threads to use.
399 */
400 template<bool __stable, bool __exact, typename _RAIter,
401 typename _Compare>
402 void
403 parallel_sort_mwms(_RAIter __begin, _RAIter __end,
404 _Compare __comp,
405 _ThreadIndex __num_threads)
406 {
407 _GLIBCXX_CALL(__end - __begin)
408
409 typedef std::iterator_traits<_RAIter> _TraitsType;
410 typedef typename _TraitsType::value_type _ValueType;
411 typedef typename _TraitsType::difference_type _DifferenceType;
412
413 _DifferenceType __n = __end - __begin;
414
415 if (__n <= 1)
416 return;
417
418 // at least one element per thread
419 if (__num_threads > __n)
420 __num_threads = static_cast<_ThreadIndex>(__n);
421
422 // shared variables
423 _PMWMSSortingData<_RAIter> __sd;
424 _DifferenceType* _M_starts;
425
426 # pragma omp parallel num_threads(__num_threads)
427 {
428 __num_threads = omp_get_num_threads(); //no more threads than requested
429
430 # pragma omp single
431 {
432 __sd._M_num_threads = __num_threads;
433 __sd._M_source = __begin;
434
435 __sd._M_temporary = new _ValueType*[__num_threads];
436
437 if (!__exact)
438 {
439 _DifferenceType __size =
440 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
441 * __num_threads;
442 __sd._M_samples = static_cast<_ValueType*>
443 (::operator new(__size * sizeof(_ValueType)));
444 }
445 else
446 __sd._M_samples = NULL;
447
448 __sd._M_offsets = new _DifferenceType[__num_threads - 1];
449 __sd._M_pieces
450 = new std::vector<_Piece<_DifferenceType> >[__num_threads];
451 for (int __s = 0; __s < __num_threads; ++__s)
452 __sd._M_pieces[__s].resize(__num_threads);
453 _M_starts = __sd._M_starts
454 = new _DifferenceType[__num_threads + 1];
455
456 _DifferenceType __chunk_length = __n / __num_threads;
457 _DifferenceType __split = __n % __num_threads;
458 _DifferenceType __pos = 0;
459 for (int __i = 0; __i < __num_threads; ++__i)
460 {
461 _M_starts[__i] = __pos;
462 __pos += (__i < __split)
463 ? (__chunk_length + 1) : __chunk_length;
464 }
465 _M_starts[__num_threads] = __pos;
466 } //single
467
468 // Now sort in parallel.
469 parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
470 } //parallel
471
472 delete[] _M_starts;
473 delete[] __sd._M_temporary;
474
475 if (!__exact)
476 ::operator delete(__sd._M_samples);
477
478 delete[] __sd._M_offsets;
479 delete[] __sd._M_pieces;
480 }
481
482 } //namespace __gnu_parallel
483
484 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */