base.h (_EqualFromLess): Correct uglification of member variables and method arguments.
[gcc.git] / libstdc++-v3 / include / parallel / multiway_mergesort.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /** @file parallel/multiway_mergesort.h
26 * @brief Parallel multiway merge sort.
27 * This file is a GNU parallel extension to the Standard C++ Library.
28 */
29
30 // Written by Johannes Singler.
31
32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
34
35 #include <vector>
36
37 #include <parallel/basic_iterator.h>
38 #include <bits/stl_algo.h>
39 #include <parallel/parallel.h>
40 #include <parallel/multiway_merge.h>
41
42 namespace __gnu_parallel
43 {
44
45 /** @brief Subsequence description. */
46 template<typename _DifferenceTp>
47 struct _Piece
48 {
49 typedef _DifferenceTp _DifferenceType;
50
51 /** @brief Begin of subsequence. */
52 _DifferenceType _M_begin;
53
54 /** @brief End of subsequence. */
55 _DifferenceType _M_end;
56 };
57
58 /** @brief Data accessed by all threads.
59 *
60 * PMWMS = parallel multiway mergesort */
61 template<typename _RAIter>
62 struct _PMWMSSortingData
63 {
64 typedef std::iterator_traits<_RAIter> _TraitsType;
65 typedef typename _TraitsType::value_type _ValueType;
66 typedef typename _TraitsType::difference_type _DifferenceType;
67
68 /** @brief Number of threads involved. */
69 _ThreadIndex _M_num_threads;
70
71 /** @brief Input __begin. */
72 _RAIter _M_source;
73
74 /** @brief Start indices, per thread. */
75 _DifferenceType* _M_starts;
76
77 /** @brief Storage in which to sort. */
78 _ValueType** _M_temporary;
79
80 /** @brief Samples. */
81 _ValueType* _M_samples;
82
83 /** @brief Offsets to add to the found positions. */
84 _DifferenceType* _M_offsets;
85
86 /** @brief Pieces of data to merge @__c [thread][__sequence] */
87 std::vector<_Piece<_DifferenceType> >* _M_pieces;
88 };
89
90 /**
91 * @brief Select _M_samples from a sequence.
92 * @param __sd Pointer to algorithm data. _Result will be placed in
93 * @__c __sd->_M_samples.
94 * @param __num_samples Number of _M_samples to select.
95 */
96 template<typename _RAIter, typename _DifferenceTp>
97 void
98 __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
99 _DifferenceTp __num_samples)
100 {
101 typedef std::iterator_traits<_RAIter> _TraitsType;
102 typedef typename _TraitsType::value_type _ValueType;
103 typedef _DifferenceTp _DifferenceType;
104
105 _ThreadIndex __iam = omp_get_thread_num();
106
107 _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
108
109 equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
110 __num_samples + 1, __es);
111
112 for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
113 ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
114 _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]);
115
116 delete[] __es;
117 }
118
119 /** @brief Split consistently. */
120 template<bool __exact, typename _RAIter,
121 typename _Compare, typename _SortingPlacesIterator>
122 struct _SplitConsistently
123 {
124 };
125
126 /** @brief Split by exact splitting. */
127 template<typename _RAIter, typename _Compare,
128 typename _SortingPlacesIterator>
129 struct _SplitConsistently
130 <true, _RAIter, _Compare, _SortingPlacesIterator>
131 {
132 void operator()(
133 const _ThreadIndex __iam,
134 _PMWMSSortingData<_RAIter>* __sd,
135 _Compare& __comp,
136 const typename
137 std::iterator_traits<_RAIter>::difference_type
138 __num_samples)
139 const
140 {
141 # pragma omp barrier
142
143 std::vector<std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
144 seqs(__sd->_M_num_threads);
145 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
146 seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
147 __sd->_M_temporary[__s]
148 + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]));
149
150 std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
151
152 // if not last thread
153 if (__iam < __sd->_M_num_threads - 1)
154 multiseq_partition(seqs.begin(), seqs.end(),
155 __sd->_M_starts[__iam + 1], _M_offsets.begin(), __comp);
156
157 for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
158 {
159 // for each sequence
160 if (__iam < (__sd->_M_num_threads - 1))
161 __sd->_M_pieces[__iam][__seq]._M_end = _M_offsets[__seq] - seqs[__seq].first;
162 else
163 // very end of this sequence
164 __sd->_M_pieces[__iam][__seq]._M_end =
165 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
166 }
167
168 # pragma omp barrier
169
170 for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
171 {
172 // For each sequence.
173 if (__iam > 0)
174 __sd->_M_pieces[__iam][__seq]._M_begin = __sd->_M_pieces[__iam - 1][__seq]._M_end;
175 else
176 // Absolute beginning.
177 __sd->_M_pieces[__iam][__seq]._M_begin = 0;
178 }
179 }
180 };
181
182 /** @brief Split by sampling. */
183 template<typename _RAIter, typename _Compare,
184 typename _SortingPlacesIterator>
185 struct _SplitConsistently<false, _RAIter, _Compare,
186 _SortingPlacesIterator>
187 {
188 void operator()(
189 const _ThreadIndex __iam,
190 _PMWMSSortingData<_RAIter>* __sd,
191 _Compare& __comp,
192 const typename
193 std::iterator_traits<_RAIter>::difference_type
194 __num_samples)
195 const
196 {
197 typedef std::iterator_traits<_RAIter> _TraitsType;
198 typedef typename _TraitsType::value_type _ValueType;
199 typedef typename _TraitsType::difference_type _DifferenceType;
200
201 __determine_samples(__sd, __num_samples);
202
203 # pragma omp barrier
204
205 # pragma omp single
206 __gnu_sequential::sort(__sd->_M_samples,
207 __sd->_M_samples + (__num_samples * __sd->_M_num_threads),
208 __comp);
209
210 # pragma omp barrier
211
212 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
213 {
214 // For each sequence.
215 if (__num_samples * __iam > 0)
216 __sd->_M_pieces[__iam][__s]._M_begin =
217 std::lower_bound(__sd->_M_temporary[__s],
218 __sd->_M_temporary[__s]
219 + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
220 __sd->_M_samples[__num_samples * __iam],
221 __comp)
222 - __sd->_M_temporary[__s];
223 else
224 // Absolute beginning.
225 __sd->_M_pieces[__iam][__s]._M_begin = 0;
226
227 if ((__num_samples * (__iam + 1)) < (__num_samples * __sd->_M_num_threads))
228 __sd->_M_pieces[__iam][__s]._M_end =
229 std::lower_bound(__sd->_M_temporary[__s],
230 __sd->_M_temporary[__s]
231 + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
232 __sd->_M_samples[__num_samples * (__iam + 1)],
233 __comp)
234 - __sd->_M_temporary[__s];
235 else
236 // Absolute __end.
237 __sd->_M_pieces[__iam][__s]._M_end = __sd->_M_starts[__s + 1] - __sd->_M_starts[__s];
238 }
239 }
240 };
241
242 template<bool __stable, typename _RAIter, typename _Compare>
243 struct __possibly_stable_sort
244 {
245 };
246
247 template<typename _RAIter, typename _Compare>
248 struct __possibly_stable_sort<true, _RAIter, _Compare>
249 {
250 void operator()(const _RAIter& __begin,
251 const _RAIter& __end, _Compare& __comp) const
252 {
253 __gnu_sequential::stable_sort(__begin, __end, __comp);
254 }
255 };
256
257 template<typename _RAIter, typename _Compare>
258 struct __possibly_stable_sort<false, _RAIter, _Compare>
259 {
260 void operator()(const _RAIter __begin,
261 const _RAIter __end, _Compare& __comp) const
262 {
263 __gnu_sequential::sort(__begin, __end, __comp);
264 }
265 };
266
267 template<bool __stable, typename Seq_RAIter,
268 typename _RAIter, typename _Compare,
269 typename DiffType>
270 struct __possibly_stable_multiway_merge
271 {
272 };
273
274 template<typename Seq_RAIter, typename _RAIter,
275 typename _Compare, typename DiffType>
276 struct __possibly_stable_multiway_merge
277 <true, Seq_RAIter, _RAIter, _Compare,
278 DiffType>
279 {
280 void operator()(const Seq_RAIter& __seqs_begin,
281 const Seq_RAIter& __seqs_end,
282 const _RAIter& __target,
283 _Compare& __comp,
284 DiffType __length_am) const
285 {
286 stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
287 sequential_tag());
288 }
289 };
290
291 template<typename Seq_RAIter, typename _RAIter,
292 typename _Compare, typename DiffType>
293 struct __possibly_stable_multiway_merge
294 <false, Seq_RAIter, _RAIter, _Compare,
295 DiffType>
296 {
297 void operator()(const Seq_RAIter& __seqs_begin,
298 const Seq_RAIter& __seqs_end,
299 const _RAIter& __target,
300 _Compare& __comp,
301 DiffType __length_am) const
302 {
303 multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
304 sequential_tag());
305 }
306 };
307
308 /** @brief PMWMS code executed by each thread.
309 * @param __sd Pointer to algorithm data.
310 * @param __comp Comparator.
311 */
312 template<bool __stable, bool __exact, typename _RAIter,
313 typename _Compare>
314 void
315 parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
316 _Compare& __comp)
317 {
318 typedef std::iterator_traits<_RAIter> _TraitsType;
319 typedef typename _TraitsType::value_type _ValueType;
320 typedef typename _TraitsType::difference_type _DifferenceType;
321
322 _ThreadIndex __iam = omp_get_thread_num();
323
324 // Length of this thread's chunk, before merging.
325 _DifferenceType __length_local = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
326
327 // Sort in temporary storage, leave space for sentinel.
328
329 typedef _ValueType* _SortingPlacesIterator;
330
331 __sd->_M_temporary[__iam] =
332 static_cast<_ValueType*>(
333 ::operator new(sizeof(_ValueType) * (__length_local + 1)));
334
335 // Copy there.
336 std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
337 __sd->_M_source + __sd->_M_starts[__iam] + __length_local,
338 __sd->_M_temporary[__iam]);
339
340 __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
341 (__sd->_M_temporary[__iam], __sd->_M_temporary[__iam] + __length_local, __comp);
342
343 // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
344 // __sd->_M_temporary[__iam] + __length_local.
345
346 // No barrier here: Synchronization is done by the splitting routine.
347
348 _DifferenceType __num_samples =
349 _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
350 _SplitConsistently
351 <__exact, _RAIter, _Compare, _SortingPlacesIterator>()
352 (__iam, __sd, __comp, __num_samples);
353
354 // Offset from __target __begin, __length after merging.
355 _DifferenceType __offset = 0, __length_am = 0;
356 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
357 {
358 __length_am += __sd->_M_pieces[__iam][__s]._M_end - __sd->_M_pieces[__iam][__s]._M_begin;
359 __offset += __sd->_M_pieces[__iam][__s]._M_begin;
360 }
361
362 typedef std::vector<
363 std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
364 seq_vector_type;
365 seq_vector_type seqs(__sd->_M_num_threads);
366
367 for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
368 {
369 seqs[__s] =
370 std::make_pair(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
371 __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
372 }
373
374 __possibly_stable_multiway_merge<
375 __stable,
376 typename seq_vector_type::iterator,
377 _RAIter,
378 _Compare, _DifferenceType>()
379 (seqs.begin(), seqs.end(),
380 __sd->_M_source + __offset, __comp,
381 __length_am);
382
383 # pragma omp barrier
384
385 ::operator delete(__sd->_M_temporary[__iam]);
386 }
387
388 /** @brief PMWMS main call.
389 * @param __begin Begin iterator of sequence.
390 * @param __end End iterator of sequence.
391 * @param __comp Comparator.
392 * @param __n Length of sequence.
393 * @param __num_threads Number of threads to use.
394 */
395 template<bool __stable, bool __exact, typename _RAIter,
396 typename _Compare>
397 void
398 parallel_sort_mwms(_RAIter __begin, _RAIter __end,
399 _Compare __comp,
400 _ThreadIndex __num_threads)
401 {
402 _GLIBCXX_CALL(__end - __begin)
403
404 typedef std::iterator_traits<_RAIter> _TraitsType;
405 typedef typename _TraitsType::value_type _ValueType;
406 typedef typename _TraitsType::difference_type _DifferenceType;
407
408 _DifferenceType __n = __end - __begin;
409
410 if (__n <= 1)
411 return;
412
413 // at least one element per thread
414 if (__num_threads > __n)
415 __num_threads = static_cast<_ThreadIndex>(__n);
416
417 // shared variables
418 _PMWMSSortingData<_RAIter> __sd;
419 _DifferenceType* _M_starts;
420
421 # pragma omp parallel num_threads(__num_threads)
422 {
423 __num_threads = omp_get_num_threads(); //no more threads than requested
424
425 # pragma omp single
426 {
427 __sd._M_num_threads = __num_threads;
428 __sd._M_source = __begin;
429
430 __sd._M_temporary = new _ValueType*[__num_threads];
431
432 if (!__exact)
433 {
434 _DifferenceType size =
435 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
436 * __num_threads;
437 __sd._M_samples = static_cast<_ValueType*>(
438 ::operator new(size * sizeof(_ValueType)));
439 }
440 else
441 __sd._M_samples = NULL;
442
443 __sd._M_offsets = new _DifferenceType[__num_threads - 1];
444 __sd._M_pieces = new std::vector<_Piece<_DifferenceType> >[__num_threads];
445 for (int __s = 0; __s < __num_threads; ++__s)
446 __sd._M_pieces[__s].resize(__num_threads);
447 _M_starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
448
449 _DifferenceType __chunk_length = __n / __num_threads;
450 _DifferenceType __split = __n % __num_threads;
451 _DifferenceType __pos = 0;
452 for (int __i = 0; __i < __num_threads; ++__i)
453 {
454 _M_starts[__i] = __pos;
455 __pos += (__i < __split) ? (__chunk_length + 1) : __chunk_length;
456 }
457 _M_starts[__num_threads] = __pos;
458 } //single
459
460 // Now sort in parallel.
461 parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
462 } //parallel
463
464 delete[] _M_starts;
465 delete[] __sd._M_temporary;
466
467 if (!__exact)
468 ::operator delete(__sd._M_samples);
469
470 delete[] __sd._M_offsets;
471 delete[] __sd._M_pieces;
472 }
473 } //namespace __gnu_parallel
474
475 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */