re PR libstdc++/34095 (parallel mode: segfault in std::sort)
[gcc.git] / libstdc++-v3 / include / parallel / multiway_merge.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
20
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
29 // Public License.
30
31 /** @file parallel/multiway_merge.h
32 * @brief Implementation of sequential and parallel multiway merge.
33 *
34 * Explanations on the high-speed merging routines in the appendix of
35 *
36 * P. Sanders.
37 * Fast priority queues for cached memory.
38 * ACM Journal of Experimental Algorithmics, 5, 2000.
39 *
40 * This file is a GNU parallel extension to the Standard C++ Library.
41 */
42
43 // Written by Johannes Singler.
44
45 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
46 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
47
48 #include <vector>
49
50 #include <bits/stl_algo.h>
51 #include <parallel/features.h>
52 #include <parallel/parallel.h>
53 #include <parallel/merge.h>
54 #include <parallel/losertree.h>
55 #if _GLIBCXX_ASSERTIONS
56 #include <parallel/checkers.h>
57 #endif
58
59 /** @brief Length of a sequence described by a pair of iterators. */
60 #define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first)
61
62 // XXX need iterator typedefs
63 namespace __gnu_parallel
64 {
65 template<typename RandomAccessIterator, typename Comparator>
66 class guarded_iterator;
67
68 template<typename RandomAccessIterator, typename Comparator>
69 inline bool
70 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
71 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
72
73 template<typename RandomAccessIterator, typename Comparator>
74 inline bool
75 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
76 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
77
78 /** @brief Iterator wrapper supporting an implicit supremum at the end
79 of the sequence, dominating all comparisons.
80 * Deriving from RandomAccessIterator is not possible since
81 * RandomAccessIterator need not be a class.
82 */
83 template<typename RandomAccessIterator, typename Comparator>
84 class guarded_iterator
85 {
86 private:
87 /** @brief Current iterator position. */
88 RandomAccessIterator current;
89
90 /** @brief End iterator of the sequence. */
91 RandomAccessIterator end;
92
93 /** @brief Comparator. */
94 Comparator& comp;
95
96 public:
97 /** @brief Constructor. Sets iterator to beginning of sequence.
98 * @param begin Begin iterator of sequence.
99 * @param end End iterator of sequence.
100 * @param comp Comparator provided for associated overloaded
101 * compare operators. */
102 inline guarded_iterator(RandomAccessIterator begin,
103 RandomAccessIterator end, Comparator& comp)
104 : current(begin), end(end), comp(comp)
105 { }
106
107 /** @brief Pre-increment operator.
108 * @return This. */
109 inline guarded_iterator<RandomAccessIterator, Comparator>&
110 operator++()
111 {
112 ++current;
113 return *this;
114 }
115
116 /** @brief Dereference operator.
117 * @return Referenced element. */
118 inline typename std::iterator_traits<RandomAccessIterator>::value_type
119 operator*()
120 { return *current; }
121
122 /** @brief Convert to wrapped iterator.
123 * @return Wrapped iterator. */
124 inline operator RandomAccessIterator()
125 { return current; }
126
127 friend bool
128 operator< <RandomAccessIterator, Comparator>(
129 guarded_iterator<RandomAccessIterator, Comparator>& bi1,
130 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
131
132 friend bool
133 operator<= <RandomAccessIterator, Comparator>(
134 guarded_iterator<RandomAccessIterator, Comparator>& bi1,
135 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
136 };
137
138 /** @brief Compare two elements referenced by guarded iterators.
139 * @param bi1 First iterator.
140 * @param bi2 Second iterator.
141 * @return @c True if less. */
142 template<typename RandomAccessIterator, typename Comparator>
143 inline bool
144 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
145 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
146 {
147 if (bi1.current == bi1.end) //bi1 is sup
148 return bi2.current == bi2.end; //bi2 is not sup
149 if (bi2.current == bi2.end) //bi2 is sup
150 return true;
151 return (bi1.comp)(*bi1, *bi2); //normal compare
152 }
153
154 /** @brief Compare two elements referenced by guarded iterators.
155 * @param bi1 First iterator.
156 * @param bi2 Second iterator.
157 * @return @c True if less equal. */
158 template<typename RandomAccessIterator, typename Comparator>
159 inline bool
160 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
161 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
162 {
163 if (bi2.current == bi2.end) //bi1 is sup
164 return bi1.current != bi1.end; //bi2 is not sup
165 if (bi1.current == bi1.end) //bi2 is sup
166 return false;
167 return !(bi1.comp)(*bi2, *bi1); //normal compare
168 }
169
170 template<typename RandomAccessIterator, typename Comparator>
171 class unguarded_iterator;
172
173 template<typename RandomAccessIterator, typename Comparator>
174 inline bool
175 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
176 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
177
178 template<typename RandomAccessIterator, typename Comparator>
179 inline bool
180 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
181 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
182
183 template<typename RandomAccessIterator, typename Comparator>
184 class unguarded_iterator
185 {
186 private:
187 /** @brief Current iterator position. */
188 RandomAccessIterator& current;
189 /** @brief Comparator. */
190 mutable Comparator& comp;
191
192 public:
193 /** @brief Constructor. Sets iterator to beginning of sequence.
194 * @param begin Begin iterator of sequence.
195 * @param end Unused, only for compatibility.
196 * @param comp Unused, only for compatibility. */
197 inline unguarded_iterator(RandomAccessIterator begin,
198 RandomAccessIterator end, Comparator& comp)
199 : current(begin), comp(comp)
200 { }
201
202 /** @brief Pre-increment operator.
203 * @return This. */
204 inline unguarded_iterator<RandomAccessIterator, Comparator>&
205 operator++()
206 {
207 ++current;
208 return *this;
209 }
210
211 /** @brief Dereference operator.
212 * @return Referenced element. */
213 inline typename std::iterator_traits<RandomAccessIterator>::value_type
214 operator*()
215 { return *current; }
216
217 /** @brief Convert to wrapped iterator.
218 * @return Wrapped iterator. */
219 inline
220 operator RandomAccessIterator()
221 { return current; }
222
223 friend bool
224 operator< <RandomAccessIterator, Comparator>(
225 unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
226 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
227
228 friend bool
229 operator<= <RandomAccessIterator, Comparator>(
230 unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
231 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
232 };
233
234 /** @brief Compare two elements referenced by unguarded iterators.
235 * @param bi1 First iterator.
236 * @param bi2 Second iterator.
237 * @return @c True if less. */
238 template<typename RandomAccessIterator, typename Comparator>
239 inline bool
240 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
241 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
242 {
243 // Normal compare.
244 return (bi1.comp)(*bi1, *bi2);
245 }
246
247 /** @brief Compare two elements referenced by unguarded iterators.
248 * @param bi1 First iterator.
249 * @param bi2 Second iterator.
250 * @return @c True if less equal. */
251 template<typename RandomAccessIterator, typename Comparator>
252 inline bool
253 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
254 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
255 {
256 // Normal compare.
257 return !(bi1.comp)(*bi2, *bi1);
258 }
259
260 /** Prepare a set of sequences to be merged without a (end) guard
261 * @param seqs_begin
262 * @param seqs_end
263 * @param comp
264 * @param min_sequence
265 * @param stable
266 * @pre (seqs_end - seqs_begin > 0) */
267 template<typename RandomAccessIteratorIterator, typename Comparator>
268 typename std::iterator_traits<
269 typename std::iterator_traits<RandomAccessIteratorIterator>::value_type
270 ::first_type>::difference_type
271 prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
272 RandomAccessIteratorIterator seqs_end, Comparator comp,
273 int& min_sequence, bool stable)
274 {
275 _GLIBCXX_CALL(seqs_end - seqs_begin)
276
277 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
278 ::value_type::first_type
279 RandomAccessIterator1;
280 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
281 value_type;
282 typedef typename std::iterator_traits<RandomAccessIterator1>
283 ::difference_type
284 difference_type;
285
286 if ((*seqs_begin).first == (*seqs_begin).second)
287 {
288 // Empty sequence found, it's the first one.
289 min_sequence = 0;
290 return -1;
291 }
292
293 // Last element in sequence.
294 value_type min = *((*seqs_begin).second - 1);
295 min_sequence = 0;
296 for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s)
297 {
298 if ((*s).first == (*s).second)
299 {
300 // Empty sequence found.
301 min_sequence = static_cast<int>(s - seqs_begin);
302 return -1;
303 }
304
305 // Last element in sequence.
306 const value_type& v = *((*s).second - 1);
307 if (comp(v, min)) //strictly smaller
308 {
309 min = v;
310 min_sequence = static_cast<int>(s - seqs_begin);
311 }
312 }
313
314 difference_type overhang_size = 0;
315
316 int s = 0;
317 for (s = 0; s <= min_sequence; ++s)
318 {
319 RandomAccessIterator1 split;
320 if (stable)
321 split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
322 min, comp);
323 else
324 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
325 min, comp);
326
327 overhang_size += seqs_begin[s].second - split;
328 }
329
330 for (; s < (seqs_end - seqs_begin); ++s)
331 {
332 RandomAccessIterator1 split = std::lower_bound(
333 seqs_begin[s].first, seqs_begin[s].second, min, comp);
334 overhang_size += seqs_begin[s].second - split;
335 }
336
337 // So many elements will be left over afterwards.
338 return overhang_size;
339 }
340
341 /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
342 * @param seqs_begin
343 * @param seqs_end
344 * @param comp */
345 template<typename RandomAccessIteratorIterator, typename Comparator>
346 typename std::iterator_traits<typename std::iterator_traits<
347 RandomAccessIteratorIterator>::value_type::first_type>::difference_type
348 prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
349 RandomAccessIteratorIterator seqs_end,
350 Comparator comp)
351 {
352 _GLIBCXX_CALL(seqs_end - seqs_begin)
353
354 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
355 ::value_type::first_type
356 RandomAccessIterator1;
357 typedef typename std::iterator_traits<RandomAccessIterator1>
358 ::value_type
359 value_type;
360 typedef typename std::iterator_traits<RandomAccessIterator1>
361 ::difference_type
362 difference_type;
363
364 // Last element in sequence.
365 value_type* max = NULL;
366 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
367 {
368 if ((*s).first == (*s).second)
369 continue;
370
371 // Last element in sequence.
372 value_type& v = *((*s).second - 1);
373
374 // Strictly greater.
375 if (!max || comp(*max, v))
376 max = &v;
377 }
378
379 difference_type overhang_size = 0;
380 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
381 {
382 RandomAccessIterator1 split =
383 std::lower_bound((*s).first, (*s).second, *max, comp);
384 overhang_size += (*s).second - split;
385
386 // Set sentinel.
387 *((*s).second) = *max;
388 }
389
390 // So many elements will be left over afterwards.
391 return overhang_size;
392 }
393
394 /** @brief Highly efficient 3-way merging procedure.
395 * @param seqs_begin Begin iterator of iterator pair input sequence.
396 * @param seqs_end End iterator of iterator pair input sequence.
397 * @param target Begin iterator out output sequence.
398 * @param comp Comparator.
399 * @param length Maximum length to merge.
400 * @param stable Unused, stable anyway.
401 * @return End iterator of output sequence. */
402 template<
403 template<typename RAI, typename C> class iterator,
404 typename RandomAccessIteratorIterator,
405 typename RandomAccessIterator3,
406 typename _DifferenceTp,
407 typename Comparator>
408 RandomAccessIterator3
409 multiway_merge_3_variant(
410 RandomAccessIteratorIterator seqs_begin,
411 RandomAccessIteratorIterator seqs_end,
412 RandomAccessIterator3 target,
413 Comparator comp, _DifferenceTp length, bool stable)
414 {
415 _GLIBCXX_CALL(length);
416
417 typedef _DifferenceTp difference_type;
418
419 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
420 ::value_type::first_type
421 RandomAccessIterator1;
422 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
423 value_type;
424
425 if (length == 0)
426 return target;
427
428 iterator<RandomAccessIterator1, Comparator>
429 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
430 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
431 seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
432
433 if (seq0 <= seq1)
434 {
435 if (seq1 <= seq2)
436 goto s012;
437 else
438 if (seq2 < seq0)
439 goto s201;
440 else
441 goto s021;
442 }
443 else
444 {
445 if (seq1 <= seq2)
446 {
447 if (seq0 <= seq2)
448 goto s102;
449 else
450 goto s120;
451 }
452 else
453 goto s210;
454 }
455
456 #define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\
457 s ## a ## b ## c : \
458 *target = *seq ## a; \
459 ++target; \
460 --length; \
461 ++seq ## a; \
462 if (length == 0) goto finish; \
463 if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
464 if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
465 goto s ## b ## c ## a;
466
467 _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
468 _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
469 _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < );
470 _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=);
471 _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=);
472 _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < );
473
474 #undef _GLIBCXX_PARALLEL_MERGE_3_CASE
475
476 finish:
477 ;
478
479 seqs_begin[0].first = seq0;
480 seqs_begin[1].first = seq1;
481 seqs_begin[2].first = seq2;
482
483 return target;
484 }
485
486 template<
487 typename RandomAccessIteratorIterator,
488 typename RandomAccessIterator3,
489 typename _DifferenceTp,
490 typename Comparator>
491 RandomAccessIterator3
492 multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin,
493 RandomAccessIteratorIterator seqs_end,
494 RandomAccessIterator3 target,
495 Comparator comp,
496 _DifferenceTp length, bool stable)
497 {
498 _GLIBCXX_CALL(length);
499
500 typedef _DifferenceTp difference_type;
501 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
502 ::value_type::first_type
503 RandomAccessIterator1;
504 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
505 value_type;
506
507 int min_seq;
508 RandomAccessIterator3 target_end;
509
510 // Stable anyway.
511 difference_type overhang =
512 prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
513
514 difference_type total_length = 0;
515 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
516 total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
517
518 if (overhang != -1)
519 {
520 difference_type unguarded_length =
521 std::min(length, total_length - overhang);
522 target_end = multiway_merge_3_variant<unguarded_iterator>
523 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
524 overhang = length - unguarded_length;
525 }
526 else
527 {
528 // Empty sequence found.
529 overhang = length;
530 target_end = target;
531 }
532
533 #if _GLIBCXX_ASSERTIONS
534 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
535 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
536 #endif
537
538 switch (min_seq)
539 {
540 case 0:
541 // Iterators will be advanced accordingly.
542 target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
543 seqs_begin[2].first, seqs_begin[2].second,
544 target_end, overhang, comp);
545 break;
546 case 1:
547 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
548 seqs_begin[2].first, seqs_begin[2].second,
549 target_end, overhang, comp);
550 break;
551 case 2:
552 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
553 seqs_begin[1].first, seqs_begin[1].second,
554 target_end, overhang, comp);
555 break;
556 default:
557 _GLIBCXX_PARALLEL_ASSERT(false);
558 }
559
560 #if _GLIBCXX_ASSERTIONS
561 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
562 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
563 #endif
564
565 return target_end;
566 }
567
568 /** @brief Highly efficient 4-way merging procedure.
569 * @param seqs_begin Begin iterator of iterator pair input sequence.
570 * @param seqs_end End iterator of iterator pair input sequence.
571 * @param target Begin iterator out output sequence.
572 * @param comp Comparator.
573 * @param length Maximum length to merge.
574 * @param stable Unused, stable anyway.
575 * @return End iterator of output sequence. */
576 template<
577 template<typename RAI, typename C> class iterator,
578 typename RandomAccessIteratorIterator,
579 typename RandomAccessIterator3,
580 typename _DifferenceTp,
581 typename Comparator>
582 RandomAccessIterator3
583 multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
584 RandomAccessIteratorIterator seqs_end,
585 RandomAccessIterator3 target,
586 Comparator comp, _DifferenceTp length, bool stable)
587 {
588 _GLIBCXX_CALL(length);
589 typedef _DifferenceTp difference_type;
590
591 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
592 ::value_type::first_type
593 RandomAccessIterator1;
594 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
595 value_type;
596
597 iterator<RandomAccessIterator1, Comparator>
598 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
599 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
600 seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
601 seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
602
603 #define _GLIBCXX_PARALLEL_DECISION(a,b,c,d) { \
604 if (seq ## d < seq ## a) goto s ## d ## a ## b ## c; \
605 if (seq ## d < seq ## b) goto s ## a ## d ## b ## c; \
606 if (seq ## d < seq ## c) goto s ## a ## b ## d ## c; \
607 goto s ## a ## b ## c ## d; }
608
609 if (seq0 <= seq1)
610 {
611 if (seq1 <= seq2)
612 _GLIBCXX_PARALLEL_DECISION(0,1,2,3)
613 else
614 if (seq2 < seq0)
615 _GLIBCXX_PARALLEL_DECISION(2,0,1,3)
616 else
617 _GLIBCXX_PARALLEL_DECISION(0,2,1,3)
618 }
619 else
620 {
621 if (seq1 <= seq2)
622 {
623 if (seq0 <= seq2)
624 _GLIBCXX_PARALLEL_DECISION(1,0,2,3)
625 else
626 _GLIBCXX_PARALLEL_DECISION(1,2,0,3)
627 }
628 else
629 _GLIBCXX_PARALLEL_DECISION(2,1,0,3)
630 }
631
632 #define _GLIBCXX_PARALLEL_MERGE_4_CASE(a,b,c,d,c0,c1,c2) \
633 s ## a ## b ## c ## d: \
634 if (length == 0) goto finish; \
635 *target = *seq ## a; \
636 ++target; \
637 --length; \
638 ++seq ## a; \
639 if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \
640 if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \
641 if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \
642 goto s ## b ## c ## d ## a;
643
644 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=);
645 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=);
646 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=);
647 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=);
648 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=);
649 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=, <=, <=);
650 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=);
651 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=);
652 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=);
653 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < );
654 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=);
655 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < );
656 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=);
657 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < );
658 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=);
659 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < );
660 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < );
661 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < );
662 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < );
663 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < );
664 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < );
665 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < );
666 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < );
667 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < );
668
669 #undef _GLIBCXX_PARALLEL_MERGE_4_CASE
670 #undef _GLIBCXX_PARALLEL_DECISION
671
672 finish:
673 ;
674
675 seqs_begin[0].first = seq0;
676 seqs_begin[1].first = seq1;
677 seqs_begin[2].first = seq2;
678 seqs_begin[3].first = seq3;
679
680 return target;
681 }
682
683 template<
684 typename RandomAccessIteratorIterator,
685 typename RandomAccessIterator3,
686 typename _DifferenceTp,
687 typename Comparator>
688 RandomAccessIterator3
689 multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
690 RandomAccessIteratorIterator seqs_end,
691 RandomAccessIterator3 target,
692 Comparator comp,
693 _DifferenceTp length, bool stable)
694 {
695 _GLIBCXX_CALL(length);
696 typedef _DifferenceTp difference_type;
697
698 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
699 ::value_type::first_type
700 RandomAccessIterator1;
701 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
702 value_type;
703
704 int min_seq;
705 RandomAccessIterator3 target_end;
706
707 // Stable anyway.
708 difference_type overhang =
709 prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
710
711 difference_type total_length = 0;
712 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
713 total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
714
715 if (overhang != -1)
716 {
717 difference_type unguarded_length =
718 std::min(length, total_length - overhang);
719 target_end = multiway_merge_4_variant<unguarded_iterator>
720 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
721 overhang = length - unguarded_length;
722 }
723 else
724 {
725 // Empty sequence found.
726 overhang = length;
727 target_end = target;
728 }
729
730 #if _GLIBCXX_ASSERTIONS
731 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
732 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
733 #endif
734
735 std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> >
736 one_missing(seqs_begin, seqs_end);
737 one_missing.erase(one_missing.begin() + min_seq); //remove
738
739 target_end = multiway_merge_3_variant<guarded_iterator>(
740 one_missing.begin(), one_missing.end(),
741 target_end, comp, overhang, stable);
742
743 // Insert back again.
744 one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
745 // Write back modified iterators.
746 copy(one_missing.begin(), one_missing.end(), seqs_begin);
747
748 #if _GLIBCXX_ASSERTIONS
749 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
750 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
751 #endif
752
753 return target_end;
754 }
755
756 /** @brief Basic multi-way merging procedure.
757 *
758 * The head elements are kept in a sorted array, new heads are
759 * inserted linearly.
760 * @param seqs_begin Begin iterator of iterator pair input sequence.
761 * @param seqs_end End iterator of iterator pair input sequence.
762 * @param target Begin iterator out output sequence.
763 * @param comp Comparator.
764 * @param length Maximum length to merge.
765 * @param stable Stable merging incurs a performance penalty.
766 * @return End iterator of output sequence.
767 */
768 template<
769 typename RandomAccessIteratorIterator,
770 typename RandomAccessIterator3,
771 typename _DifferenceTp,
772 typename Comparator>
773 RandomAccessIterator3
774 multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
775 RandomAccessIteratorIterator seqs_end,
776 RandomAccessIterator3 target,
777 Comparator comp, _DifferenceTp length, bool stable)
778 {
779 _GLIBCXX_CALL(length)
780
781 typedef _DifferenceTp difference_type;
782 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
783 ::value_type::first_type
784 RandomAccessIterator1;
785 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
786 value_type;
787
788 int k = static_cast<int>(seqs_end - seqs_begin);
789 int nrs; // Number of remaining sequences.
790
791 // Avoid default constructor.
792 value_type* fe = static_cast<value_type*>(
793 ::operator new(sizeof(value_type) * k)); // Front elements.
794 int* source = new int[k];
795 difference_type total_length = 0;
796
797 // Write entries into queue.
798 nrs = 0;
799 for (int pi = 0; pi < k; ++pi)
800 {
801 if (seqs_begin[pi].first != seqs_begin[pi].second)
802 {
803 ::new(&(fe[nrs])) value_type(*(seqs_begin[pi].first));
804 source[nrs] = pi;
805 ++nrs;
806 total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]);
807 }
808 }
809
810 if (stable)
811 {
812 // Bubble sort fe and source by fe.
813 for (int k = 0; k < nrs - 1; ++k)
814 for (int pi = nrs - 1; pi > k; --pi)
815 if (comp(fe[pi], fe[pi - 1]) ||
816 (!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1]))
817 {
818 std::swap(fe[pi - 1], fe[pi]);
819 std::swap(source[pi - 1], source[pi]);
820 }
821 }
822 else
823 {
824 for (int k = 0; k < nrs - 1; ++k)
825 for (int pi = nrs - 1; pi > k; --pi)
826 if (comp(fe[pi], fe[pi-1]))
827 {
828 std::swap(fe[pi-1], fe[pi]);
829 std::swap(source[pi-1], source[pi]);
830 }
831 }
832
833 // Iterate.
834 if (stable)
835 {
836 int j;
837 while (nrs > 0 && length > 0)
838 {
839 if (source[0] < source[1])
840 {
841 // fe[0] <= fe[1]
842 while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0)
843 {
844 *target = fe[0];
845 ++target;
846 ++(seqs_begin[source[0]].first);
847 --length;
848 if (seqs_begin[source[0]].first == seqs_begin[source[0]].second)
849 {
850 // Move everything to the left.
851 for (int s = 0; s < nrs - 1; ++s)
852 {
853 fe[s] = fe[s + 1];
854 source[s] = source[s + 1];
855 }
856 fe[nrs - 1].~value_type(); //Destruct explicitly.
857 --nrs;
858 break;
859 }
860 else
861 fe[0] = *(seqs_begin[source[0]].first);
862 }
863 }
864 else
865 {
866 // fe[0] < fe[1]
867 while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0)
868 {
869 *target = fe[0];
870 ++target;
871 ++(seqs_begin[source[0]].first);
872 --length;
873 if (seqs_begin[source[0]].first == seqs_begin[source[0]].second)
874 {
875 for (int s = 0; s < nrs - 1; ++s)
876 {
877 fe[s] = fe[s + 1];
878 source[s] = source[s + 1];
879 }
880 fe[nrs - 1].~value_type(); //Destruct explicitly.
881 --nrs;
882 break;
883 }
884 else
885 fe[0] = *(seqs_begin[source[0]].first);
886 }
887 }
888
889 // Sink down.
890 j = 1;
891 while ((j < nrs) && (comp(fe[j], fe[j - 1]) ||
892 (!comp(fe[j - 1], fe[j])
893 && (source[j] < source[j - 1]))))
894 {
895 std::swap(fe[j - 1], fe[j]);
896 std::swap(source[j - 1], source[j]);
897 ++j;
898 }
899 }
900 }
901 else
902 {
903 int j;
904 while (nrs > 0 && length > 0)
905 {
906 // fe[0] <= fe[1]
907 while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0)
908 {
909 *target = fe[0];
910 ++target;
911 ++seqs_begin[source[0]].first;
912 --length;
913 if (seqs_begin[source[0]].first == seqs_begin[source[0]].second)
914 {
915 for (int s = 0; s < (nrs - 1); ++s)
916 {
917 fe[s] = fe[s + 1];
918 source[s] = source[s + 1];
919 }
920 fe[nrs - 1].~value_type(); //Destruct explicitly.
921 --nrs;
922 break;
923 }
924 else
925 fe[0] = *(seqs_begin[source[0]].first);
926 }
927
928 // Sink down.
929 j = 1;
930 while ((j < nrs) && comp(fe[j], fe[j - 1]))
931 {
932 std::swap(fe[j - 1], fe[j]);
933 std::swap(source[j - 1], source[j]);
934 ++j;
935 }
936 }
937 }
938
939 ::operator delete(fe); //Destructors already called.
940 delete[] source;
941
942 return target;
943 }
944
945 /** @brief Multi-way merging procedure for a high branching factor,
946 * guarded case.
947 *
948 * The head elements are kept in a loser tree.
949 * @param seqs_begin Begin iterator of iterator pair input sequence.
950 * @param seqs_end End iterator of iterator pair input sequence.
951 * @param target Begin iterator out output sequence.
952 * @param comp Comparator.
953 * @param length Maximum length to merge.
954 * @param stable Stable merging incurs a performance penalty.
955 * @return End iterator of output sequence.
956 */
957 template<
958 typename LT,
959 typename RandomAccessIteratorIterator,
960 typename RandomAccessIterator3,
961 typename _DifferenceTp,
962 typename Comparator>
963 RandomAccessIterator3
964 multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
965 RandomAccessIteratorIterator seqs_end,
966 RandomAccessIterator3 target,
967 Comparator comp,
968 _DifferenceTp length, bool stable)
969 {
970 _GLIBCXX_CALL(length)
971
972 typedef _DifferenceTp difference_type;
973 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
974 ::value_type::first_type
975 RandomAccessIterator1;
976 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
977 value_type;
978
979 int k = static_cast<int>(seqs_end - seqs_begin);
980
981 LT lt(k, comp);
982
983 difference_type total_length = 0;
984
985 // Default value for potentially non-default-constructible types.
986 value_type* arbitrary_element = NULL;
987
988 for (int t = 0; t < k; ++t)
989 {
990 if(arbitrary_element == NULL && _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0)
991 arbitrary_element = &(*seqs_begin[t].first);
992 total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
993 }
994
995 if(total_length == 0)
996 return target;
997
998 for (int t = 0; t < k; ++t)
999 {
1000 if (stable)
1001 {
1002 if (seqs_begin[t].first == seqs_begin[t].second)
1003 lt.insert_start_stable(*arbitrary_element, t, true);
1004 else
1005 lt.insert_start_stable(*seqs_begin[t].first, t, false);
1006 }
1007 else
1008 {
1009 if (seqs_begin[t].first == seqs_begin[t].second)
1010 lt.insert_start(*arbitrary_element, t, true);
1011 else
1012 lt.insert_start(*seqs_begin[t].first, t, false);
1013 }
1014 }
1015
1016 if (stable)
1017 lt.init_stable();
1018 else
1019 lt.init();
1020
1021 total_length = std::min(total_length, length);
1022
1023 int source;
1024
1025 if (stable)
1026 {
1027 for (difference_type i = 0; i < total_length; ++i)
1028 {
1029 // Take out.
1030 source = lt.get_min_source();
1031
1032 *(target++) = *(seqs_begin[source].first++);
1033
1034 // Feed.
1035 if (seqs_begin[source].first == seqs_begin[source].second)
1036 lt.delete_min_insert_stable(*arbitrary_element, true);
1037 else
1038 // Replace from same source.
1039 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
1040
1041 }
1042 }
1043 else
1044 {
1045 for (difference_type i = 0; i < total_length; ++i)
1046 {
1047 //take out
1048 source = lt.get_min_source();
1049
1050 *(target++) = *(seqs_begin[source].first++);
1051
1052 // Feed.
1053 if (seqs_begin[source].first == seqs_begin[source].second)
1054 lt.delete_min_insert(*arbitrary_element, true);
1055 else
1056 // Replace from same source.
1057 lt.delete_min_insert(*seqs_begin[source].first, false);
1058 }
1059 }
1060
1061 return target;
1062 }
1063
1064 /** @brief Multi-way merging procedure for a high branching factor,
1065 * unguarded case.
1066 *
1067 * The head elements are kept in a loser tree.
1068 * @param seqs_begin Begin iterator of iterator pair input sequence.
1069 * @param seqs_end End iterator of iterator pair input sequence.
1070 * @param target Begin iterator out output sequence.
1071 * @param comp Comparator.
1072 * @param length Maximum length to merge.
1073 * @param stable Stable merging incurs a performance penalty.
1074 * @return End iterator of output sequence.
1075 * @pre No input will run out of elements during the merge.
1076 */
1077 template<
1078 typename LT,
1079 typename RandomAccessIteratorIterator,
1080 typename RandomAccessIterator3,
1081 typename _DifferenceTp, typename Comparator>
1082 RandomAccessIterator3
1083 multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
1084 RandomAccessIteratorIterator seqs_end,
1085 RandomAccessIterator3 target,
1086 Comparator comp,
1087 _DifferenceTp length, bool stable)
1088 {
1089 _GLIBCXX_CALL(length)
1090 typedef _DifferenceTp difference_type;
1091
1092 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1093 ::value_type::first_type
1094 RandomAccessIterator1;
1095 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1096 value_type;
1097
1098 int k = seqs_end - seqs_begin;
1099
1100 LT lt(k, comp);
1101
1102 difference_type total_length = 0;
1103
1104 for (int t = 0; t < k; ++t)
1105 {
1106 #if _GLIBCXX_ASSERTIONS
1107 _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
1108 #endif
1109 if (stable)
1110 lt.insert_start_stable(*seqs_begin[t].first, t, false);
1111 else
1112 lt.insert_start(*seqs_begin[t].first, t, false);
1113
1114 total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
1115 }
1116
1117 if (stable)
1118 lt.init_stable();
1119 else
1120 lt.init();
1121
1122 // Do not go past end.
1123 length = std::min(total_length, length);
1124
1125 int source;
1126
1127 #if _GLIBCXX_ASSERTIONS
1128 difference_type i = 0;
1129 #endif
1130
1131 if (stable)
1132 {
1133 RandomAccessIterator3 target_end = target + length;
1134 while (target < target_end)
1135 {
1136 // Take out.
1137 source = lt.get_min_source();
1138
1139 #if _GLIBCXX_ASSERTIONS
1140 _GLIBCXX_PARALLEL_ASSERT(i == 0
1141 || !comp(*(seqs_begin[source].first), *(target - 1)));
1142 #endif
1143
1144 *(target++) = *(seqs_begin[source].first++);
1145
1146 #if _GLIBCXX_ASSERTIONS
1147 _GLIBCXX_PARALLEL_ASSERT(
1148 (seqs_begin[source].first != seqs_begin[source].second)
1149 || (i == length - 1));
1150 ++i;
1151 #endif
1152 // Feed.
1153 // Replace from same source.
1154 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
1155
1156 }
1157 }
1158 else
1159 {
1160 RandomAccessIterator3 target_end = target + length;
1161 while (target < target_end)
1162 {
1163 // Take out.
1164 source = lt.get_min_source();
1165
1166 #if _GLIBCXX_ASSERTIONS
1167 if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
1168 printf(" %i %i %i\n", length, i, source);
1169 _GLIBCXX_PARALLEL_ASSERT(i == 0
1170 || !comp(*(seqs_begin[source].first), *(target - 1)));
1171 #endif
1172
1173 *(target++) = *(seqs_begin[source].first++);
1174
1175 #if _GLIBCXX_ASSERTIONS
1176 if (!((seqs_begin[source].first != seqs_begin[source].second)
1177 || (i >= length - 1)))
1178 printf(" %i %i %i\n", length, i, source);
1179 _GLIBCXX_PARALLEL_ASSERT(
1180 (seqs_begin[source].first != seqs_begin[source].second)
1181 || (i >= length - 1));
1182 ++i;
1183 #endif
1184 // Feed.
1185 // Replace from same source.
1186 lt.delete_min_insert(*seqs_begin[source].first, false);
1187 }
1188 }
1189
1190 return target;
1191 }
1192
1193 template<
1194 typename RandomAccessIteratorIterator,
1195 typename RandomAccessIterator3,
1196 typename _DifferenceTp,
1197 typename Comparator>
1198 RandomAccessIterator3
1199 multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin,
1200 RandomAccessIteratorIterator seqs_end,
1201 RandomAccessIterator3 target,
1202 Comparator comp,
1203 _DifferenceTp length, bool stable)
1204 {
1205 _GLIBCXX_CALL(length)
1206
1207 typedef _DifferenceTp difference_type;
1208
1209 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1210 ::value_type::first_type
1211 RandomAccessIterator1;
1212 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1213 value_type;
1214
1215 int min_seq;
1216 RandomAccessIterator3 target_end;
1217 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
1218 comp, min_seq, stable);
1219
1220 difference_type total_length = 0;
1221 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1222 total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
1223
1224 if (overhang != -1)
1225 {
1226 difference_type unguarded_length =
1227 std::min(length, total_length - overhang);
1228 target_end = multiway_merge_loser_tree_unguarded
1229 <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
1230 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1231 overhang = length - unguarded_length;
1232 }
1233 else
1234 {
1235 // Empty sequence found.
1236 overhang = length;
1237 target_end = target;
1238 }
1239
1240 #if _GLIBCXX_ASSERTIONS
1241 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1242 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1243 #endif
1244
1245 target_end = multiway_merge_loser_tree
1246 <typename loser_tree_traits<value_type, Comparator>::LT>
1247 (seqs_begin, seqs_end, target_end, comp, overhang, stable);
1248
1249 #if _GLIBCXX_ASSERTIONS
1250 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1251 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1252 #endif
1253
1254 return target_end;
1255 }
1256
1257 template<
1258 typename RandomAccessIteratorIterator,
1259 typename RandomAccessIterator3,
1260 typename _DifferenceTp,
1261 typename Comparator>
1262 RandomAccessIterator3
1263 multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
1264 RandomAccessIteratorIterator seqs_end,
1265 RandomAccessIterator3 target,
1266 Comparator comp,
1267 _DifferenceTp length, bool stable)
1268 {
1269 _GLIBCXX_CALL(length)
1270
1271 typedef _DifferenceTp difference_type;
1272 typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
1273 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1274 ::value_type::first_type
1275 RandomAccessIterator1;
1276 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1277 value_type;
1278
1279 RandomAccessIterator3 target_end;
1280 difference_type overhang =
1281 prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
1282
1283 difference_type total_length = 0;
1284 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1285 {
1286 total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
1287
1288 // Sentinel spot.
1289 ++((*s).second);
1290 }
1291
1292 difference_type unguarded_length =
1293 std::min(length, total_length - overhang);
1294 target_end = multiway_merge_loser_tree_unguarded
1295 <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
1296 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1297 overhang = length - unguarded_length;
1298
1299 #if _GLIBCXX_ASSERTIONS
1300 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1301 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1302 #endif
1303
1304 // Copy rest stable.
1305 for (RandomAccessIteratorIterator s = seqs_begin;
1306 s != seqs_end && overhang > 0; ++s)
1307 {
1308 // Restore.
1309 --((*s).second);
1310 difference_type local_length =
1311 std::min<difference_type>(overhang, _GLIBCXX_PARALLEL_LENGTH(*s));
1312 target_end = std::copy((*s).first, (*s).first + local_length,
1313 target_end);
1314 (*s).first += local_length;
1315 overhang -= local_length;
1316 }
1317
1318 #if _GLIBCXX_ASSERTIONS
1319 _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
1320 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1321 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1322 #endif
1323
1324 return target_end;
1325 }
1326
1327 /** @brief Sequential multi-way merging switch.
1328 *
1329 * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and runtime settings.
1330 * @param seqs_begin Begin iterator of iterator pair input sequence.
1331 * @param seqs_end End iterator of iterator pair input sequence.
1332 * @param target Begin iterator out output sequence.
1333 * @param comp Comparator.
1334 * @param length Maximum length to merge.
1335 * @param stable Stable merging incurs a performance penalty.
1336 * @param sentinel The sequences have a sentinel element.
1337 * @return End iterator of output sequence. */
1338 template<
1339 typename RandomAccessIteratorIterator,
1340 typename RandomAccessIterator3,
1341 typename _DifferenceTp,
1342 typename Comparator>
1343 RandomAccessIterator3
1344 multiway_merge(RandomAccessIteratorIterator seqs_begin,
1345 RandomAccessIteratorIterator seqs_end,
1346 RandomAccessIterator3 target,
1347 Comparator comp, _DifferenceTp length,
1348 bool stable, bool sentinel,
1349 sequential_tag)
1350 {
1351 _GLIBCXX_CALL(length)
1352
1353 typedef _DifferenceTp difference_type;
1354 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1355 ::value_type::first_type
1356 RandomAccessIterator1;
1357 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1358 value_type;
1359
1360 #if _GLIBCXX_ASSERTIONS
1361 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1362 _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
1363 #endif
1364
1365 RandomAccessIterator3 return_target = target;
1366 int k = static_cast<int>(seqs_end - seqs_begin);
1367
1368 Settings::MultiwayMergeAlgorithm mwma =
1369 Settings::multiway_merge_algorithm;
1370
1371 if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
1372 mwma = Settings::LOSER_TREE_COMBINED;
1373
1374 switch (k)
1375 {
1376 case 0:
1377 break;
1378 case 1:
1379 return_target = std::copy(seqs_begin[0].first,
1380 seqs_begin[0].first + length,
1381 target);
1382 seqs_begin[0].first += length;
1383 break;
1384 case 2:
1385 return_target = merge_advance(seqs_begin[0].first,
1386 seqs_begin[0].second,
1387 seqs_begin[1].first,
1388 seqs_begin[1].second,
1389 target, length, comp);
1390 break;
1391 case 3:
1392 switch (mwma)
1393 {
1394 case Settings::LOSER_TREE_COMBINED:
1395 return_target = multiway_merge_3_combined(seqs_begin,
1396 seqs_end,
1397 target,
1398 comp, length, stable);
1399 break;
1400 case Settings::LOSER_TREE_SENTINEL:
1401 return_target = multiway_merge_3_variant<unguarded_iterator>(
1402 seqs_begin,
1403 seqs_end,
1404 target,
1405 comp, length, stable);
1406 break;
1407 default:
1408 return_target = multiway_merge_3_variant<guarded_iterator>(
1409 seqs_begin,
1410 seqs_end,
1411 target,
1412 comp, length, stable);
1413 break;
1414 }
1415 break;
1416 case 4:
1417 switch (mwma)
1418 {
1419 case Settings::LOSER_TREE_COMBINED:
1420 return_target = multiway_merge_4_combined(
1421 seqs_begin,
1422 seqs_end,
1423 target,
1424 comp, length, stable);
1425 break;
1426 case Settings::LOSER_TREE_SENTINEL:
1427 return_target = multiway_merge_4_variant<unguarded_iterator>(
1428 seqs_begin,
1429 seqs_end,
1430 target,
1431 comp, length, stable);
1432 break;
1433 default:
1434 return_target = multiway_merge_4_variant<guarded_iterator>(
1435 seqs_begin,
1436 seqs_end,
1437 target,
1438 comp, length, stable);
1439 break;
1440 }
1441 break;
1442 default:
1443 {
1444 switch (mwma)
1445 {
1446 case Settings::BUBBLE:
1447 return_target = multiway_merge_bubble(
1448 seqs_begin,
1449 seqs_end,
1450 target,
1451 comp, length, stable);
1452 break;
1453 #if _GLIBCXX_LOSER_TREE_EXPLICIT
1454 case Settings::LOSER_TREE_EXPLICIT:
1455 return_target = multiway_merge_loser_tree<
1456 LoserTreeExplicit<value_type, Comparator> >(
1457 seqs_begin,
1458 seqs_end,
1459 target,
1460 comp, length, stable);
1461 break;
1462 #endif
1463 #if _GLIBCXX_LOSER_TREE
1464 case Settings::LOSER_TREE:
1465 return_target = multiway_merge_loser_tree<
1466 LoserTree<value_type, Comparator> >(
1467 seqs_begin,
1468 seqs_end,
1469 target,
1470 comp, length, stable);
1471 break;
1472 #endif
1473 #if _GLIBCXX_LOSER_TREE_COMBINED
1474 case Settings::LOSER_TREE_COMBINED:
1475 return_target = multiway_merge_loser_tree_combined(
1476 seqs_begin,
1477 seqs_end,
1478 target,
1479 comp, length, stable);
1480 break;
1481 #endif
1482 #if _GLIBCXX_LOSER_TREE_SENTINEL
1483 case Settings::LOSER_TREE_SENTINEL:
1484 return_target = multiway_merge_loser_tree_sentinel(
1485 seqs_begin,
1486 seqs_end,
1487 target,
1488 comp, length, stable);
1489 break;
1490 #endif
1491 default:
1492 // multiway_merge algorithm not implemented.
1493 _GLIBCXX_PARALLEL_ASSERT(0);
1494 break;
1495 }
1496 }
1497 }
1498 #if _GLIBCXX_ASSERTIONS
1499 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1500 #endif
1501
1502 return return_target;
1503 }
1504
1505 /** @brief Parallel multi-way merge routine.
1506 *
1507 * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and runtime settings.
1508 * @param seqs_begin Begin iterator of iterator pair input sequence.
1509 * @param seqs_end End iterator of iterator pair input sequence.
1510 * @param target Begin iterator out output sequence.
1511 * @param comp Comparator.
1512 * @param length Maximum length to merge.
1513 * @param stable Stable merging incurs a performance penalty.
1514 * @param sentinel Ignored.
1515 * @return End iterator of output sequence.
1516 */
1517 template<
1518 typename RandomAccessIteratorIterator,
1519 typename RandomAccessIterator3,
1520 typename _DifferenceTp,
1521 typename Comparator>
1522 RandomAccessIterator3
1523 parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
1524 RandomAccessIteratorIterator seqs_end,
1525 RandomAccessIterator3 target,
1526 Comparator comp,
1527 _DifferenceTp length, bool stable, bool sentinel)
1528 {
1529 _GLIBCXX_CALL(length)
1530
1531 typedef _DifferenceTp difference_type;
1532 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1533 ::value_type::first_type
1534 RandomAccessIterator1;
1535 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1536 value_type;
1537
1538 // k sequences.
1539 int k = static_cast<int>(seqs_end - seqs_begin);
1540
1541 difference_type total_length = 0;
1542 for (RandomAccessIteratorIterator raii = seqs_begin;
1543 raii != seqs_end; ++raii)
1544 total_length += _GLIBCXX_PARALLEL_LENGTH(*raii);
1545
1546 _GLIBCXX_CALL(total_length)
1547
1548 if (total_length == 0 || k == 0)
1549 return target;
1550
1551 bool tight = (total_length == length);
1552
1553 std::vector<std::pair<difference_type, difference_type> >* pieces;
1554
1555 thread_index_t num_threads = static_cast<thread_index_t>(
1556 std::min<difference_type>(get_max_threads(), total_length));
1557
1558 # pragma omp parallel num_threads (num_threads)
1559 {
1560 # pragma omp single
1561 {
1562 num_threads = omp_get_num_threads();
1563 // Thread t will have to merge pieces[iam][0..k - 1]
1564 pieces = new std::vector<
1565 std::pair<difference_type, difference_type> >[num_threads];
1566 for (int s = 0; s < num_threads; ++s)
1567 pieces[s].resize(k);
1568
1569 difference_type num_samples =
1570 Settings::merge_oversampling * num_threads;
1571
1572 if (Settings::multiway_merge_splitting == Settings::SAMPLING)
1573 {
1574 value_type* samples = static_cast<value_type*>(
1575 ::operator new(sizeof(value_type) * k * num_samples));
1576 // Sample.
1577 for (int s = 0; s < k; ++s)
1578 for (difference_type i = 0; i < num_samples; ++i)
1579 {
1580 difference_type sample_index =
1581 static_cast<difference_type>(
1582 _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) * (double(i + 1) /
1583 (num_samples + 1)) * (double(length)
1584 / total_length));
1585 ::new(&(samples[s * num_samples + i])) value_type(
1586 seqs_begin[s].first[sample_index]);
1587 }
1588
1589 if (stable)
1590 __gnu_sequential::stable_sort(
1591 samples, samples + (num_samples * k), comp);
1592 else
1593 __gnu_sequential::sort(
1594 samples, samples + (num_samples * k), comp);
1595
1596 for (int slab = 0; slab < num_threads; ++slab)
1597 // For each slab / processor.
1598 for (int seq = 0; seq < k; ++seq)
1599 {
1600 // For each sequence.
1601 if (slab > 0)
1602 pieces[slab][seq].first =
1603 std::upper_bound(
1604 seqs_begin[seq].first,
1605 seqs_begin[seq].second,
1606 samples[num_samples * k * slab / num_threads],
1607 comp)
1608 - seqs_begin[seq].first;
1609 else
1610 {
1611 // Absolute beginning.
1612 pieces[slab][seq].first = 0;
1613 }
1614 if ((slab + 1) < num_threads)
1615 pieces[slab][seq].second =
1616 std::upper_bound(
1617 seqs_begin[seq].first,
1618 seqs_begin[seq].second,
1619 samples[num_samples * k * (slab + 1) /
1620 num_threads], comp)
1621 - seqs_begin[seq].first;
1622 else
1623 pieces[slab][seq].second = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
1624 }
1625 ::operator delete(samples);
1626 }
1627 else
1628 {
1629 // (Settings::multiway_merge_splitting == Settings::EXACT).
1630 std::vector<RandomAccessIterator1>* offsets =
1631 new std::vector<RandomAccessIterator1>[num_threads];
1632 std::vector<
1633 std::pair<RandomAccessIterator1, RandomAccessIterator1>
1634 > se(k);
1635
1636 copy(seqs_begin, seqs_end, se.begin());
1637
1638 difference_type* borders =
1639 new difference_type[num_threads + 1];
1640 equally_split(length, num_threads, borders);
1641
1642 for (int s = 0; s < (num_threads - 1); ++s)
1643 {
1644 offsets[s].resize(k);
1645 multiseq_partition(
1646 se.begin(), se.end(), borders[s + 1],
1647 offsets[s].begin(), comp);
1648
1649 // Last one also needed and available.
1650 if (!tight)
1651 {
1652 offsets[num_threads - 1].resize(k);
1653 multiseq_partition(se.begin(), se.end(),
1654 difference_type(length),
1655 offsets[num_threads - 1].begin(), comp);
1656 }
1657 }
1658
1659
1660 for (int slab = 0; slab < num_threads; ++slab)
1661 {
1662 // For each slab / processor.
1663 for (int seq = 0; seq < k; ++seq)
1664 {
1665 // For each sequence.
1666 if (slab == 0)
1667 {
1668 // Absolute beginning.
1669 pieces[slab][seq].first = 0;
1670 }
1671 else
1672 pieces[slab][seq].first =
1673 pieces[slab - 1][seq].second;
1674 if (!tight || slab < (num_threads - 1))
1675 pieces[slab][seq].second =
1676 offsets[slab][seq] - seqs_begin[seq].first;
1677 else
1678 {
1679 // slab == num_threads - 1
1680 pieces[slab][seq].second =
1681 _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
1682 }
1683 }
1684 }
1685 delete[] offsets;
1686 }
1687 } //single
1688
1689 thread_index_t iam = omp_get_thread_num();
1690
1691 difference_type target_position = 0;
1692
1693 for (int c = 0; c < k; ++c)
1694 target_position += pieces[iam][c].first;
1695
1696 if (k > 2)
1697 {
1698 std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks
1699 = new
1700 std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
1701
1702 difference_type local_length = 0;
1703 for (int s = 0; s < k; ++s)
1704 {
1705 chunks[s] = std::make_pair(
1706 seqs_begin[s].first + pieces[iam][s].first,
1707 seqs_begin[s].first + pieces[iam][s].second);
1708 local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]);
1709 }
1710
1711 multiway_merge(
1712 chunks, chunks + k, target + target_position, comp,
1713 std::min(local_length, length - target_position),
1714 stable, false, sequential_tag());
1715
1716 delete[] chunks;
1717 }
1718 else if (k == 2)
1719 {
1720 RandomAccessIterator1
1721 begin0 = seqs_begin[0].first + pieces[iam][0].first,
1722 begin1 = seqs_begin[1].first + pieces[iam][1].first;
1723 merge_advance(begin0,
1724 seqs_begin[0].first + pieces[iam][0].second,
1725 begin1,
1726 seqs_begin[1].first + pieces[iam][1].second,
1727 target + target_position,
1728 (pieces[iam][0].second - pieces[iam][0].first) +
1729 (pieces[iam][1].second - pieces[iam][1].first),
1730 comp);
1731 }
1732 } //parallel
1733
1734 #if _GLIBCXX_ASSERTIONS
1735 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1736 #endif
1737
1738 // Update ends of sequences.
1739 for (int s = 0; s < k; ++s)
1740 seqs_begin[s].first += pieces[num_threads - 1][s].second;
1741
1742 delete[] pieces;
1743
1744 return target + length;
1745 }
1746
1747 /**
1748 * @brief Multi-way merging front-end.
1749 * @param seqs_begin Begin iterator of iterator pair input sequence.
1750 * @param seqs_end End iterator of iterator pair input sequence.
1751 * @param target Begin iterator out output sequence.
1752 * @param comp Comparator.
1753 * @param length Maximum length to merge.
1754 * @param stable Stable merging incurs a performance penalty.
1755 * @return End iterator of output sequence.
1756 */
1757 template<
1758 typename RandomAccessIteratorPairIterator,
1759 typename RandomAccessIterator3,
1760 typename _DifferenceTp,
1761 typename Comparator>
1762 RandomAccessIterator3
1763 multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
1764 RandomAccessIteratorPairIterator seqs_end,
1765 RandomAccessIterator3 target, Comparator comp,
1766 _DifferenceTp length, bool stable)
1767 {
1768 typedef _DifferenceTp difference_type;
1769 _GLIBCXX_CALL(seqs_end - seqs_begin)
1770
1771 if (seqs_begin == seqs_end)
1772 return target;
1773
1774 RandomAccessIterator3 target_end;
1775 if (_GLIBCXX_PARALLEL_CONDITION(
1776 ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
1777 && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1778 target_end = parallel_multiway_merge(
1779 seqs_begin, seqs_end,
1780 target, comp, static_cast<difference_type>(length), stable, false);
1781 else
1782 target_end = multiway_merge(
1783 seqs_begin, seqs_end,
1784 target, comp, length, stable, false, sequential_tag());
1785
1786 return target_end;
1787 }
1788
1789 /** @brief Multi-way merging front-end.
1790 * @param seqs_begin Begin iterator of iterator pair input sequence.
1791 * @param seqs_end End iterator of iterator pair input sequence.
1792 * @param target Begin iterator out output sequence.
1793 * @param comp Comparator.
1794 * @param length Maximum length to merge.
1795 * @param stable Stable merging incurs a performance penalty.
1796 * @return End iterator of output sequence.
1797 * @pre For each @c i, @c seqs_begin[i].second must be the end
1798 * marker of the sequence, but also reference the one more sentinel
1799 * element. */
1800 template<
1801 typename RandomAccessIteratorPairIterator,
1802 typename RandomAccessIterator3,
1803 typename _DifferenceTp,
1804 typename Comparator>
1805 RandomAccessIterator3
1806 multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
1807 RandomAccessIteratorPairIterator seqs_end,
1808 RandomAccessIterator3 target,
1809 Comparator comp,
1810 _DifferenceTp length,
1811 bool stable)
1812 {
1813 typedef _DifferenceTp difference_type;
1814
1815 if (seqs_begin == seqs_end)
1816 return target;
1817
1818 _GLIBCXX_CALL(seqs_end - seqs_begin)
1819
1820 if (_GLIBCXX_PARALLEL_CONDITION(
1821 ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
1822 && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1823 return parallel_multiway_merge(
1824 seqs_begin, seqs_end,
1825 target, comp, static_cast<difference_type>(length), stable, true);
1826 else
1827 return multiway_merge(
1828 seqs_begin, seqs_end,
1829 target, comp, length, stable, true, sequential_tag());
1830 }
1831 }
1832
1833 #endif