multiway_merge.h: More robust finding of an arbitrary existing element inside the...
[gcc.git] / libstdc++-v3 / include / parallel / multiway_merge.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
20
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
29 // Public License.
30
31 /** @file parallel/multiway_merge.h
32 * @brief Implementation of sequential and parallel multiway merge.
33 *
34 * Explanations on the high-speed merging routines in the appendix of
35 *
36 * P. Sanders.
37 * Fast priority queues for cached memory.
38 * ACM Journal of Experimental Algorithmics, 5, 2000.
39 *
40 * This file is a GNU parallel extension to the Standard C++ Library.
41 */
42
43 // Written by Johannes Singler.
44
45 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
46 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
47
48 #include <vector>
49
50 #include <bits/stl_algo.h>
51 #include <parallel/features.h>
52 #include <parallel/parallel.h>
53 #include <parallel/merge.h>
54 #include <parallel/losertree.h>
55 #if _GLIBCXX_ASSERTIONS
56 #include <parallel/checkers.h>
57 #endif
58
59 /** @brief Length of a sequence described by a pair of iterators. */
60 #define LENGTH(s) ((s).second - (s).first)
61
62 // XXX need iterator typedefs
63 namespace __gnu_parallel
64 {
65 template<typename RandomAccessIterator, typename Comparator>
66 class guarded_iterator;
67
68 template<typename RandomAccessIterator, typename Comparator>
69 inline bool
70 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
71 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
72
73 template<typename RandomAccessIterator, typename Comparator>
74 inline bool
75 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
76 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
77
78 /** @brief Iterator wrapper supporting an implicit supremum at the end
79 of the sequence, dominating all comparisons.
80 * Deriving from RandomAccessIterator is not possible since
81 * RandomAccessIterator need not be a class.
82 */
83 template<typename RandomAccessIterator, typename Comparator>
84 class guarded_iterator
85 {
86 private:
87 /** @brief Current iterator position. */
88 RandomAccessIterator current;
89
90 /** @brief End iterator of the sequence. */
91 RandomAccessIterator end;
92
93 /** @brief Comparator. */
94 Comparator& comp;
95
96 public:
97 /** @brief Constructor. Sets iterator to beginning of sequence.
98 * @param begin Begin iterator of sequence.
99 * @param end End iterator of sequence.
100 * @param comp Comparator provided for associated overloaded
101 * compare operators. */
102 inline guarded_iterator(RandomAccessIterator begin,
103 RandomAccessIterator end, Comparator& comp)
104 : current(begin), end(end), comp(comp)
105 { }
106
107 /** @brief Pre-increment operator.
108 * @return This. */
109 inline guarded_iterator<RandomAccessIterator, Comparator>&
110 operator++()
111 {
112 ++current;
113 return *this;
114 }
115
116 /** @brief Dereference operator.
117 * @return Referenced element. */
118 inline typename std::iterator_traits<RandomAccessIterator>::value_type
119 operator*()
120 { return *current; }
121
122 /** @brief Convert to wrapped iterator.
123 * @return Wrapped iterator. */
124 inline operator RandomAccessIterator()
125 { return current; }
126
127 friend bool
128 operator< <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
129
130 friend bool
131 operator<= <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
132 };
133
134 /** @brief Compare two elements referenced by guarded iterators.
135 * @param bi1 First iterator.
136 * @param bi2 Second iterator.
137 * @return @c True if less. */
138 template<typename RandomAccessIterator, typename Comparator>
139 inline bool
140 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
141 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
142 {
143 if (bi1.current == bi1.end) //bi1 is sup
144 return bi2.current == bi2.end; //bi2 is not sup
145 if (bi2.current == bi2.end) //bi2 is sup
146 return true;
147 return (bi1.comp)(*bi1, *bi2); //normal compare
148 }
149
150 /** @brief Compare two elements referenced by guarded iterators.
151 * @param bi1 First iterator.
152 * @param bi2 Second iterator.
153 * @return @c True if less equal. */
154 template<typename RandomAccessIterator, typename Comparator>
155 inline bool
156 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
157 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
158 {
159 if (bi2.current == bi2.end) //bi1 is sup
160 return bi1.current != bi1.end; //bi2 is not sup
161 if (bi1.current == bi1.end) //bi2 is sup
162 return false;
163 return !(bi1.comp)(*bi2, *bi1); //normal compare
164 }
165
166 template<typename RandomAccessIterator, typename Comparator>
167 class unguarded_iterator;
168
169 template<typename RandomAccessIterator, typename Comparator>
170 inline bool
171 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
172 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
173
174 template<typename RandomAccessIterator, typename Comparator>
175 inline bool
176 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
177 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
178
179 template<typename RandomAccessIterator, typename Comparator>
180 class unguarded_iterator
181 {
182 private:
183 /** @brief Current iterator position. */
184 RandomAccessIterator& current;
185 /** @brief Comparator. */
186 mutable Comparator& comp;
187
188 public:
189 /** @brief Constructor. Sets iterator to beginning of sequence.
190 * @param begin Begin iterator of sequence.
191 * @param end Unused, only for compatibility.
192 * @param comp Unused, only for compatibility. */
193 inline unguarded_iterator(RandomAccessIterator begin,
194 RandomAccessIterator end, Comparator& comp)
195 : current(begin), comp(comp)
196 { }
197
198 /** @brief Pre-increment operator.
199 * @return This. */
200 inline unguarded_iterator<RandomAccessIterator, Comparator>&
201 operator++()
202 {
203 current++;
204 return *this;
205 }
206
207 /** @brief Dereference operator.
208 * @return Referenced element. */
209 inline typename std::iterator_traits<RandomAccessIterator>::value_type
210 operator*()
211 { return *current; }
212
213 /** @brief Convert to wrapped iterator.
214 * @return Wrapped iterator. */
215 inline
216 operator RandomAccessIterator()
217 { return current; }
218
219 friend bool
220 operator< <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
221
222 friend bool
223 operator<= <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
224 };
225
226 /** @brief Compare two elements referenced by unguarded iterators.
227 * @param bi1 First iterator.
228 * @param bi2 Second iterator.
229 * @return @c True if less. */
230 template<typename RandomAccessIterator, typename Comparator>
231 inline bool
232 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
233 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
234 {
235 // Normal compare.
236 return (bi1.comp)(*bi1, *bi2);
237 }
238
239 /** @brief Compare two elements referenced by unguarded iterators.
240 * @param bi1 First iterator.
241 * @param bi2 Second iterator.
242 * @return @c True if less equal. */
243 template<typename RandomAccessIterator, typename Comparator>
244 inline bool
245 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
246 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
247 {
248 // Normal compare.
249 return !(bi1.comp)(*bi2, *bi1);
250 }
251
252 /** Prepare a set of sequences to be merged without a (end) guard
253 * @param seqs_begin
254 * @param seqs_end
255 * @param comp
256 * @param min_sequence
257 * @param stable
258 * @pre (seqs_end - seqs_begin > 0) */
259 template<typename RandomAccessIteratorIterator, typename Comparator>
260 typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
261 prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
262 RandomAccessIteratorIterator seqs_end, Comparator comp,
263 int& min_sequence, bool stable)
264 {
265 _GLIBCXX_CALL(seqs_end - seqs_begin)
266
267 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
268 RandomAccessIterator1;
269 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
270 value_type;
271 typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
272 difference_type;
273
274 if ((*seqs_begin).first == (*seqs_begin).second)
275 {
276 // Empty sequence found, it's the first one.
277 min_sequence = 0;
278 return -1;
279 }
280
281 // Last element in sequence.
282 value_type min = *((*seqs_begin).second - 1);
283 min_sequence = 0;
284 for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++)
285 {
286 if ((*s).first == (*s).second)
287 {
288 // Empty sequence found.
289 min_sequence = static_cast<int>(s - seqs_begin);
290 return -1;
291 }
292
293 // Last element in sequence.
294 const value_type& v = *((*s).second - 1);
295 if (comp(v, min)) //strictly smaller
296 {
297 min = v;
298 min_sequence = static_cast<int>(s - seqs_begin);
299 }
300 }
301
302 difference_type overhang_size = 0;
303
304 int s = 0;
305 for (s = 0; s <= min_sequence; s++)
306 {
307 RandomAccessIterator1 split;
308 if (stable)
309 split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
310 min, comp);
311 else
312 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
313 min, comp);
314
315 overhang_size += seqs_begin[s].second - split;
316 }
317
318 for (; s < (seqs_end - seqs_begin); s++)
319 {
320 RandomAccessIterator1 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp);
321 overhang_size += seqs_begin[s].second - split;
322 }
323
324 // So many elements will be left over afterwards.
325 return overhang_size;
326 }
327
328 /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
329 * @param seqs_begin
330 * @param seqs_end
331 * @param comp */
332 template<typename RandomAccessIteratorIterator, typename Comparator>
333 typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
334 prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
335 RandomAccessIteratorIterator seqs_end,
336 Comparator comp)
337 {
338 _GLIBCXX_CALL(seqs_end - seqs_begin)
339
340 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
341 RandomAccessIterator1;
342 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
343 value_type;
344 typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
345 difference_type;
346
347 // Last element in sequence.
348 value_type* max = NULL;
349 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
350 {
351 if ((*s).first == (*s).second)
352 continue;
353
354 // Last element in sequence.
355 value_type& v = *((*s).second - 1);
356
357 // Strictly greater.
358 if (!max || comp(*max, v))
359 max = &v;
360 }
361
362 difference_type overhang_size = 0;
363 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
364 {
365 RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second,
366 *max, comp);
367 overhang_size += (*s).second - split;
368
369 // Set sentinel.
370 *((*s).second) = *max;
371 }
372
373 // So many elements will be left over afterwards.
374 return overhang_size;
375 }
376
377 /** @brief Highly efficient 3-way merging procedure.
378 * @param seqs_begin Begin iterator of iterator pair input sequence.
379 * @param seqs_end End iterator of iterator pair input sequence.
380 * @param target Begin iterator out output sequence.
381 * @param comp Comparator.
382 * @param length Maximum length to merge.
383 * @param stable Unused, stable anyway.
384 * @return End iterator of output sequence. */
385 template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
386 RandomAccessIterator3
387 multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
388 {
389 _GLIBCXX_CALL(length);
390
391 typedef _DifferenceTp difference_type;
392
393 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
394 RandomAccessIterator1;
395 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
396 value_type;
397
398 if (length == 0)
399 return target;
400
401 iterator<RandomAccessIterator1, Comparator>
402 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
403 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
404 seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
405
406 if (seq0 <= seq1)
407 {
408 if (seq1 <= seq2)
409 goto s012;
410 else
411 if (seq2 < seq0)
412 goto s201;
413 else
414 goto s021;
415 }
416 else
417 {
418 if (seq1 <= seq2)
419 {
420 if (seq0 <= seq2)
421 goto s102;
422 else
423 goto s120;
424 }
425 else
426 goto s210;
427 }
428
429 #define Merge3Case(a,b,c,c0,c1) \
430 s ## a ## b ## c : \
431 *target = *seq ## a; \
432 ++target; \
433 length--; \
434 ++seq ## a; \
435 if (length == 0) goto finish; \
436 if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
437 if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
438 goto s ## b ## c ## a;
439
440 Merge3Case(0, 1, 2, <=, <=);
441 Merge3Case(1, 2, 0, <=, < );
442 Merge3Case(2, 0, 1, < , < );
443 Merge3Case(1, 0, 2, < , <=);
444 Merge3Case(0, 2, 1, <=, <=);
445 Merge3Case(2, 1, 0, < , < );
446
447 #undef Merge3Case
448
449 finish:
450 ;
451
452 seqs_begin[0].first = seq0;
453 seqs_begin[1].first = seq1;
454 seqs_begin[2].first = seq2;
455
456 return target;
457 }
458
459 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
460 RandomAccessIterator3
461 multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
462 {
463 _GLIBCXX_CALL(length);
464
465 typedef _DifferenceTp difference_type;
466 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
467 RandomAccessIterator1;
468 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
469 value_type;
470
471 int min_seq;
472 RandomAccessIterator3 target_end;
473
474 // Stable anyway.
475 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
476
477 difference_type total_length = 0;
478 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
479 total_length += LENGTH(*s);
480
481 if (overhang != -1)
482 {
483 difference_type unguarded_length = std::min(length, total_length - overhang);
484 target_end = multiway_merge_3_variant<unguarded_iterator>
485 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
486 overhang = length - unguarded_length;
487 }
488 else
489 {
490 // Empty sequence found.
491 overhang = length;
492 target_end = target;
493 }
494
495 #if _GLIBCXX_ASSERTIONS
496 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
497 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
498 #endif
499
500 switch (min_seq)
501 {
502 case 0:
503 // Iterators will be advanced accordingly.
504 target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
505 seqs_begin[2].first, seqs_begin[2].second,
506 target_end, overhang, comp);
507 break;
508 case 1:
509 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
510 seqs_begin[2].first, seqs_begin[2].second,
511 target_end, overhang, comp);
512 break;
513 case 2:
514 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
515 seqs_begin[1].first, seqs_begin[1].second,
516 target_end, overhang, comp);
517 break;
518 default:
519 _GLIBCXX_PARALLEL_ASSERT(false);
520 }
521
522 #if _GLIBCXX_ASSERTIONS
523 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
524 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
525 #endif
526
527 return target_end;
528 }
529
530 /** @brief Highly efficient 4-way merging procedure.
531 * @param seqs_begin Begin iterator of iterator pair input sequence.
532 * @param seqs_end End iterator of iterator pair input sequence.
533 * @param target Begin iterator out output sequence.
534 * @param comp Comparator.
535 * @param length Maximum length to merge.
536 * @param stable Unused, stable anyway.
537 * @return End iterator of output sequence. */
538 template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
539 RandomAccessIterator3
540 multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
541 {
542 _GLIBCXX_CALL(length);
543 typedef _DifferenceTp difference_type;
544
545 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
546 RandomAccessIterator1;
547 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
548 value_type;
549
550 iterator<RandomAccessIterator1, Comparator>
551 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
552 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
553 seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
554 seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
555
556 #define Decision(a,b,c,d) { \
557 if (seq ## d < seq ## a) goto s ## d ## a ## b ## c; \
558 if (seq ## d < seq ## b) goto s ## a ## d ## b ## c; \
559 if (seq ## d < seq ## c) goto s ## a ## b ## d ## c; \
560 goto s ## a ## b ## c ## d; }
561
562 if (seq0 <= seq1)
563 {
564 if (seq1 <= seq2)
565 Decision(0,1,2,3)
566 else
567 if (seq2 < seq0)
568 Decision(2,0,1,3)
569 else
570 Decision(0,2,1,3)
571 }
572 else
573 {
574 if (seq1 <= seq2)
575 {
576 if (seq0 <= seq2)
577 Decision(1,0,2,3)
578 else
579 Decision(1,2,0,3)
580 }
581 else
582 Decision(2,1,0,3)
583 }
584
585 #define Merge4Case(a,b,c,d,c0,c1,c2) \
586 s ## a ## b ## c ## d: \
587 if (length == 0) goto finish; \
588 *target = *seq ## a; \
589 ++target; \
590 length--; \
591 ++seq ## a; \
592 if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \
593 if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \
594 if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \
595 goto s ## b ## c ## d ## a;
596
597 Merge4Case(0, 1, 2, 3, <=, <=, <=);
598 Merge4Case(0, 1, 3, 2, <=, <=, <=);
599 Merge4Case(0, 2, 1, 3, <=, <=, <=);
600 Merge4Case(0, 2, 3, 1, <=, <=, <=);
601 Merge4Case(0, 3, 1, 2, <=, <=, <=);
602 Merge4Case(0, 3, 2, 1, <=, <=, <=);
603 Merge4Case(1, 0, 2, 3, < , <=, <=);
604 Merge4Case(1, 0, 3, 2, < , <=, <=);
605 Merge4Case(1, 2, 0, 3, <=, < , <=);
606 Merge4Case(1, 2, 3, 0, <=, <=, < );
607 Merge4Case(1, 3, 0, 2, <=, < , <=);
608 Merge4Case(1, 3, 2, 0, <=, <=, < );
609 Merge4Case(2, 0, 1, 3, < , < , <=);
610 Merge4Case(2, 0, 3, 1, < , <=, < );
611 Merge4Case(2, 1, 0, 3, < , < , <=);
612 Merge4Case(2, 1, 3, 0, < , <=, < );
613 Merge4Case(2, 3, 0, 1, <=, < , < );
614 Merge4Case(2, 3, 1, 0, <=, < , < );
615 Merge4Case(3, 0, 1, 2, < , < , < );
616 Merge4Case(3, 0, 2, 1, < , < , < );
617 Merge4Case(3, 1, 0, 2, < , < , < );
618 Merge4Case(3, 1, 2, 0, < , < , < );
619 Merge4Case(3, 2, 0, 1, < , < , < );
620 Merge4Case(3, 2, 1, 0, < , < , < );
621
622 #undef Merge4Case
623 #undef Decision
624
625 finish:
626 ;
627
628 seqs_begin[0].first = seq0;
629 seqs_begin[1].first = seq1;
630 seqs_begin[2].first = seq2;
631 seqs_begin[3].first = seq3;
632
633 return target;
634 }
635
636 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
637 RandomAccessIterator3
638 multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
639 {
640 _GLIBCXX_CALL(length);
641 typedef _DifferenceTp difference_type;
642
643 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
644 RandomAccessIterator1;
645 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
646 value_type;
647
648 int min_seq;
649 RandomAccessIterator3 target_end;
650
651 // Stable anyway.
652 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
653
654 difference_type total_length = 0;
655 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
656 total_length += LENGTH(*s);
657
658 if (overhang != -1)
659 {
660 difference_type unguarded_length = std::min(length, total_length - overhang);
661 target_end = multiway_merge_4_variant<unguarded_iterator>
662 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
663 overhang = length - unguarded_length;
664 }
665 else
666 {
667 // Empty sequence found.
668 overhang = length;
669 target_end = target;
670 }
671
672 #if _GLIBCXX_ASSERTIONS
673 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
674 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
675 #endif
676
677 std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > one_missing(seqs_begin, seqs_end);
678 one_missing.erase(one_missing.begin() + min_seq); //remove
679
680 target_end = multiway_merge_3_variant<guarded_iterator>(one_missing.begin(), one_missing.end(), target_end, comp, overhang, stable);
681
682 // Insert back again.
683 one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
684 // Write back modified iterators.
685 copy(one_missing.begin(), one_missing.end(), seqs_begin);
686
687 #if _GLIBCXX_ASSERTIONS
688 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
689 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
690 #endif
691
692 return target_end;
693 }
694
695 /** @brief Basic multi-way merging procedure.
696 *
697 * The head elements are kept in a sorted array, new heads are
698 * inserted linearly.
699 * @param seqs_begin Begin iterator of iterator pair input sequence.
700 * @param seqs_end End iterator of iterator pair input sequence.
701 * @param target Begin iterator out output sequence.
702 * @param comp Comparator.
703 * @param length Maximum length to merge.
704 * @param stable Stable merging incurs a performance penalty.
705 * @return End iterator of output sequence.
706 */
707 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
708 RandomAccessIterator3
709 multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
710 {
711 _GLIBCXX_CALL(length)
712
713 typedef _DifferenceTp difference_type;
714 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
715 RandomAccessIterator1;
716 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
717 value_type;
718
719 // Num remaining pieces.
720 int k = static_cast<int>(seqs_end - seqs_begin), nrp;
721
722 value_type* pl = static_cast<value_type*>(::operator new(sizeof(value_type) * k));
723 int* source = new int[k];
724 difference_type total_length = 0;
725
726 #define POS(i) seqs_begin[(i)].first
727 #define STOPS(i) seqs_begin[(i)].second
728
729 // Write entries into queue.
730 nrp = 0;
731 for (int pi = 0; pi < k; pi++)
732 {
733 if (STOPS(pi) != POS(pi))
734 {
735 pl[nrp] = *(POS(pi));
736 source[nrp] = pi;
737 nrp++;
738 total_length += LENGTH(seqs_begin[pi]);
739 }
740 }
741
742 if (stable)
743 {
744 for (int k = 0; k < nrp - 1; k++)
745 for (int pi = nrp - 1; pi > k; pi--)
746 if (comp(pl[pi], pl[pi - 1]) ||
747 (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
748 {
749 std::swap(pl[pi - 1], pl[pi]);
750 std::swap(source[pi - 1], source[pi]);
751 }
752 }
753 else
754 {
755 for (int k = 0; k < nrp - 1; k++)
756 for (int pi = nrp - 1; pi > k; pi--)
757 if (comp(pl[pi], pl[pi-1]))
758 {
759 std::swap(pl[pi-1], pl[pi]);
760 std::swap(source[pi-1], source[pi]);
761 }
762 }
763
764 // Iterate.
765 if (stable)
766 {
767 int j;
768 while (nrp > 0 && length > 0)
769 {
770 if (source[0] < source[1])
771 {
772 // pl[0] <= pl[1]
773 while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
774 {
775 *target = pl[0];
776 ++target;
777 ++POS(source[0]);
778 length--;
779 if (POS(source[0]) == STOPS(source[0]))
780 {
781 // Move everything to the left.
782 for (int s = 0; s < nrp - 1; s++)
783 {
784 pl[s] = pl[s + 1];
785 source[s] = source[s + 1];
786 }
787 nrp--;
788 break;
789 }
790 else
791 pl[0] = *(POS(source[0]));
792 }
793 }
794 else
795 {
796 // pl[0] < pl[1]
797 while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
798 {
799 *target = pl[0];
800 ++target;
801 ++POS(source[0]);
802 length--;
803 if (POS(source[0]) == STOPS(source[0]))
804 {
805 for (int s = 0; s < nrp - 1; s++)
806 {
807 pl[s] = pl[s + 1];
808 source[s] = source[s + 1];
809 }
810 nrp--;
811 break;
812 }
813 else
814 pl[0] = *(POS(source[0]));
815 }
816 }
817
818 // Sink down.
819 j = 1;
820 while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
821 (!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1]))))
822 {
823 std::swap(pl[j - 1], pl[j]);
824 std::swap(source[j - 1], source[j]);
825 j++;
826 }
827 }
828 }
829 else
830 {
831 int j;
832 while (nrp > 0 && length > 0)
833 {
834 // pl[0] <= pl[1]
835 while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
836 {
837 *target = pl[0];
838 ++target;
839 ++POS(source[0]);
840 length--;
841 if (POS(source[0]) == STOPS(source[0]))
842 {
843 for (int s = 0; s < (nrp - 1); s++)
844 {
845 pl[s] = pl[s + 1];
846 source[s] = source[s + 1];
847 }
848 nrp--;
849 break;
850 }
851 else
852 pl[0] = *(POS(source[0]));
853 }
854
855 // Sink down.
856 j = 1;
857 while ((j < nrp) && comp(pl[j], pl[j - 1]))
858 {
859 std::swap(pl[j - 1], pl[j]);
860 std::swap(source[j - 1], source[j]);
861 j++;
862 }
863 }
864 }
865
866 delete[] pl;
867 delete[] source;
868
869 return target;
870 }
871
872 /** @brief Multi-way merging procedure for a high branching factor,
873 * guarded case.
874 *
875 * The head elements are kept in a loser tree.
876 * @param seqs_begin Begin iterator of iterator pair input sequence.
877 * @param seqs_end End iterator of iterator pair input sequence.
878 * @param target Begin iterator out output sequence.
879 * @param comp Comparator.
880 * @param length Maximum length to merge.
881 * @param stable Stable merging incurs a performance penalty.
882 * @return End iterator of output sequence.
883 */
884 template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
885 RandomAccessIterator3
886 multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
887 {
888 _GLIBCXX_CALL(length)
889
890 typedef _DifferenceTp difference_type;
891 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
892 RandomAccessIterator1;
893 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
894 value_type;
895
896 int k = static_cast<int>(seqs_end - seqs_begin);
897
898 LT lt(k, comp);
899
900 difference_type total_length = 0;
901
902 // Default value for potentially non-default-constructible types.
903 value_type* arbitrary_element = NULL;
904
905 for (int t = 0; t < k; t++)
906 {
907 if(arbitrary_element == NULL && LENGTH(seqs_begin[t]) > 0)
908 arbitrary_element = &(*seqs_begin[t].first);
909 total_length += LENGTH(seqs_begin[t]);
910 }
911
912 if(total_length == 0)
913 return target;
914
915 for (int t = 0; t < k; t++)
916 {
917 if (stable)
918 {
919 if (seqs_begin[t].first == seqs_begin[t].second)
920 lt.insert_start_stable(*arbitrary_element, t, true);
921 else
922 lt.insert_start_stable(*seqs_begin[t].first, t, false);
923 }
924 else
925 {
926 if (seqs_begin[t].first == seqs_begin[t].second)
927 lt.insert_start(*arbitrary_element, t, true);
928 else
929 lt.insert_start(*seqs_begin[t].first, t, false);
930 }
931 }
932
933 if (stable)
934 lt.init_stable();
935 else
936 lt.init();
937
938 total_length = std::min(total_length, length);
939
940 int source;
941
942 if (stable)
943 {
944 for (difference_type i = 0; i < total_length; i++)
945 {
946 // Take out.
947 source = lt.get_min_source();
948
949 *(target++) = *(seqs_begin[source].first++);
950
951 // Feed.
952 if (seqs_begin[source].first == seqs_begin[source].second)
953 lt.delete_min_insert_stable(*arbitrary_element, true);
954 else
955 // Replace from same source.
956 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
957
958 }
959 }
960 else
961 {
962 for (difference_type i = 0; i < total_length; i++)
963 {
964 //take out
965 source = lt.get_min_source();
966
967 *(target++) = *(seqs_begin[source].first++);
968
969 // Feed.
970 if (seqs_begin[source].first == seqs_begin[source].second)
971 lt.delete_min_insert(*arbitrary_element, true);
972 else
973 // Replace from same source.
974 lt.delete_min_insert(*seqs_begin[source].first, false);
975 }
976 }
977
978 return target;
979 }
980
981 /** @brief Multi-way merging procedure for a high branching factor,
982 * unguarded case.
983 *
984 * The head elements are kept in a loser tree.
985 * @param seqs_begin Begin iterator of iterator pair input sequence.
986 * @param seqs_end End iterator of iterator pair input sequence.
987 * @param target Begin iterator out output sequence.
988 * @param comp Comparator.
989 * @param length Maximum length to merge.
990 * @param stable Stable merging incurs a performance penalty.
991 * @return End iterator of output sequence.
992 * @pre No input will run out of elements during the merge.
993 */
994 template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
995 RandomAccessIterator3
996 multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
997 {
998 _GLIBCXX_CALL(length)
999 typedef _DifferenceTp difference_type;
1000
1001 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1002 RandomAccessIterator1;
1003 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1004 value_type;
1005
1006 int k = seqs_end - seqs_begin;
1007
1008 LT lt(k, comp);
1009
1010 difference_type total_length = 0;
1011
1012 for (int t = 0; t < k; t++)
1013 {
1014 #if _GLIBCXX_ASSERTIONS
1015 _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
1016 #endif
1017 if (stable)
1018 lt.insert_start_stable(*seqs_begin[t].first, t, false);
1019 else
1020 lt.insert_start(*seqs_begin[t].first, t, false);
1021
1022 total_length += LENGTH(seqs_begin[t]);
1023 }
1024
1025 if (stable)
1026 lt.init_stable();
1027 else
1028 lt.init();
1029
1030 // Do not go past end.
1031 length = std::min(total_length, length);
1032
1033 int source;
1034
1035 #if _GLIBCXX_ASSERTIONS
1036 difference_type i = 0;
1037 #endif
1038
1039 if (stable)
1040 {
1041 RandomAccessIterator3 target_end = target + length;
1042 while (target < target_end)
1043 {
1044 // Take out.
1045 source = lt.get_min_source();
1046
1047 #if _GLIBCXX_ASSERTIONS
1048 _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
1049 #endif
1050
1051 *(target++) = *(seqs_begin[source].first++);
1052
1053 #if _GLIBCXX_ASSERTIONS
1054 _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1));
1055 i++;
1056 #endif
1057 // Feed.
1058 // Replace from same source.
1059 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
1060
1061 }
1062 }
1063 else
1064 {
1065 RandomAccessIterator3 target_end = target + length;
1066 while (target < target_end)
1067 {
1068 // Take out.
1069 source = lt.get_min_source();
1070
1071 #if _GLIBCXX_ASSERTIONS
1072 if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
1073 printf(" %i %i %i\n", length, i, source);
1074 _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
1075 #endif
1076
1077 *(target++) = *(seqs_begin[source].first++);
1078
1079 #if _GLIBCXX_ASSERTIONS
1080 if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)))
1081 printf(" %i %i %i\n", length, i, source);
1082 _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1));
1083 i++;
1084 #endif
1085 // Feed.
1086 // Replace from same source.
1087 lt.delete_min_insert(*seqs_begin[source].first, false);
1088 }
1089 }
1090
1091 return target;
1092 }
1093
1094 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1095 RandomAccessIterator3
1096 multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
1097 {
1098 _GLIBCXX_CALL(length)
1099
1100 typedef _DifferenceTp difference_type;
1101
1102 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1103 RandomAccessIterator1;
1104 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1105 value_type;
1106
1107 int min_seq;
1108 RandomAccessIterator3 target_end;
1109 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
1110 comp, min_seq, stable);
1111
1112 difference_type total_length = 0;
1113 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1114 total_length += LENGTH(*s);
1115
1116 if (overhang != -1)
1117 {
1118 difference_type unguarded_length = std::min(length, total_length - overhang);
1119 target_end = multiway_merge_loser_tree_unguarded
1120 <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
1121 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1122 overhang = length - unguarded_length;
1123 }
1124 else
1125 {
1126 // Empty sequence found.
1127 overhang = length;
1128 target_end = target;
1129 }
1130
1131 #if _GLIBCXX_ASSERTIONS
1132 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1133 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1134 #endif
1135
1136 target_end = multiway_merge_loser_tree
1137 <typename loser_tree_traits<value_type, Comparator>::LT>
1138 (seqs_begin, seqs_end, target_end, comp, overhang, stable);
1139
1140 #if _GLIBCXX_ASSERTIONS
1141 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1142 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1143 #endif
1144
1145 return target_end;
1146 }
1147
1148 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1149 RandomAccessIterator3
1150 multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
1151 {
1152 _GLIBCXX_CALL(length)
1153
1154 typedef _DifferenceTp difference_type;
1155 typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
1156 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1157 RandomAccessIterator1;
1158 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1159 value_type;
1160 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1161 RandomAccessIterator1;
1162
1163 RandomAccessIterator3 target_end;
1164 difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
1165
1166 difference_type total_length = 0;
1167 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1168 {
1169 total_length += LENGTH(*s);
1170
1171 // Sentinel spot.
1172 (*s).second++;
1173 }
1174
1175 difference_type unguarded_length = std::min(length, total_length - overhang);
1176 target_end = multiway_merge_loser_tree_unguarded
1177 <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
1178 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1179 overhang = length - unguarded_length;
1180
1181 #if _GLIBCXX_ASSERTIONS
1182 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1183 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1184 #endif
1185
1186 // Copy rest stable.
1187 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end && overhang > 0; s++)
1188 {
1189 // Restore.
1190 (*s).second--;
1191 difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s));
1192 target_end = std::copy((*s).first, (*s).first + local_length, target_end);
1193 (*s).first += local_length;
1194 overhang -= local_length;
1195 }
1196
1197 #if _GLIBCXX_ASSERTIONS
1198 _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
1199 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1200 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1201 #endif
1202
1203 return target_end;
1204 }
1205
1206 /** @brief Sequential multi-way merging switch.
1207 *
1208 * The decision if based on the branching factor and runtime settings.
1209 * @param seqs_begin Begin iterator of iterator pair input sequence.
1210 * @param seqs_end End iterator of iterator pair input sequence.
1211 * @param target Begin iterator out output sequence.
1212 * @param comp Comparator.
1213 * @param length Maximum length to merge.
1214 * @param stable Stable merging incurs a performance penalty.
1215 * @param sentinel The sequences have a sentinel element.
1216 * @return End iterator of output sequence. */
1217 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1218 RandomAccessIterator3
1219 multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag)
1220 {
1221 _GLIBCXX_CALL(length)
1222
1223 typedef _DifferenceTp difference_type;
1224 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1225 RandomAccessIterator1;
1226 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1227 value_type;
1228
1229 #if _GLIBCXX_ASSERTIONS
1230 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1231 _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
1232 #endif
1233
1234 RandomAccessIterator3 return_target = target;
1235 int k = static_cast<int>(seqs_end - seqs_begin);
1236
1237 Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm;
1238
1239 if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
1240 mwma = Settings::LOSER_TREE_COMBINED;
1241
1242 switch (k)
1243 {
1244 case 0:
1245 break;
1246 case 1:
1247 return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target);
1248 seqs_begin[0].first += length;
1249 break;
1250 case 2:
1251 return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp);
1252 break;
1253 case 3:
1254 switch (mwma)
1255 {
1256 case Settings::LOSER_TREE_COMBINED:
1257 return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable);
1258 break;
1259 case Settings::LOSER_TREE_SENTINEL:
1260 return_target = multiway_merge_3_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1261 break;
1262 default:
1263 return_target = multiway_merge_3_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1264 break;
1265 }
1266 break;
1267 case 4:
1268 switch (mwma)
1269 {
1270 case Settings::LOSER_TREE_COMBINED:
1271 return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable);
1272 break;
1273 case Settings::LOSER_TREE_SENTINEL:
1274 return_target = multiway_merge_4_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1275 break;
1276 default:
1277 return_target = multiway_merge_4_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1278 break;
1279 }
1280 break;
1281 default:
1282 {
1283 switch (mwma)
1284 {
1285 case Settings::BUBBLE:
1286 return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable);
1287 break;
1288 #if _GLIBCXX_LOSER_TREE_EXPLICIT
1289 case Settings::LOSER_TREE_EXPLICIT:
1290 return_target = multiway_merge_loser_tree<LoserTreeExplicit<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
1291 break;
1292 #endif
1293 #if _GLIBCXX_LOSER_TREE
1294 case Settings::LOSER_TREE:
1295 return_target = multiway_merge_loser_tree<LoserTree<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
1296 break;
1297 #endif
1298 #if _GLIBCXX_LOSER_TREE_COMBINED
1299 case Settings::LOSER_TREE_COMBINED:
1300 return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable);
1301 break;
1302 #endif
1303 #if _GLIBCXX_LOSER_TREE_SENTINEL
1304 case Settings::LOSER_TREE_SENTINEL:
1305 return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable);
1306 break;
1307 #endif
1308 default:
1309 // multiway_merge algorithm not implemented.
1310 _GLIBCXX_PARALLEL_ASSERT(0);
1311 break;
1312 }
1313 }
1314 }
1315 #if _GLIBCXX_ASSERTIONS
1316 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1317 #endif
1318
1319 return return_target;
1320 }
1321
1322 /** @brief Parallel multi-way merge routine.
1323 *
1324 * The decision if based on the branching factor and runtime settings.
1325 * @param seqs_begin Begin iterator of iterator pair input sequence.
1326 * @param seqs_end End iterator of iterator pair input sequence.
1327 * @param target Begin iterator out output sequence.
1328 * @param comp Comparator.
1329 * @param length Maximum length to merge.
1330 * @param stable Stable merging incurs a performance penalty.
1331 * @param sentinel Ignored.
1332 * @return End iterator of output sequence.
1333 */
1334 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1335 RandomAccessIterator3
1336 parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel)
1337 {
1338 _GLIBCXX_CALL(length)
1339
1340 typedef _DifferenceTp difference_type;
1341 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1342 RandomAccessIterator1;
1343 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1344 value_type;
1345
1346 #if _GLIBCXX_ASSERTIONS
1347 for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++)
1348 _GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp));
1349 #endif
1350
1351 // k sequences.
1352 int k = static_cast<int>(seqs_end - seqs_begin);
1353
1354 difference_type total_length = 0;
1355 for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++)
1356 total_length += LENGTH(*raii);
1357
1358 _GLIBCXX_CALL(total_length)
1359
1360 if (total_length == 0 || k == 0)
1361 return target;
1362
1363 thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
1364
1365 bool tight = (total_length == length);
1366
1367 // Thread t will have to merge pieces[iam][0..k - 1]
1368 std::vector<std::pair<difference_type, difference_type> >* pieces = new std::vector<std::pair<difference_type, difference_type> >[num_threads];
1369 for (int s = 0; s < num_threads; s++)
1370 pieces[s].resize(k);
1371
1372 difference_type num_samples = Settings::merge_oversampling * num_threads;
1373
1374 if (Settings::multiway_merge_splitting == Settings::SAMPLING)
1375 {
1376 value_type* samples = static_cast<value_type*>(::operator new(sizeof(value_type) * k * num_samples));
1377 // Sample.
1378 for (int s = 0; s < k; s++)
1379 for (int i = 0; (difference_type)i < num_samples; i++)
1380 {
1381 difference_type sample_index = static_cast<difference_type>(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length));
1382 samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
1383 }
1384
1385 if (stable)
1386 __gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp);
1387 else
1388 __gnu_sequential::sort(samples, samples + (num_samples * k), comp);
1389
1390 for (int slab = 0; slab < num_threads; slab++)
1391 // For each slab / processor.
1392 for (int seq = 0; seq < k; seq++)
1393 {
1394 // For each sequence.
1395 if (slab > 0)
1396 pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first;
1397 else
1398 {
1399 // Absolute beginning.
1400 pieces[slab][seq].first = 0;
1401 }
1402 if ((slab + 1) < num_threads)
1403 pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first;
1404 else
1405 pieces[slab][seq].second = LENGTH(seqs_begin[seq]); //absolute ending
1406 }
1407 delete[] samples;
1408 }
1409 else
1410 {
1411 // (Settings::multiway_merge_splitting == Settings::EXACT).
1412 std::vector<RandomAccessIterator1>* offsets = new std::vector<RandomAccessIterator1>[num_threads];
1413 std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > se(k);
1414
1415 copy(seqs_begin, seqs_end, se.begin());
1416
1417 difference_type* borders = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_threads + 1)));
1418 equally_split(length, num_threads, borders);
1419
1420 for (int s = 0; s < (num_threads - 1); s++)
1421 {
1422 offsets[s].resize(k);
1423 multiseq_partition(se.begin(), se.end(), borders[s + 1],
1424 offsets[s].begin(), comp);
1425
1426 // Last one also needed and available.
1427 if (!tight)
1428 {
1429 offsets[num_threads - 1].resize(k);
1430 multiseq_partition(se.begin(), se.end(),
1431 difference_type(length),
1432 offsets[num_threads - 1].begin(), comp);
1433 }
1434 }
1435
1436
1437 for (int slab = 0; slab < num_threads; slab++)
1438 {
1439 // For each slab / processor.
1440 for (int seq = 0; seq < k; seq++)
1441 {
1442 // For each sequence.
1443 if (slab == 0)
1444 {
1445 // Absolute beginning.
1446 pieces[slab][seq].first = 0;
1447 }
1448 else
1449 pieces[slab][seq].first = pieces[slab - 1][seq].second;
1450 if (!tight || slab < (num_threads - 1))
1451 pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first;
1452 else
1453 {
1454 // slab == num_threads - 1
1455 pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
1456 }
1457 }
1458 }
1459 delete[] offsets;
1460 }
1461
1462 # pragma omp parallel num_threads(num_threads)
1463 {
1464 thread_index_t iam = omp_get_thread_num();
1465
1466 difference_type target_position = 0;
1467
1468 for (int c = 0; c < k; c++)
1469 target_position += pieces[iam][c].first;
1470
1471 if (k > 2)
1472 {
1473 std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks = new std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
1474
1475 difference_type local_length = 0;
1476 for (int s = 0; s < k; s++)
1477 {
1478 chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second);
1479 local_length += LENGTH(chunks[s]);
1480 }
1481
1482 multiway_merge(chunks, chunks + k, target + target_position, comp,
1483 std::min(local_length, length - target_position),
1484 stable, false, sequential_tag());
1485
1486 delete[] chunks;
1487 }
1488 else if (k == 2)
1489 {
1490 RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first;
1491 merge_advance(begin0,
1492 seqs_begin[0].first + pieces[iam][0].second,
1493 begin1,
1494 seqs_begin[1].first + pieces[iam][1].second,
1495 target + target_position,
1496 (pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
1497 comp);
1498 }
1499 }
1500
1501 #if _GLIBCXX_ASSERTIONS
1502 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1503 #endif
1504
1505 // Update ends of sequences.
1506 for (int s = 0; s < k; s++)
1507 seqs_begin[s].first += pieces[num_threads - 1][s].second;
1508
1509 delete[] pieces;
1510
1511 return target + length;
1512 }
1513
1514 /**
1515 * @brief Multi-way merging front-end.
1516 * @param seqs_begin Begin iterator of iterator pair input sequence.
1517 * @param seqs_end End iterator of iterator pair input sequence.
1518 * @param target Begin iterator out output sequence.
1519 * @param comp Comparator.
1520 * @param length Maximum length to merge.
1521 * @param stable Stable merging incurs a performance penalty.
1522 * @return End iterator of output sequence.
1523 */
1524 template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1525 RandomAccessIterator3
1526 multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
1527 RandomAccessIteratorPairIterator seqs_end,
1528 RandomAccessIterator3 target, Comparator comp,
1529 _DifferenceTp length, bool stable)
1530 {
1531 typedef _DifferenceTp difference_type;
1532 _GLIBCXX_CALL(seqs_end - seqs_begin)
1533
1534 if (seqs_begin == seqs_end)
1535 return target;
1536
1537 RandomAccessIterator3 target_end;
1538 if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1539 target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false);
1540 else
1541 target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag());
1542
1543 return target_end;
1544 }
1545
1546 /** @brief Multi-way merging front-end.
1547 * @param seqs_begin Begin iterator of iterator pair input sequence.
1548 * @param seqs_end End iterator of iterator pair input sequence.
1549 * @param target Begin iterator out output sequence.
1550 * @param comp Comparator.
1551 * @param length Maximum length to merge.
1552 * @param stable Stable merging incurs a performance penalty.
1553 * @return End iterator of output sequence.
1554 * @pre For each @c i, @c seqs_begin[i].second must be the end
1555 * marker of the sequence, but also reference the one more sentinel
1556 * element. */
1557 template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1558 RandomAccessIterator3
1559 multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
1560 RandomAccessIteratorPairIterator seqs_end,
1561 RandomAccessIterator3 target,
1562 Comparator comp,
1563 _DifferenceTp length,
1564 bool stable)
1565 {
1566 typedef _DifferenceTp difference_type;
1567
1568 if (seqs_begin == seqs_end)
1569 return target;
1570
1571 _GLIBCXX_CALL(seqs_end - seqs_begin)
1572
1573 if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1574 return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (typename std::iterator_traits<RandomAccessIterator3>::difference_type)length, stable, true);
1575 else
1576 return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag());
1577 }
1578 }
1579
1580 #endif