base.h (__gnu_parallel::less<Tp>): Add partial specialization for one argument.
[gcc.git] / libstdc++-v3 / include / parallel / multiway_merge.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
20
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
29 // Public License.
30
31 /** @file parallel/multiway_merge.h
32 * @brief Implementation of sequential and parallel multiway merge.
33 * This file is a GNU parallel extension to the Standard C++ Library.
34 */
35
36 // Written by Johannes Singler.
37
38 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
39 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
40
41 #include <vector>
42
43 #include <bits/stl_algo.h>
44 #include <parallel/features.h>
45 #include <parallel/parallel.h>
46 #include <parallel/merge.h>
47 #include <parallel/losertree.h>
48 #include <parallel/timing.h>
49 #if _GLIBCXX_ASSERTIONS
50 #include <parallel/checkers.h>
51 #endif
52
53 /** @brief Length of a sequence described by a pair of iterators. */
54 #define LENGTH(s) ((s).second - (s).first)
55
56 // XXX need iterator typedefs
57 namespace __gnu_parallel
58 {
59 template<typename RandomAccessIterator, typename Comparator>
60 class guarded_iterator;
61
62 template<typename RandomAccessIterator, typename Comparator>
63 inline bool
64 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
65 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
66
67 template<typename RandomAccessIterator, typename Comparator>
68 inline bool
69 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
70 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
71
72 /** @brief Iterator wrapper supporting an implicit supremum at the end
73 of the sequence, dominating all comparisons.
74 * Deriving from RandomAccessIterator is not possible since
75 * RandomAccessIterator need not be a class.
76 */
77 template<typename RandomAccessIterator, typename Comparator>
78 class guarded_iterator
79 {
80 private:
81 /** @brief Current iterator position. */
82 RandomAccessIterator current;
83
84 /** @brief End iterator of the sequence. */
85 RandomAccessIterator end;
86
87 /** @brief Comparator. */
88 Comparator& comp;
89
90 public:
91 /** @brief Constructor. Sets iterator to beginning of sequence.
92 * @param begin Begin iterator of sequence.
93 * @param end End iterator of sequence.
94 * @param comp Comparator provided for associated overloaded
95 * compare operators. */
96 inline guarded_iterator(RandomAccessIterator begin,
97 RandomAccessIterator end, Comparator& comp)
98 : current(begin), end(end), comp(comp)
99 { }
100
101 /** @brief Pre-increment operator.
102 * @return This. */
103 inline guarded_iterator<RandomAccessIterator, Comparator>&
104 operator++()
105 {
106 ++current;
107 return *this;
108 }
109
110 /** @brief Dereference operator.
111 * @return Referenced element. */
112 inline typename std::iterator_traits<RandomAccessIterator>::value_type
113 operator*()
114 { return *current; }
115
116 /** @brief Convert to wrapped iterator.
117 * @return Wrapped iterator. */
118 inline operator RandomAccessIterator()
119 { return current; }
120
121 friend bool
122 operator< <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
123
124 friend bool
125 operator<= <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
126 };
127
128 /** @brief Compare two elements referenced by guarded iterators.
129 * @param bi1 First iterator.
130 * @param bi2 Second iterator.
131 * @return @c True if less. */
132 template<typename RandomAccessIterator, typename Comparator>
133 inline bool
134 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
135 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
136 {
137 if (bi1.current == bi1.end) //bi1 is sup
138 return bi2.current == bi2.end; //bi2 is not sup
139 if (bi2.current == bi2.end) //bi2 is sup
140 return true;
141 return (bi1.comp)(*bi1, *bi2); //normal compare
142 }
143
144 /** @brief Compare two elements referenced by guarded iterators.
145 * @param bi1 First iterator.
146 * @param bi2 Second iterator.
147 * @return @c True if less equal. */
148 template<typename RandomAccessIterator, typename Comparator>
149 inline bool
150 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
151 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
152 {
153 if (bi2.current == bi2.end) //bi1 is sup
154 return bi1.current != bi1.end; //bi2 is not sup
155 if (bi1.current == bi1.end) //bi2 is sup
156 return false;
157 return !(bi1.comp)(*bi2, *bi1); //normal compare
158 }
159
160 template<typename RandomAccessIterator, typename Comparator>
161 class unguarded_iterator;
162
163 template<typename RandomAccessIterator, typename Comparator>
164 inline bool
165 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
166 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
167
168 template<typename RandomAccessIterator, typename Comparator>
169 inline bool
170 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
171 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
172
173 template<typename RandomAccessIterator, typename Comparator>
174 class unguarded_iterator
175 {
176 private:
177 /** @brief Current iterator position. */
178 RandomAccessIterator& current;
179 /** @brief Comparator. */
180 mutable Comparator& comp;
181
182 public:
183 /** @brief Constructor. Sets iterator to beginning of sequence.
184 * @param begin Begin iterator of sequence.
185 * @param end Unused, only for compatibility.
186 * @param comp Unused, only for compatibility. */
187 inline unguarded_iterator(RandomAccessIterator begin,
188 RandomAccessIterator end, Comparator& comp)
189 : current(begin), comp(comp)
190 { }
191
192 /** @brief Pre-increment operator.
193 * @return This. */
194 inline unguarded_iterator<RandomAccessIterator, Comparator>&
195 operator++()
196 {
197 current++;
198 return *this;
199 }
200
201 /** @brief Dereference operator.
202 * @return Referenced element. */
203 inline typename std::iterator_traits<RandomAccessIterator>::value_type
204 operator*()
205 { return *current; }
206
207 /** @brief Convert to wrapped iterator.
208 * @return Wrapped iterator. */
209 inline
210 operator RandomAccessIterator()
211 { return current; }
212
213 friend bool
214 operator< <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
215
216 friend bool
217 operator<= <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
218 };
219
220 /** @brief Compare two elements referenced by unguarded iterators.
221 * @param bi1 First iterator.
222 * @param bi2 Second iterator.
223 * @return @c True if less. */
224 template<typename RandomAccessIterator, typename Comparator>
225 inline bool
226 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
227 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
228 {
229 // Normal compare.
230 return (bi1.comp)(*bi1, *bi2);
231 }
232
233 /** @brief Compare two elements referenced by unguarded iterators.
234 * @param bi1 First iterator.
235 * @param bi2 Second iterator.
236 * @return @c True if less equal. */
237 template<typename RandomAccessIterator, typename Comparator>
238 inline bool
239 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
240 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
241 {
242 // Normal compare.
243 return !(bi1.comp)(*bi2, *bi1);
244 }
245
246 /** Prepare a set of sequences to be merged without a (end) guard
247 * @param seqs_begin
248 * @param seqs_end
249 * @param comp
250 * @param min_sequence
251 * @param stable
252 * @pre (seqs_end - seqs_begin > 0) */
253 template<typename RandomAccessIteratorIterator, typename Comparator>
254 typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
255 prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
256 RandomAccessIteratorIterator seqs_end, Comparator comp,
257 int& min_sequence, bool stable)
258 {
259 _GLIBCXX_CALL(seqs_end - seqs_begin)
260
261 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
262 RandomAccessIterator1;
263 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
264 value_type;
265 typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
266 difference_type;
267
268 if ((*seqs_begin).first == (*seqs_begin).second)
269 {
270 // Empty sequence found, it's the first one.
271 min_sequence = 0;
272 return -1;
273 }
274
275 // Last element in sequence.
276 value_type min = *((*seqs_begin).second - 1);
277 min_sequence = 0;
278 for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++)
279 {
280 if ((*s).first == (*s).second)
281 {
282 // Empty sequence found.
283 min_sequence = static_cast<int>(s - seqs_begin);
284 return -1;
285 }
286
287 // Last element in sequence.
288 const value_type& v = *((*s).second - 1);
289 if (comp(v, min)) //strictly smaller
290 {
291 min = v;
292 min_sequence = static_cast<int>(s - seqs_begin);
293 }
294 }
295
296 difference_type overhang_size = 0;
297
298 int s = 0;
299 for (s = 0; s <= min_sequence; s++)
300 {
301 RandomAccessIterator1 split;
302 if (stable)
303 split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
304 min, comp);
305 else
306 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
307 min, comp);
308
309 overhang_size += seqs_begin[s].second - split;
310 }
311
312 for (; s < (seqs_end - seqs_begin); s++)
313 {
314 RandomAccessIterator1 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp);
315 overhang_size += seqs_begin[s].second - split;
316 }
317
318 // So many elements will be left over afterwards.
319 return overhang_size;
320 }
321
322 /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
323 * @param seqs_begin
324 * @param seqs_end
325 * @param comp */
326 template<typename RandomAccessIteratorIterator, typename Comparator>
327 typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
328 prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
329 RandomAccessIteratorIterator seqs_end,
330 Comparator comp)
331 {
332 _GLIBCXX_CALL(seqs_end - seqs_begin)
333
334 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
335 RandomAccessIterator1;
336 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
337 value_type;
338 typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
339 difference_type;
340
341 // Last element in sequence.
342 value_type max;
343 bool max_found = false;
344 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
345 {
346 if ((*s).first == (*s).second)
347 continue;
348
349 // Last element in sequence.
350 value_type& v = *((*s).second - 1);
351
352 // Strictly greater.
353 if (!max_found || comp(max, v))
354 max = v;
355 max_found = true;
356 }
357
358 difference_type overhang_size = 0;
359
360 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
361 {
362 RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second, max, comp);
363 overhang_size += (*s).second - split;
364
365 // Set sentinel.
366 *((*s).second) = max;
367 }
368
369 // So many elements will be left over afterwards.
370 return overhang_size;
371 }
372
373 /** @brief Highly efficient 3-way merging procedure.
374 * @param seqs_begin Begin iterator of iterator pair input sequence.
375 * @param seqs_end End iterator of iterator pair input sequence.
376 * @param target Begin iterator out output sequence.
377 * @param comp Comparator.
378 * @param length Maximum length to merge.
379 * @param stable Unused, stable anyway.
380 * @return End iterator of output sequence. */
381 template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
382 RandomAccessIterator3
383 multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
384 {
385 _GLIBCXX_CALL(length);
386
387 typedef _DifferenceTp difference_type;
388
389 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
390 RandomAccessIterator1;
391 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
392 value_type;
393
394 if (length == 0)
395 return target;
396
397 iterator<RandomAccessIterator1, Comparator>
398 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
399 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
400 seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
401
402 if (seq0 <= seq1)
403 {
404 if (seq1 <= seq2)
405 goto s012;
406 else
407 if (seq2 < seq0)
408 goto s201;
409 else
410 goto s021;
411 }
412 else
413 {
414 if (seq1 <= seq2)
415 {
416 if (seq0 <= seq2)
417 goto s102;
418 else
419 goto s120;
420 }
421 else
422 goto s210;
423 }
424
425 #define Merge3Case(a,b,c,c0,c1) \
426 s ## a ## b ## c : \
427 *target = *seq ## a; \
428 ++target; \
429 length--; \
430 ++seq ## a; \
431 if (length == 0) goto finish; \
432 if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
433 if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
434 goto s ## b ## c ## a;
435
436 Merge3Case(0, 1, 2, <=, <=);
437 Merge3Case(1, 2, 0, <=, < );
438 Merge3Case(2, 0, 1, < , < );
439 Merge3Case(1, 0, 2, < , <=);
440 Merge3Case(0, 2, 1, <=, <=);
441 Merge3Case(2, 1, 0, < , < );
442
443 #undef Merge3Case
444
445 finish:
446 ;
447
448 seqs_begin[0].first = seq0;
449 seqs_begin[1].first = seq1;
450 seqs_begin[2].first = seq2;
451
452 return target;
453 }
454
455 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
456 RandomAccessIterator3
457 multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
458 {
459 _GLIBCXX_CALL(length);
460
461 typedef _DifferenceTp difference_type;
462 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
463 RandomAccessIterator1;
464 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
465 value_type;
466
467 int min_seq;
468 RandomAccessIterator3 target_end;
469
470 // Stable anyway.
471 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
472
473 difference_type total_length = 0;
474 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
475 total_length += LENGTH(*s);
476
477 if (overhang != -1)
478 {
479 difference_type unguarded_length = std::min(length, total_length - overhang);
480 target_end = multiway_merge_3_variant<unguarded_iterator>
481 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
482 overhang = length - unguarded_length;
483 }
484 else
485 {
486 // Empty sequence found.
487 overhang = length;
488 target_end = target;
489 }
490
491 #if _GLIBCXX_ASSERTIONS
492 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
493 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
494 #endif
495
496 switch (min_seq)
497 {
498 case 0:
499 // Iterators will be advanced accordingly.
500 target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
501 seqs_begin[2].first, seqs_begin[2].second,
502 target_end, overhang, comp);
503 break;
504 case 1:
505 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
506 seqs_begin[2].first, seqs_begin[2].second,
507 target_end, overhang, comp);
508 break;
509 case 2:
510 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
511 seqs_begin[1].first, seqs_begin[1].second,
512 target_end, overhang, comp);
513 break;
514 default:
515 _GLIBCXX_PARALLEL_ASSERT(false);
516 }
517
518 #if _GLIBCXX_ASSERTIONS
519 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
520 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
521 #endif
522
523 return target_end;
524 }
525
526 /** @brief Highly efficient 4-way merging procedure.
527 * @param seqs_begin Begin iterator of iterator pair input sequence.
528 * @param seqs_end End iterator of iterator pair input sequence.
529 * @param target Begin iterator out output sequence.
530 * @param comp Comparator.
531 * @param length Maximum length to merge.
532 * @param stable Unused, stable anyway.
533 * @return End iterator of output sequence. */
534 template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
535 RandomAccessIterator3
536 multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
537 {
538 _GLIBCXX_CALL(length);
539 typedef _DifferenceTp difference_type;
540
541 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
542 RandomAccessIterator1;
543 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
544 value_type;
545
546 iterator<RandomAccessIterator1, Comparator>
547 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
548 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
549 seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
550 seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
551
552 #define Decision(a,b,c,d) { \
553 if (seq ## d < seq ## a) goto s ## d ## a ## b ## c; \
554 if (seq ## d < seq ## b) goto s ## a ## d ## b ## c; \
555 if (seq ## d < seq ## c) goto s ## a ## b ## d ## c; \
556 goto s ## a ## b ## c ## d; }
557
558 if (seq0 <= seq1)
559 {
560 if (seq1 <= seq2)
561 Decision(0,1,2,3)
562 else
563 if (seq2 < seq0)
564 Decision(2,0,1,3)
565 else
566 Decision(0,2,1,3)
567 }
568 else
569 {
570 if (seq1 <= seq2)
571 {
572 if (seq0 <= seq2)
573 Decision(1,0,2,3)
574 else
575 Decision(1,2,0,3)
576 }
577 else
578 Decision(2,1,0,3)
579 }
580
581 #define Merge4Case(a,b,c,d,c0,c1,c2) \
582 s ## a ## b ## c ## d: \
583 if (length == 0) goto finish; \
584 *target = *seq ## a; \
585 ++target; \
586 length--; \
587 ++seq ## a; \
588 if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \
589 if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \
590 if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \
591 goto s ## b ## c ## d ## a;
592
593 Merge4Case(0, 1, 2, 3, <=, <=, <=);
594 Merge4Case(0, 1, 3, 2, <=, <=, <=);
595 Merge4Case(0, 2, 1, 3, <=, <=, <=);
596 Merge4Case(0, 2, 3, 1, <=, <=, <=);
597 Merge4Case(0, 3, 1, 2, <=, <=, <=);
598 Merge4Case(0, 3, 2, 1, <=, <=, <=);
599 Merge4Case(1, 0, 2, 3, < , <=, <=);
600 Merge4Case(1, 0, 3, 2, < , <=, <=);
601 Merge4Case(1, 2, 0, 3, <=, < , <=);
602 Merge4Case(1, 2, 3, 0, <=, <=, < );
603 Merge4Case(1, 3, 0, 2, <=, < , <=);
604 Merge4Case(1, 3, 2, 0, <=, <=, < );
605 Merge4Case(2, 0, 1, 3, < , < , <=);
606 Merge4Case(2, 0, 3, 1, < , <=, < );
607 Merge4Case(2, 1, 0, 3, < , < , <=);
608 Merge4Case(2, 1, 3, 0, < , <=, < );
609 Merge4Case(2, 3, 0, 1, <=, < , < );
610 Merge4Case(2, 3, 1, 0, <=, < , < );
611 Merge4Case(3, 0, 1, 2, < , < , < );
612 Merge4Case(3, 0, 2, 1, < , < , < );
613 Merge4Case(3, 1, 0, 2, < , < , < );
614 Merge4Case(3, 1, 2, 0, < , < , < );
615 Merge4Case(3, 2, 0, 1, < , < , < );
616 Merge4Case(3, 2, 1, 0, < , < , < );
617
618 #undef Merge4Case
619 #undef Decision
620
621 finish:
622 ;
623
624 seqs_begin[0].first = seq0;
625 seqs_begin[1].first = seq1;
626 seqs_begin[2].first = seq2;
627 seqs_begin[3].first = seq3;
628
629 return target;
630 }
631
632 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
633 RandomAccessIterator3
634 multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
635 {
636 _GLIBCXX_CALL(length);
637 typedef _DifferenceTp difference_type;
638
639 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
640 RandomAccessIterator1;
641 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
642 value_type;
643
644 int min_seq;
645 RandomAccessIterator3 target_end;
646
647 // Stable anyway.
648 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
649
650 difference_type total_length = 0;
651 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
652 total_length += LENGTH(*s);
653
654 if (overhang != -1)
655 {
656 difference_type unguarded_length = std::min(length, total_length - overhang);
657 target_end = multiway_merge_4_variant<unguarded_iterator>
658 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
659 overhang = length - unguarded_length;
660 }
661 else
662 {
663 // Empty sequence found.
664 overhang = length;
665 target_end = target;
666 }
667
668 #if _GLIBCXX_ASSERTIONS
669 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
670 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
671 #endif
672
673 std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > one_missing(seqs_begin, seqs_end);
674 one_missing.erase(one_missing.begin() + min_seq); //remove
675
676 target_end = multiway_merge_3_variant<guarded_iterator>(one_missing.begin(), one_missing.end(), target_end, comp, overhang, stable);
677
678 // Insert back again.
679 one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
680 // Write back modified iterators.
681 copy(one_missing.begin(), one_missing.end(), seqs_begin);
682
683 #if _GLIBCXX_ASSERTIONS
684 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
685 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
686 #endif
687
688 return target_end;
689 }
690
691 /** @brief Basic multi-way merging procedure.
692 *
693 * The head elements are kept in a sorted array, new heads are
694 * inserted linearly.
695 * @param seqs_begin Begin iterator of iterator pair input sequence.
696 * @param seqs_end End iterator of iterator pair input sequence.
697 * @param target Begin iterator out output sequence.
698 * @param comp Comparator.
699 * @param length Maximum length to merge.
700 * @param stable Stable merging incurs a performance penalty.
701 * @return End iterator of output sequence.
702 */
703 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
704 RandomAccessIterator3
705 multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
706 {
707 _GLIBCXX_CALL(length)
708
709 typedef _DifferenceTp difference_type;
710 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
711 RandomAccessIterator1;
712 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
713 value_type;
714
715 // Num remaining pieces.
716 int k = static_cast<int>(seqs_end - seqs_begin), nrp;
717
718 value_type* pl = new value_type[k];
719 int* source = new int[k];
720 difference_type total_length = 0;
721
722 #define POS(i) seqs_begin[(i)].first
723 #define STOPS(i) seqs_begin[(i)].second
724
725 // Write entries into queue.
726 nrp = 0;
727 for (int pi = 0; pi < k; pi++)
728 {
729 if (STOPS(pi) != POS(pi))
730 {
731 pl[nrp] = *(POS(pi));
732 source[nrp] = pi;
733 nrp++;
734 total_length += LENGTH(seqs_begin[pi]);
735 }
736 }
737
738 if (stable)
739 {
740 for (int k = 0; k < nrp - 1; k++)
741 for (int pi = nrp - 1; pi > k; pi--)
742 if (comp(pl[pi], pl[pi - 1]) ||
743 (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
744 {
745 std::swap(pl[pi - 1], pl[pi]);
746 std::swap(source[pi - 1], source[pi]);
747 }
748 }
749 else
750 {
751 for (int k = 0; k < nrp - 1; k++)
752 for (int pi = nrp - 1; pi > k; pi--)
753 if (comp(pl[pi], pl[pi-1]))
754 {
755 std::swap(pl[pi-1], pl[pi]);
756 std::swap(source[pi-1], source[pi]);
757 }
758 }
759
760 // Iterate.
761 if (stable)
762 {
763 int j;
764 while (nrp > 0 && length > 0)
765 {
766 if (source[0] < source[1])
767 {
768 // pl[0] <= pl[1]
769 while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
770 {
771 *target = pl[0];
772 ++target;
773 ++POS(source[0]);
774 length--;
775 if (POS(source[0]) == STOPS(source[0]))
776 {
777 // Move everything to the left.
778 for (int s = 0; s < nrp - 1; s++)
779 {
780 pl[s] = pl[s + 1];
781 source[s] = source[s + 1];
782 }
783 nrp--;
784 break;
785 }
786 else
787 pl[0] = *(POS(source[0]));
788 }
789 }
790 else
791 {
792 // pl[0] < pl[1]
793 while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
794 {
795 *target = pl[0];
796 ++target;
797 ++POS(source[0]);
798 length--;
799 if (POS(source[0]) == STOPS(source[0]))
800 {
801 for (int s = 0; s < nrp - 1; s++)
802 {
803 pl[s] = pl[s + 1];
804 source[s] = source[s + 1];
805 }
806 nrp--;
807 break;
808 }
809 else
810 pl[0] = *(POS(source[0]));
811 }
812 }
813
814 // Sink down.
815 j = 1;
816 while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
817 (!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1]))))
818 {
819 std::swap(pl[j - 1], pl[j]);
820 std::swap(source[j - 1], source[j]);
821 j++;
822 }
823 }
824 }
825 else
826 {
827 int j;
828 while (nrp > 0 && length > 0)
829 {
830 // pl[0] <= pl[1]
831 while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
832 {
833 *target = pl[0];
834 ++target;
835 ++POS(source[0]);
836 length--;
837 if (POS(source[0]) == STOPS(source[0]))
838 {
839 for (int s = 0; s < (nrp - 1); s++)
840 {
841 pl[s] = pl[s + 1];
842 source[s] = source[s + 1];
843 }
844 nrp--;
845 break;
846 }
847 else
848 pl[0] = *(POS(source[0]));
849 }
850
851 // Sink down.
852 j = 1;
853 while ((j < nrp) && comp(pl[j], pl[j - 1]))
854 {
855 std::swap(pl[j - 1], pl[j]);
856 std::swap(source[j - 1], source[j]);
857 j++;
858 }
859 }
860 }
861
862 delete[] pl;
863 delete[] source;
864
865 return target;
866 }
867
868 /** @brief Multi-way merging procedure for a high branching factor,
869 * guarded case.
870 *
871 * The head elements are kept in a loser tree.
872 * @param seqs_begin Begin iterator of iterator pair input sequence.
873 * @param seqs_end End iterator of iterator pair input sequence.
874 * @param target Begin iterator out output sequence.
875 * @param comp Comparator.
876 * @param length Maximum length to merge.
877 * @param stable Stable merging incurs a performance penalty.
878 * @return End iterator of output sequence.
879 */
880 template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
881 RandomAccessIterator3
882 multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
883 {
884 _GLIBCXX_CALL(length)
885
886 typedef _DifferenceTp difference_type;
887 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
888 RandomAccessIterator1;
889 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
890 value_type;
891
892 int k = static_cast<int>(seqs_end - seqs_begin);
893
894 LT lt(k, comp);
895
896 difference_type total_length = 0;
897
898 for (int t = 0; t < k; t++)
899 {
900 if (stable)
901 {
902 if (seqs_begin[t].first == seqs_begin[t].second)
903 lt.insert_start_stable(value_type(), t, true);
904 else
905 lt.insert_start_stable(*seqs_begin[t].first, t, false);
906 }
907 else
908 {
909 if (seqs_begin[t].first == seqs_begin[t].second)
910 lt.insert_start(value_type(), t, true);
911 else
912 lt.insert_start(*seqs_begin[t].first, t, false);
913 }
914
915 total_length += LENGTH(seqs_begin[t]);
916 }
917
918 if (stable)
919 lt.init_stable();
920 else
921 lt.init();
922
923 total_length = std::min(total_length, length);
924
925 int source;
926
927 if (stable)
928 {
929 for (difference_type i = 0; i < total_length; i++)
930 {
931 // Take out.
932 source = lt.get_min_source();
933
934 *(target++) = *(seqs_begin[source].first++);
935
936 // Feed.
937 if (seqs_begin[source].first == seqs_begin[source].second)
938 lt.delete_min_insert_stable(value_type(), true);
939 else
940 // Replace from same source.
941 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
942
943 }
944 }
945 else
946 {
947 for (difference_type i = 0; i < total_length; i++)
948 {
949 //take out
950 source = lt.get_min_source();
951
952 *(target++) = *(seqs_begin[source].first++);
953
954 // Feed.
955 if (seqs_begin[source].first == seqs_begin[source].second)
956 lt.delete_min_insert(value_type(), true);
957 else
958 // Replace from same source.
959 lt.delete_min_insert(*seqs_begin[source].first, false);
960 }
961 }
962
963 return target;
964 }
965
966 /** @brief Multi-way merging procedure for a high branching factor,
967 * unguarded case.
968 *
969 * The head elements are kept in a loser tree.
970 * @param seqs_begin Begin iterator of iterator pair input sequence.
971 * @param seqs_end End iterator of iterator pair input sequence.
972 * @param target Begin iterator out output sequence.
973 * @param comp Comparator.
974 * @param length Maximum length to merge.
975 * @param stable Stable merging incurs a performance penalty.
976 * @return End iterator of output sequence.
977 * @pre No input will run out of elements during the merge.
978 */
979 template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
980 RandomAccessIterator3
981 multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
982 {
983 _GLIBCXX_CALL(length)
984 typedef _DifferenceTp difference_type;
985
986 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
987 RandomAccessIterator1;
988 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
989 value_type;
990
991 int k = seqs_end - seqs_begin;
992
993 LT lt(k, comp);
994
995 difference_type total_length = 0;
996
997 for (int t = 0; t < k; t++)
998 {
999 #if _GLIBCXX_ASSERTIONS
1000 _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
1001 #endif
1002 if (stable)
1003 lt.insert_start_stable(*seqs_begin[t].first, t, false);
1004 else
1005 lt.insert_start(*seqs_begin[t].first, t, false);
1006
1007 total_length += LENGTH(seqs_begin[t]);
1008 }
1009
1010 if (stable)
1011 lt.init_stable();
1012 else
1013 lt.init();
1014
1015 // Do not go past end.
1016 length = std::min(total_length, length);
1017
1018 int source;
1019
1020 #if _GLIBCXX_ASSERTIONS
1021 difference_type i = 0;
1022 #endif
1023
1024 if (stable)
1025 {
1026 RandomAccessIterator3 target_end = target + length;
1027 while (target < target_end)
1028 {
1029 // Take out.
1030 source = lt.get_min_source();
1031
1032 #if _GLIBCXX_ASSERTIONS
1033 _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
1034 #endif
1035
1036 *(target++) = *(seqs_begin[source].first++);
1037
1038 #if _GLIBCXX_ASSERTIONS
1039 _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1));
1040 i++;
1041 #endif
1042 // Feed.
1043 // Replace from same source.
1044 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
1045
1046 }
1047 }
1048 else
1049 {
1050 RandomAccessIterator3 target_end = target + length;
1051 while (target < target_end)
1052 {
1053 // Take out.
1054 source = lt.get_min_source();
1055
1056 #if _GLIBCXX_ASSERTIONS
1057 if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
1058 printf(" %i %i %i\n", length, i, source);
1059 _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
1060 #endif
1061
1062 *(target++) = *(seqs_begin[source].first++);
1063
1064 #if _GLIBCXX_ASSERTIONS
1065 if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)))
1066 printf(" %i %i %i\n", length, i, source);
1067 _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1));
1068 i++;
1069 #endif
1070 // Feed.
1071 // Replace from same source.
1072 lt.delete_min_insert(*seqs_begin[source].first, false);
1073 }
1074 }
1075
1076 return target;
1077 }
1078
1079 template<typename _ValueTp, class Comparator>
1080 struct loser_tree_traits
1081 {
1082 typedef LoserTree/*Pointer*/<_ValueTp, Comparator> LT;
1083 };
1084
1085
1086 /*#define NO_POINTER(T) \
1087 template<typename Comparator> \
1088 struct loser_tree_traits<T, Comparator> \
1089 { \
1090 typedef LoserTreePointer<T, Comparator> LT; \
1091 };*/
1092 //
1093 // NO_POINTER(unsigned char)
1094 // NO_POINTER(char)
1095 // NO_POINTER(unsigned short)
1096 // NO_POINTER(short)
1097 // NO_POINTER(unsigned int)
1098 // NO_POINTER(int)
1099 // NO_POINTER(unsigned long)
1100 // NO_POINTER(long)
1101 // NO_POINTER(unsigned long long)
1102 // NO_POINTER(long long)
1103 //
1104 // #undef NO_POINTER
1105
1106 template<typename _ValueTp, class Comparator>
1107 struct loser_tree_traits_unguarded
1108 {
1109 typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
1110 };
1111
1112 /*#define NO_POINTER_UNGUARDED(T) \
1113 template<typename Comparator> \
1114 struct loser_tree_traits_unguarded<T, Comparator> \
1115 { \
1116 typedef LoserTreePointerUnguarded<T, Comparator> LT; \
1117 };*/
1118 //
1119 // NO_POINTER_UNGUARDED(unsigned char)
1120 // NO_POINTER_UNGUARDED(char)
1121 // NO_POINTER_UNGUARDED(unsigned short)
1122 // NO_POINTER_UNGUARDED(short)
1123 // NO_POINTER_UNGUARDED(unsigned int)
1124 // NO_POINTER_UNGUARDED(int)
1125 // NO_POINTER_UNGUARDED(unsigned long)
1126 // NO_POINTER_UNGUARDED(long)
1127 // NO_POINTER_UNGUARDED(unsigned long long)
1128 // NO_POINTER_UNGUARDED(long long)
1129 //
1130 // #undef NO_POINTER_UNGUARDED
1131
1132 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1133 RandomAccessIterator3
1134 multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
1135 {
1136 _GLIBCXX_CALL(length)
1137
1138 typedef _DifferenceTp difference_type;
1139
1140 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1141 RandomAccessIterator1;
1142 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1143 value_type;
1144
1145 int min_seq;
1146 RandomAccessIterator3 target_end;
1147 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
1148 comp, min_seq, stable);
1149
1150 difference_type total_length = 0;
1151 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1152 total_length += LENGTH(*s);
1153
1154 if (overhang != -1)
1155 {
1156 difference_type unguarded_length = std::min(length, total_length - overhang);
1157 target_end = multiway_merge_loser_tree_unguarded
1158 <typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
1159 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1160 overhang = length - unguarded_length;
1161 }
1162 else
1163 {
1164 // Empty sequence found.
1165 overhang = length;
1166 target_end = target;
1167 }
1168
1169 #if _GLIBCXX_ASSERTIONS
1170 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1171 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1172 #endif
1173
1174 target_end = multiway_merge_loser_tree
1175 <typename loser_tree_traits<value_type, Comparator>::LT>
1176 (seqs_begin, seqs_end, target_end, comp, overhang, stable);
1177
1178 #if _GLIBCXX_ASSERTIONS
1179 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1180 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1181 #endif
1182
1183 return target_end;
1184 }
1185
1186 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1187 RandomAccessIterator3
1188 multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
1189 {
1190 _GLIBCXX_CALL(length)
1191
1192 typedef _DifferenceTp difference_type;
1193 typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
1194 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1195 RandomAccessIterator1;
1196 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1197 value_type;
1198 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1199 RandomAccessIterator1;
1200
1201 RandomAccessIterator3 target_end;
1202 difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
1203
1204 difference_type total_length = 0;
1205 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1206 {
1207 total_length += LENGTH(*s);
1208
1209 // Sentinel spot.
1210 (*s).second++;
1211 }
1212
1213 difference_type unguarded_length = std::min(length, total_length - overhang);
1214 target_end = multiway_merge_loser_tree_unguarded
1215 <typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
1216 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1217 overhang = length - unguarded_length;
1218
1219 #if _GLIBCXX_ASSERTIONS
1220 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1221 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1222 #endif
1223
1224 // Copy rest stable.
1225 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end && overhang > 0; s++)
1226 {
1227 // Restore.
1228 (*s).second--;
1229 difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s));
1230 target_end = std::copy((*s).first, (*s).first + local_length, target_end);
1231 (*s).first += local_length;
1232 overhang -= local_length;
1233 }
1234
1235 #if _GLIBCXX_ASSERTIONS
1236 _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
1237 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1238 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1239 #endif
1240
1241 return target_end;
1242 }
1243
1244 /** @brief Sequential multi-way merging switch.
1245 *
1246 * The decision if based on the branching factor and runtime settings.
1247 * @param seqs_begin Begin iterator of iterator pair input sequence.
1248 * @param seqs_end End iterator of iterator pair input sequence.
1249 * @param target Begin iterator out output sequence.
1250 * @param comp Comparator.
1251 * @param length Maximum length to merge.
1252 * @param stable Stable merging incurs a performance penalty.
1253 * @param sentinel The sequences have a sentinel element.
1254 * @return End iterator of output sequence. */
1255 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1256 RandomAccessIterator3
1257 multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag)
1258 {
1259 _GLIBCXX_CALL(length)
1260
1261 typedef _DifferenceTp difference_type;
1262 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1263 RandomAccessIterator1;
1264 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1265 value_type;
1266
1267 #if _GLIBCXX_ASSERTIONS
1268 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1269 _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
1270 #endif
1271
1272 RandomAccessIterator3 return_target = target;
1273 int k = static_cast<int>(seqs_end - seqs_begin);
1274
1275 Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm;
1276
1277 if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
1278 mwma = Settings::LOSER_TREE_COMBINED;
1279
1280 switch (k)
1281 {
1282 case 0:
1283 break;
1284 case 1:
1285 return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target);
1286 seqs_begin[0].first += length;
1287 break;
1288 case 2:
1289 return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp);
1290 break;
1291 case 3:
1292 switch (mwma)
1293 {
1294 case Settings::LOSER_TREE_COMBINED:
1295 return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable);
1296 break;
1297 case Settings::LOSER_TREE_SENTINEL:
1298 return_target = multiway_merge_3_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1299 break;
1300 default:
1301 return_target = multiway_merge_3_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1302 break;
1303 }
1304 break;
1305 case 4:
1306 switch (mwma)
1307 {
1308 case Settings::LOSER_TREE_COMBINED:
1309 return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable);
1310 break;
1311 case Settings::LOSER_TREE_SENTINEL:
1312 return_target = multiway_merge_4_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1313 break;
1314 default:
1315 return_target = multiway_merge_4_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1316 break;
1317 }
1318 break;
1319 default:
1320 {
1321 switch (mwma)
1322 {
1323 case Settings::BUBBLE:
1324 return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable);
1325 break;
1326 #if _GLIBCXX_LOSER_TREE_EXPLICIT
1327 case Settings::LOSER_TREE_EXPLICIT:
1328 return_target = multiway_merge_loser_tree<LoserTreeExplicit<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
1329 break;
1330 #endif
1331 #if _GLIBCXX_LOSER_TREE
1332 case Settings::LOSER_TREE:
1333 return_target = multiway_merge_loser_tree<LoserTree<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
1334 break;
1335 #endif
1336 #if _GLIBCXX_LOSER_TREE_COMBINED
1337 case Settings::LOSER_TREE_COMBINED:
1338 return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable);
1339 break;
1340 #endif
1341 #if _GLIBCXX_LOSER_TREE_SENTINEL
1342 case Settings::LOSER_TREE_SENTINEL:
1343 return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable);
1344 break;
1345 #endif
1346 default:
1347 // multiway_merge algorithm not implemented.
1348 _GLIBCXX_PARALLEL_ASSERT(0);
1349 break;
1350 }
1351 }
1352 }
1353 #if _GLIBCXX_ASSERTIONS
1354 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1355 #endif
1356
1357 return return_target;
1358 }
1359
1360 /** @brief Parallel multi-way merge routine.
1361 *
1362 * The decision if based on the branching factor and runtime settings.
1363 * @param seqs_begin Begin iterator of iterator pair input sequence.
1364 * @param seqs_end End iterator of iterator pair input sequence.
1365 * @param target Begin iterator out output sequence.
1366 * @param comp Comparator.
1367 * @param length Maximum length to merge.
1368 * @param stable Stable merging incurs a performance penalty.
1369 * @param sentinel Ignored.
1370 * @return End iterator of output sequence.
1371 */
1372 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1373 RandomAccessIterator3
1374 parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel)
1375 {
1376 _GLIBCXX_CALL(length)
1377
1378 typedef _DifferenceTp difference_type;
1379 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1380 RandomAccessIterator1;
1381 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1382 value_type;
1383
1384 #if _GLIBCXX_ASSERTIONS
1385 for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++)
1386 _GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp));
1387 #endif
1388
1389 // k sequences.
1390 int k = static_cast<int>(seqs_end - seqs_begin);
1391
1392 difference_type total_length = 0;
1393 for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++)
1394 total_length += LENGTH(*raii);
1395
1396 _GLIBCXX_CALL(total_length)
1397
1398 if (total_length == 0 || k == 0)
1399 return target;
1400
1401 thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
1402
1403 Timing<sequential_tag>* t = new Timing<sequential_tag>[num_threads];
1404
1405 for (int pr = 0; pr < num_threads; pr++)
1406 t[pr].tic();
1407
1408 bool tight = (total_length == length);
1409
1410 // Thread t will have to merge pieces[iam][0..k - 1]
1411 std::vector<std::pair<difference_type, difference_type> >* pieces = new std::vector<std::pair<difference_type, difference_type> >[num_threads];
1412 for (int s = 0; s < num_threads; s++)
1413 pieces[s].resize(k);
1414
1415 difference_type num_samples = Settings::merge_oversampling * num_threads;
1416
1417 if (Settings::multiway_merge_splitting == Settings::SAMPLING)
1418 {
1419 value_type* samples = new value_type[k * num_samples];
1420 // Sample.
1421 for (int s = 0; s < k; s++)
1422 for (int i = 0; (difference_type)i < num_samples; i++)
1423 {
1424 difference_type sample_index = static_cast<difference_type>(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length));
1425 samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
1426 }
1427
1428 if (stable)
1429 __gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp);
1430 else
1431 __gnu_sequential::sort(samples, samples + (num_samples * k), comp);
1432
1433 for (int slab = 0; slab < num_threads; slab++)
1434 // For each slab / processor.
1435 for (int seq = 0; seq < k; seq++)
1436 {
1437 // For each sequence.
1438 if (slab > 0)
1439 pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first;
1440 else
1441 {
1442 // Absolute beginning.
1443 pieces[slab][seq].first = 0;
1444 }
1445 if ((slab + 1) < num_threads)
1446 pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first;
1447 else
1448 pieces[slab][seq].second = LENGTH(seqs_begin[seq]); //absolute ending
1449 }
1450 delete[] samples;
1451 }
1452 else
1453 {
1454 // (Settings::multiway_merge_splitting == Settings::EXACT).
1455 std::vector<RandomAccessIterator1>* offsets = new std::vector<RandomAccessIterator1>[num_threads];
1456 std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > se(k);
1457
1458 copy(seqs_begin, seqs_end, se.begin());
1459
1460 difference_type* borders = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_threads + 1)));
1461 equally_split(length, num_threads, borders);
1462
1463 for (int s = 0; s < (num_threads - 1); s++)
1464 {
1465 offsets[s].resize(k);
1466 multiseq_partition(se.begin(), se.end(), borders[s + 1],
1467 offsets[s].begin(), comp);
1468
1469 // Last one also needed and available.
1470 if (!tight)
1471 {
1472 offsets[num_threads - 1].resize(k);
1473 multiseq_partition(se.begin(), se.end(),
1474 difference_type(length),
1475 offsets[num_threads - 1].begin(), comp);
1476 }
1477 }
1478
1479
1480 for (int slab = 0; slab < num_threads; slab++)
1481 {
1482 // For each slab / processor.
1483 for (int seq = 0; seq < k; seq++)
1484 {
1485 // For each sequence.
1486 if (slab == 0)
1487 {
1488 // Absolute beginning.
1489 pieces[slab][seq].first = 0;
1490 }
1491 else
1492 pieces[slab][seq].first = pieces[slab - 1][seq].second;
1493 if (!tight || slab < (num_threads - 1))
1494 pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first;
1495 else
1496 {
1497 // slab == num_threads - 1
1498 pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
1499 }
1500 }
1501 }
1502 delete[] offsets;
1503 }
1504
1505 for (int pr = 0; pr < num_threads; pr++)
1506 t[pr].tic();
1507
1508 # pragma omp parallel num_threads(num_threads)
1509 {
1510 thread_index_t iam = omp_get_thread_num();
1511
1512 t[iam].tic();
1513
1514 difference_type target_position = 0;
1515
1516 for (int c = 0; c < k; c++)
1517 target_position += pieces[iam][c].first;
1518
1519 if (k > 2)
1520 {
1521 std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks = new std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
1522
1523 difference_type local_length = 0;
1524 for (int s = 0; s < k; s++)
1525 {
1526 chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second);
1527 local_length += LENGTH(chunks[s]);
1528 }
1529
1530 multiway_merge(chunks, chunks + k, target + target_position, comp,
1531 std::min(local_length, length - target_position),
1532 stable, false, sequential_tag());
1533
1534 delete[] chunks;
1535 }
1536 else if (k == 2)
1537 {
1538 RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first;
1539 merge_advance(begin0,
1540 seqs_begin[0].first + pieces[iam][0].second,
1541 begin1,
1542 seqs_begin[1].first + pieces[iam][1].second,
1543 target + target_position,
1544 (pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
1545 comp);
1546 }
1547
1548 t[iam].tic();
1549
1550 }
1551
1552 for (int pr = 0; pr < num_threads; pr++)
1553 t[pr].tic();
1554
1555 #if _GLIBCXX_ASSERTIONS
1556 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1557 #endif
1558
1559 // Update ends of sequences.
1560 for (int s = 0; s < k; s++)
1561 seqs_begin[s].first += pieces[num_threads - 1][s].second;
1562
1563 delete[] pieces;
1564
1565 for (int pr = 0; pr < num_threads; pr++)
1566 t[pr].tic();
1567 for (int pr = 0; pr < num_threads; pr++)
1568 t[pr].print();
1569 delete[] t;
1570
1571 return target + length;
1572 }
1573
1574 /**
1575 * @brief Multi-way merging front-end.
1576 * @param seqs_begin Begin iterator of iterator pair input sequence.
1577 * @param seqs_end End iterator of iterator pair input sequence.
1578 * @param target Begin iterator out output sequence.
1579 * @param comp Comparator.
1580 * @param length Maximum length to merge.
1581 * @param stable Stable merging incurs a performance penalty.
1582 * @return End iterator of output sequence.
1583 */
1584 template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1585 RandomAccessIterator3
1586 multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
1587 RandomAccessIteratorPairIterator seqs_end,
1588 RandomAccessIterator3 target, Comparator comp,
1589 _DifferenceTp length, bool stable)
1590 {
1591 typedef _DifferenceTp difference_type;
1592 _GLIBCXX_CALL(seqs_end - seqs_begin)
1593
1594 if (seqs_begin == seqs_end)
1595 return target;
1596
1597 RandomAccessIterator3 target_end;
1598 if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1599 target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false);
1600 else
1601 target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag());
1602
1603 return target_end;
1604 }
1605
1606 /** @brief Multi-way merging front-end.
1607 * @param seqs_begin Begin iterator of iterator pair input sequence.
1608 * @param seqs_end End iterator of iterator pair input sequence.
1609 * @param target Begin iterator out output sequence.
1610 * @param comp Comparator.
1611 * @param length Maximum length to merge.
1612 * @param stable Stable merging incurs a performance penalty.
1613 * @return End iterator of output sequence.
1614 * @pre For each @c i, @c seqs_begin[i].second must be the end
1615 * marker of the sequence, but also reference the one more sentinel
1616 * element. */
1617 template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1618 RandomAccessIterator3
1619 multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
1620 RandomAccessIteratorPairIterator seqs_end,
1621 RandomAccessIterator3 target,
1622 Comparator comp,
1623 _DifferenceTp length,
1624 bool stable)
1625 {
1626 typedef _DifferenceTp difference_type;
1627
1628 if (seqs_begin == seqs_end)
1629 return target;
1630
1631 _GLIBCXX_CALL(seqs_end - seqs_begin)
1632
1633 if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1634 return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (typename std::iterator_traits<RandomAccessIterator3>::difference_type)length, stable, true);
1635 else
1636 return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag());
1637 }
1638 }
1639
1640 #endif