libstdc++-v3/include/parallel/multiway_mergesort.h

   1 // -*- C++ -*-
   2
   3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
   4 //
   5 // This file is part of the GNU ISO C++ Library.  This library is free
   6 // software; you can redistribute it and/or modify it under the terms
   7 // of the GNU General Public License as published by the Free Software
   8 // Foundation; either version 3, or (at your option) any later
   9 // version.
  10
  11 // This library is distributed in the hope that it will be useful, but
  12 // WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 // General Public License for more details.
  15
  16 // Under Section 7 of GPL version 3, you are granted additional
  17 // permissions described in the GCC Runtime Library Exception, version
  18 // 3.1, as published by the Free Software Foundation.
  19
  20 // You should have received a copy of the GNU General Public License and
  21 // a copy of the GCC Runtime Library Exception along with this program;
  22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23 // <http://www.gnu.org/licenses/>.
  24
  25 /** @file parallel/multiway_mergesort.h
  26  *  @brief Parallel multiway merge sort.
  27  *  This file is a GNU parallel extension to the Standard C++ Library.
  28  */
  29
  30 // Written by Johannes Singler.
  31
  32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
  33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
  34
  35 #include <vector>
  36
  37 #include <parallel/basic_iterator.h>
  38 #include <bits/stl_algo.h>
  39 #include <parallel/parallel.h>
  40 #include <parallel/multiway_merge.h>
  41
  42 namespace __gnu_parallel
  43 {
  44   /** @brief Subsequence description. */
  45   template<typename _DifferenceTp>
  46     struct _Piece
  47     {
  48       typedef _DifferenceTp _DifferenceType;
  49
  50       /** @brief Begin of subsequence. */
  51       _DifferenceType _M_begin;
  52
  53       /** @brief End of subsequence. */
  54       _DifferenceType _M_end;
  55     };
  56
  57   /** @brief Data accessed by all threads.
  58    *
  59    *  PMWMS = parallel multiway mergesort */
  60   template<typename _RAIter>
  61     struct _PMWMSSortingData
  62     {
  63       typedef std::iterator_traits<_RAIter> _TraitsType;
  64       typedef typename _TraitsType::value_type _ValueType;
  65       typedef typename _TraitsType::difference_type _DifferenceType;
  66
  67       /** @brief Number of threads involved. */
  68       _ThreadIndex _M_num_threads;
  69
  70       /** @brief Input __begin. */
  71       _RAIter _M_source;
  72
  73       /** @brief Start indices, per thread. */
  74       _DifferenceType* _M_starts;
  75
  76       /** @brief Storage in which to sort. */
  77       _ValueType** _M_temporary;
  78
  79       /** @brief Samples. */
  80       _ValueType* _M_samples;
  81
  82       /** @brief Offsets to add to the found positions. */
  83       _DifferenceType* _M_offsets;
  84
  85       /** @brief Pieces of data to merge @__c [thread][__sequence] */
  86       std::vector<_Piece<_DifferenceType> >* _M_pieces;
  87   };
  88
  89   /**
  90    *  @brief Select _M_samples from a sequence.
  91    *  @param __sd Pointer to algorithm data. _Result will be placed in
  92    *  @__c __sd->_M_samples.
  93    *  @param __num_samples Number of _M_samples to select.
  94    */
  95   template<typename _RAIter, typename _DifferenceTp>
  96     void
  97     __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
  98                         _DifferenceTp __num_samples)
  99     {
 100       typedef std::iterator_traits<_RAIter> _TraitsType;
 101       typedef typename _TraitsType::value_type _ValueType;
 102       typedef _DifferenceTp _DifferenceType;
 103
 104       _ThreadIndex __iam = omp_get_thread_num();
 105
 106       _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
 107
 108       equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
 109                     __num_samples + 1, __es);
 110
 111       for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
 112         ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
 113             _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
 114                                        + __es[__i + 1]]);
 115
 116       delete[] __es;
 117     }
 118
 119   /** @brief Split consistently. */
 120   template<bool __exact, typename _RAIter,
 121            typename _Compare, typename _SortingPlacesIterator>
 122     struct _SplitConsistently
 123     { };
 124
 125   /** @brief Split by exact splitting. */
 126   template<typename _RAIter, typename _Compare,
 127            typename _SortingPlacesIterator>
 128     struct _SplitConsistently<true, _RAIter,
 129                               _Compare, _SortingPlacesIterator>
 130     {
 131       void
 132       operator()(const _ThreadIndex __iam,
 133                  _PMWMSSortingData<_RAIter>* __sd,
 134                  _Compare& __comp,
 135                  const typename
 136                  std::iterator_traits<_RAIter>::difference_type
 137                  __num_samples) const
 138       {
 139 #       pragma omp barrier
 140
 141         std::vector<std::pair<_SortingPlacesIterator,
 142                               _SortingPlacesIterator> >
 143           seqs(__sd->_M_num_threads);
 144         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
 145           seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
 146                                      __sd->_M_temporary[__s]
 147                                      + (__sd->_M_starts[__s + 1]
 148                                         - __sd->_M_starts[__s]));
 149
 150         std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
 151
 152         // if not last thread
 153         if (__iam < __sd->_M_num_threads - 1)
 154           multiseq_partition(seqs.begin(), seqs.end(),
 155                              __sd->_M_starts[__iam + 1], _M_offsets.begin(),
 156                              __comp);
 157
 158         for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
 159           {
 160             // for each sequence
 161             if (__iam < (__sd->_M_num_threads - 1))
 162               __sd->_M_pieces[__iam][__seq]._M_end
 163                 = _M_offsets[__seq] - seqs[__seq].first;
 164             else
 165               // very end of this sequence
 166               __sd->_M_pieces[__iam][__seq]._M_end =
 167                 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
 168           }
 169
 170 #       pragma omp barrier
 171
 172         for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
 173           {
 174             // For each sequence.
 175             if (__iam > 0)
 176               __sd->_M_pieces[__iam][__seq]._M_begin =
 177                 __sd->_M_pieces[__iam - 1][__seq]._M_end;
 178             else
 179               // Absolute beginning.
 180               __sd->_M_pieces[__iam][__seq]._M_begin = 0;
 181           }
 182       }
 183   };
 184
 185   /** @brief Split by sampling. */
 186   template<typename _RAIter, typename _Compare,
 187            typename _SortingPlacesIterator>
 188     struct _SplitConsistently<false, _RAIter, _Compare,
 189                               _SortingPlacesIterator>
 190     {
 191       void
 192       operator()(const _ThreadIndex __iam,
 193                  _PMWMSSortingData<_RAIter>* __sd,
 194                  _Compare& __comp,
 195                  const typename
 196                  std::iterator_traits<_RAIter>::difference_type
 197                  __num_samples) const
 198       {
 199         typedef std::iterator_traits<_RAIter> _TraitsType;
 200         typedef typename _TraitsType::value_type _ValueType;
 201         typedef typename _TraitsType::difference_type _DifferenceType;
 202
 203         __determine_samples(__sd, __num_samples);
 204
 205 #       pragma omp barrier
 206
 207 #       pragma omp single
 208         __gnu_sequential::sort(__sd->_M_samples,
 209                                __sd->_M_samples
 210                                + (__num_samples * __sd->_M_num_threads),
 211                                __comp);
 212
 213 #       pragma omp barrier
 214
 215         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
 216           {
 217             // For each sequence.
 218             if (__num_samples * __iam > 0)
 219               __sd->_M_pieces[__iam][__s]._M_begin =
 220                 std::lower_bound(__sd->_M_temporary[__s],
 221                                  __sd->_M_temporary[__s]
 222                                  + (__sd->_M_starts[__s + 1]
 223                                     - __sd->_M_starts[__s]),
 224                                  __sd->_M_samples[__num_samples * __iam],
 225                                  __comp)
 226                 - __sd->_M_temporary[__s];
 227             else
 228               // Absolute beginning.
 229               __sd->_M_pieces[__iam][__s]._M_begin = 0;
 230
 231             if ((__num_samples * (__iam + 1)) <
 232                 (__num_samples * __sd->_M_num_threads))
 233               __sd->_M_pieces[__iam][__s]._M_end =
 234                 std::lower_bound(__sd->_M_temporary[__s],
 235                                  __sd->_M_temporary[__s]
 236                                  + (__sd->_M_starts[__s + 1]
 237                                     - __sd->_M_starts[__s]),
 238                                  __sd->_M_samples[__num_samples * (__iam + 1)],
 239                                  __comp)
 240                 - __sd->_M_temporary[__s];
 241             else
 242               // Absolute end.
 243               __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
 244                                                     - __sd->_M_starts[__s]);
 245           }
 246       }
 247   };
 248
 249   template<bool __stable, typename _RAIter, typename _Compare>
 250     struct __possibly_stable_sort
 251     { };
 252
 253   template<typename _RAIter, typename _Compare>
 254     struct __possibly_stable_sort<true, _RAIter, _Compare>
 255     {
 256       void operator()(const _RAIter& __begin,
 257                       const _RAIter& __end, _Compare& __comp) const
 258       { __gnu_sequential::stable_sort(__begin, __end, __comp); }
 259     };
 260
 261   template<typename _RAIter, typename _Compare>
 262     struct __possibly_stable_sort<false, _RAIter, _Compare>
 263     {
 264       void operator()(const _RAIter __begin,
 265                       const _RAIter __end, _Compare& __comp) const
 266       { __gnu_sequential::sort(__begin, __end, __comp); }
 267     };
 268
 269   template<bool __stable, typename Seq_RAIter,
 270            typename _RAIter, typename _Compare,
 271            typename DiffType>
 272     struct __possibly_stable_multiway_merge
 273     { };
 274
 275   template<typename Seq_RAIter, typename _RAIter,
 276            typename _Compare, typename _DiffType>
 277     struct __possibly_stable_multiway_merge<true, Seq_RAIter,
 278                                             _RAIter, _Compare, _DiffType>
 279     {
 280       void operator()(const Seq_RAIter& __seqs_begin,
 281                       const Seq_RAIter& __seqs_end,
 282                       const _RAIter& __target,
 283                       _Compare& __comp,
 284                       _DiffType __length_am) const
 285       {
 286         stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
 287                               __comp, sequential_tag());
 288       }
 289     };
 290
 291   template<typename Seq_RAIter, typename _RAIter,
 292            typename _Compare, typename _DiffType>
 293     struct __possibly_stable_multiway_merge<false, Seq_RAIter,
 294                                             _RAIter, _Compare, _DiffType>
 295     {
 296       void operator()(const Seq_RAIter& __seqs_begin,
 297                       const Seq_RAIter& __seqs_end,
 298                       const _RAIter& __target,
 299                       _Compare& __comp,
 300                       _DiffType __length_am) const
 301       {
 302         multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
 303                        sequential_tag());
 304       }
 305     };
 306
 307   /** @brief PMWMS code executed by each thread.
 308    *  @param __sd Pointer to algorithm data.
 309    *  @param __comp Comparator.
 310    */
 311   template<bool __stable, bool __exact, typename _RAIter,
 312            typename _Compare>
 313     void
 314     parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
 315                           _Compare& __comp)
 316     {
 317       typedef std::iterator_traits<_RAIter> _TraitsType;
 318       typedef typename _TraitsType::value_type _ValueType;
 319       typedef typename _TraitsType::difference_type _DifferenceType;
 320
 321       _ThreadIndex __iam = omp_get_thread_num();
 322
 323       // Length of this thread's chunk, before merging.
 324       _DifferenceType __length_local
 325         = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
 326
 327       // Sort in temporary storage, leave space for sentinel.
 328
 329       typedef _ValueType* _SortingPlacesIterator;
 330
 331       __sd->_M_temporary[__iam] =
 332         static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
 333                                                 * (__length_local + 1)));
 334
 335       // Copy there.
 336       std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
 337                               __sd->_M_source + __sd->_M_starts[__iam]
 338                               + __length_local,
 339                               __sd->_M_temporary[__iam]);
 340
 341       __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
 342         (__sd->_M_temporary[__iam],
 343          __sd->_M_temporary[__iam] + __length_local,
 344          __comp);
 345
 346       // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
 347       // __sd->_M_temporary[__iam] + __length_local.
 348
 349       // No barrier here: Synchronization is done by the splitting routine.
 350
 351       _DifferenceType __num_samples =
 352         _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
 353       _SplitConsistently
 354         <__exact, _RAIter, _Compare, _SortingPlacesIterator>()
 355         (__iam, __sd, __comp, __num_samples);
 356
 357       // Offset from __target __begin, __length after merging.
 358       _DifferenceType __offset = 0, __length_am = 0;
 359       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
 360         {
 361           __length_am += (__sd->_M_pieces[__iam][__s]._M_end
 362                           - __sd->_M_pieces[__iam][__s]._M_begin);
 363           __offset += __sd->_M_pieces[__iam][__s]._M_begin;
 364         }
 365
 366       typedef std::vector<
 367       std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
 368         _SeqVector;
 369       _SeqVector seqs(__sd->_M_num_threads);
 370
 371       for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
 372         {
 373           seqs[__s] =
 374             std::make_pair
 375             (__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
 376              __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
 377         }
 378
 379       __possibly_stable_multiway_merge<
 380         __stable,
 381         typename _SeqVector::iterator,
 382         _RAIter,
 383         _Compare, _DifferenceType>()
 384           (seqs.begin(), seqs.end(),
 385            __sd->_M_source + __offset, __comp,
 386            __length_am);
 387
 388 #     pragma omp barrier
 389
 390       ::operator delete(__sd->_M_temporary[__iam]);
 391     }
 392
 393   /** @brief PMWMS main call.
 394    *  @param __begin Begin iterator of sequence.
 395    *  @param __end End iterator of sequence.
 396    *  @param __comp Comparator.
 397    *  @param __n Length of sequence.
 398    *  @param __num_threads Number of threads to use.
 399    */
 400   template<bool __stable, bool __exact, typename _RAIter,
 401            typename _Compare>
 402     void
 403     parallel_sort_mwms(_RAIter __begin, _RAIter __end,
 404                        _Compare __comp,
 405                        _ThreadIndex __num_threads)
 406     {
 407       _GLIBCXX_CALL(__end - __begin)
 408
 409       typedef std::iterator_traits<_RAIter> _TraitsType;
 410       typedef typename _TraitsType::value_type _ValueType;
 411       typedef typename _TraitsType::difference_type _DifferenceType;
 412
 413       _DifferenceType __n = __end - __begin;
 414
 415       if (__n <= 1)
 416         return;
 417
 418       // at least one element per thread
 419       if (__num_threads > __n)
 420         __num_threads = static_cast<_ThreadIndex>(__n);
 421
 422       // shared variables
 423       _PMWMSSortingData<_RAIter> __sd;
 424       _DifferenceType* _M_starts;
 425
 426 #     pragma omp parallel num_threads(__num_threads)
 427       {
 428         __num_threads = omp_get_num_threads(); //no more threads than requested
 429
 430 #       pragma omp single
 431         {
 432           __sd._M_num_threads = __num_threads;
 433           __sd._M_source = __begin;
 434
 435           __sd._M_temporary = new _ValueType*[__num_threads];
 436
 437           if (!__exact)
 438             {
 439               _DifferenceType __size =
 440                 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
 441                 * __num_threads;
 442               __sd._M_samples = static_cast<_ValueType*>
 443                 (::operator new(__size * sizeof(_ValueType)));
 444             }
 445           else
 446             __sd._M_samples = NULL;
 447
 448           __sd._M_offsets = new _DifferenceType[__num_threads - 1];
 449           __sd._M_pieces
 450             = new std::vector<_Piece<_DifferenceType> >[__num_threads];
 451           for (int __s = 0; __s < __num_threads; ++__s)
 452             __sd._M_pieces[__s].resize(__num_threads);
 453           _M_starts = __sd._M_starts
 454             = new _DifferenceType[__num_threads + 1];
 455
 456           _DifferenceType __chunk_length = __n / __num_threads;
 457           _DifferenceType __split = __n % __num_threads;
 458           _DifferenceType __pos = 0;
 459           for (int __i = 0; __i < __num_threads; ++__i)
 460             {
 461               _M_starts[__i] = __pos;
 462               __pos += (__i < __split)
 463                 ? (__chunk_length + 1) : __chunk_length;
 464             }
 465           _M_starts[__num_threads] = __pos;
 466         } //single
 467
 468         // Now sort in parallel.
 469         parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
 470       } //parallel
 471
 472       delete[] _M_starts;
 473       delete[] __sd._M_temporary;
 474
 475       if (!__exact)
 476       ::operator delete(__sd._M_samples);
 477
 478       delete[] __sd._M_offsets;
 479       delete[] __sd._M_pieces;
 480     }
 481
 482 } //namespace __gnu_parallel
 483
 484 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */