// -*- C++ -*-
-// Copyright (C) 2007 Free Software Foundation, Inc.
+// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
-// Foundation; either version 2, or (at your option) any later
+// Foundation; either version 3, or (at your option) any later
// version.
// This library is distributed in the hope that it will be useful, but
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this library; see the file COPYING. If not, write to
-// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
-// MA 02111-1307, USA.
-
-// As a special exception, you may use this file as part of a free
-// software library without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to
-// produce an executable, this file does not by itself cause the
-// resulting executable to be covered by the GNU General Public
-// License. This exception does not however invalidate any other
-// reasons why the executable file might be covered by the GNU General
-// Public License.
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
/** @file parallel/partial_sum.h
- * @brief Parallel implementation of std::partial_sum(), i. e. prefix
- * sums.
+ * @brief Parallel implementation of std::partial_sum(), i.e. prefix
+* sums.
* This file is a GNU parallel extension to the Standard C++ Library.
*/
#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
#include <omp.h>
+#include <new>
#include <bits/stl_algobase.h>
#include <parallel/parallel.h>
#include <parallel/numericfwd.h>
// Problem: there is no 0-element given.
/** @brief Base case prefix sum routine.
- * @param begin Begin iterator of input sequence.
- * @param end End iterator of input sequence.
- * @param result Begin iterator of output sequence.
- * @param bin_op Associative binary function.
- * @param value Start value. Must be passed since the neutral
+ * @param __begin Begin iterator of input sequence.
+ * @param __end End iterator of input sequence.
+ * @param __result Begin iterator of output sequence.
+ * @param __bin_op Associative binary function.
+ * @param __value Start value. Must be passed since the neutral
* element is unknown in general.
* @return End iterator of output sequence. */
- template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
- inline OutputIterator
- parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
- OutputIterator result, BinaryOperation bin_op,
- typename std::iterator_traits<InputIterator>::value_type value)
- {
- if (begin == end)
- return result;
-
- while (begin != end)
- {
- value = bin_op(value, *begin);
- *result = value;
- result++;
- begin++;
- }
- return result;
- }
+ template<typename _IIter,
+ typename _OutputIterator,
+ typename _BinaryOperation>
+ _OutputIterator
+ __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
+ _OutputIterator __result,
+ _BinaryOperation __bin_op,
+ typename std::iterator_traits <_IIter>::value_type __value)
+ {
+ if (__begin == __end)
+ return __result;
+
+ while (__begin != __end)
+ {
+ __value = __bin_op(__value, *__begin);
+ *__result = __value;
+ ++__result;
+ ++__begin;
+ }
+ return __result;
+ }
/** @brief Parallel partial sum implementation, two-phase approach,
no recursion.
- * @param begin Begin iterator of input sequence.
- * @param end End iterator of input sequence.
- * @param result Begin iterator of output sequence.
- * @param bin_op Associative binary function.
- * @param n Length of sequence.
- * @param num_threads Number of threads to use.
+ * @param __begin Begin iterator of input sequence.
+ * @param __end End iterator of input sequence.
+ * @param __result Begin iterator of output sequence.
+ * @param __bin_op Associative binary function.
+ * @param __n Length of sequence.
+ * @param __num_threads Number of threads to use.
* @return End iterator of output sequence.
*/
- template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
- OutputIterator
- parallel_partial_sum_linear(InputIterator begin, InputIterator end,
- OutputIterator result, BinaryOperation bin_op,
- typename std::iterator_traits<InputIterator>::difference_type n, int num_threads)
- {
- typedef std::iterator_traits<InputIterator> traits_type;
- typedef typename traits_type::value_type value_type;
- typedef typename traits_type::difference_type difference_type;
-
- if (num_threads > (n - 1))
- num_threads = static_cast<thread_index_t>(n - 1);
- if (num_threads < 2)
- {
- *result = *begin;
- return parallel_partial_sum_basecase(begin + 1, end, result + 1, bin_op, *begin);
- }
+ template<typename _IIter,
+ typename _OutputIterator,
+ typename _BinaryOperation>
+ _OutputIterator
+ __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
+ _OutputIterator __result,
+ _BinaryOperation __bin_op,
+ typename std::iterator_traits<_IIter>::difference_type __n)
+ {
+ typedef std::iterator_traits<_IIter> _TraitsType;
+ typedef typename _TraitsType::value_type _ValueType;
+ typedef typename _TraitsType::difference_type _DifferenceType;
- difference_type* borders = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_threads + 2)));
+ if (__begin == __end)
+ return __result;
- if (Settings::partial_sum_dilatation == 1.0f)
- equally_split(n, num_threads + 1, borders);
- else
- {
- difference_type chunk_length = (int)((double)n / ((double)num_threads + Settings::partial_sum_dilatation)), borderstart = n - num_threads * chunk_length;
- borders[0] = 0;
- for (int i = 1; i < (num_threads + 1); i++)
- {
- borders[i] = borderstart;
- borderstart += chunk_length;
- }
- borders[num_threads + 1] = n;
- }
-
- value_type* sums = new value_type[num_threads];
- OutputIterator target_end;
-
-#pragma omp parallel num_threads(num_threads)
- {
- int id = omp_get_thread_num();
- if (id == 0)
- {
- *result = *begin;
- parallel_partial_sum_basecase(begin + 1, begin + borders[1],
- result + 1, bin_op, *begin);
- sums[0] = *(result + borders[1] - 1);
- }
- else
+ _ThreadIndex __num_threads =
+ std::min<_DifferenceType>(__get_max_threads(), __n - 1);
+
+ if (__num_threads < 2)
{
- sums[id] = std::accumulate(begin + borders[id] + 1,
- begin + borders[id + 1],
- *(begin + borders[id]),
- bin_op, __gnu_parallel::sequential_tag());
+ *__result = *__begin;
+ return __parallel_partial_sum_basecase(__begin + 1, __end,
+ __result + 1, __bin_op,
+ *__begin);
}
-#pragma omp barrier
-
-#pragma omp single
- parallel_partial_sum_basecase(sums + 1, sums + num_threads, sums + 1,
- bin_op, sums[0]);
+ _DifferenceType* __borders;
+ _ValueType* __sums;
-#pragma omp barrier
+ const _Settings& __s = _Settings::get();
- // Still same team.
- parallel_partial_sum_basecase(begin + borders[id + 1],
- begin + borders[id + 2],
- result + borders[id + 1], bin_op,
- sums[id]);
+# pragma omp parallel num_threads(__num_threads)
+ {
+# pragma omp single
+ {
+ __num_threads = omp_get_num_threads();
+
+ __borders = new _DifferenceType[__num_threads + 2];
+
+ if (__s.partial_sum_dilation == 1.0f)
+ __equally_split(__n, __num_threads + 1, __borders);
+ else
+ {
+ _DifferenceType __first_part_length =
+ std::max<_DifferenceType>(1,
+ __n / (1.0f + __s.partial_sum_dilation * __num_threads));
+ _DifferenceType __chunk_length =
+ (__n - __first_part_length) / __num_threads;
+ _DifferenceType __borderstart =
+ __n - __num_threads * __chunk_length;
+ __borders[0] = 0;
+ for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
+ {
+ __borders[__i] = __borderstart;
+ __borderstart += __chunk_length;
+ }
+ __borders[__num_threads + 1] = __n;
+ }
+
+ __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
+ * __num_threads));
+ _OutputIterator __target_end;
+ } //single
+
+ _ThreadIndex __iam = omp_get_thread_num();
+ if (__iam == 0)
+ {
+ *__result = *__begin;
+ __parallel_partial_sum_basecase(__begin + 1,
+ __begin + __borders[1],
+ __result + 1,
+ __bin_op, *__begin);
+ ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
+ }
+ else
+ {
+ ::new(&(__sums[__iam]))
+ _ValueType(__gnu_parallel::accumulate(
+ __begin + __borders[__iam] + 1,
+ __begin + __borders[__iam + 1],
+ *(__begin + __borders[__iam]),
+ __bin_op,
+ __gnu_parallel::sequential_tag()));
+ }
+
+# pragma omp barrier
+
+# pragma omp single
+ __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
+ __sums + 1, __bin_op, __sums[0]);
+
+# pragma omp barrier
+
+ // Still same team.
+ __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
+ __begin + __borders[__iam + 2],
+ __result + __borders[__iam + 1],
+ __bin_op, __sums[__iam]);
+ } //parallel
+
+ for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
+ __sums[__i].~_ValueType();
+ ::operator delete(__sums);
+
+ delete[] __borders;
+
+ return __result + __n;
}
- delete [] sums;
-
- return result + n;
- }
-
- /** @brief Parallel partial sum front-end.
- * @param begin Begin iterator of input sequence.
- * @param end End iterator of input sequence.
- * @param result Begin iterator of output sequence.
- * @param bin_op Associative binary function.
+ /** @brief Parallel partial sum front-__end.
+ * @param __begin Begin iterator of input sequence.
+ * @param __end End iterator of input sequence.
+ * @param __result Begin iterator of output sequence.
+ * @param __bin_op Associative binary function.
* @return End iterator of output sequence. */
- template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
- OutputIterator
- parallel_partial_sum(InputIterator begin, InputIterator end,
- OutputIterator result, BinaryOperation bin_op)
- {
- _GLIBCXX_CALL(begin - end);
-
- typedef std::iterator_traits<InputIterator> traits_type;
- typedef typename traits_type::value_type value_type;
- typedef typename traits_type::difference_type difference_type;
+ template<typename _IIter,
+ typename _OutputIterator,
+ typename _BinaryOperation>
+ _OutputIterator
+ __parallel_partial_sum(_IIter __begin, _IIter __end,
+ _OutputIterator __result, _BinaryOperation __bin_op)
+ {
+ _GLIBCXX_CALL(__begin - __end)
- difference_type n = end - begin;
+ typedef std::iterator_traits<_IIter> _TraitsType;
+ typedef typename _TraitsType::value_type _ValueType;
+ typedef typename _TraitsType::difference_type _DifferenceType;
- int num_threads = get_max_threads();
+ _DifferenceType __n = __end - __begin;
- switch (Settings::partial_sum_algorithm)
- {
- case Settings::LINEAR:
- // Need an initial offset.
- return parallel_partial_sum_linear(begin, end, result, bin_op,
- n, num_threads);
- default:
- // Partial_sum algorithm not implemented.
- _GLIBCXX_PARALLEL_ASSERT(0);
- return result + n;
- }
- }
+ switch (_Settings::get().partial_sum_algorithm)
+ {
+ case LINEAR:
+ // Need an initial offset.
+ return __parallel_partial_sum_linear(__begin, __end, __result,
+ __bin_op, __n);
+ default:
+ // Partial_sum algorithm not implemented.
+ _GLIBCXX_PARALLEL_ASSERT(0);
+ return __result + __n;
+ }
+ }
}
-#endif
+#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */