3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file parallel/partial_sum.h
26 * @brief Parallel implementation of std::partial_sum(), i.e. prefix
28 * This file is a GNU parallel extension to the Standard C++ Library.
31 // Written by Johannes Singler.
33 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
34 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
38 #include <bits/stl_algobase.h>
39 #include <parallel/parallel.h>
40 #include <parallel/numericfwd.h>
42 namespace __gnu_parallel
44 // Problem: there is no 0-element given.
46 /** @brief Base case prefix sum routine.
47 * @param __begin Begin iterator of input sequence.
48 * @param __end End iterator of input sequence.
49 * @param __result Begin iterator of output sequence.
50 * @param __bin_op Associative binary function.
51 * @param __value Start value. Must be passed since the neutral
52 * element is unknown in general.
53 * @return End iterator of output sequence. */
54 template<typename _IIter
,
55 typename _OutputIterator
,
56 typename _BinaryOperation
>
58 __parallel_partial_sum_basecase(_IIter __begin
, _IIter __end
,
59 _OutputIterator __result
, _BinaryOperation __bin_op
,
60 typename
std::iterator_traits
61 <_IIter
>::value_type __value
)
66 while (__begin
!= __end
)
68 __value
= __bin_op(__value
, *__begin
);
76 /** @brief Parallel partial sum implementation, two-phase approach,
78 * @param __begin Begin iterator of input sequence.
79 * @param __end End iterator of input sequence.
80 * @param __result Begin iterator of output sequence.
81 * @param __bin_op Associative binary function.
82 * @param __n Length of sequence.
83 * @param __num_threads Number of threads to use.
84 * @return End iterator of output sequence.
86 template<typename _IIter
,
87 typename _OutputIterator
,
88 typename _BinaryOperation
>
90 __parallel_partial_sum_linear(_IIter __begin
, _IIter __end
,
91 _OutputIterator __result
, _BinaryOperation __bin_op
,
92 typename
std::iterator_traits
93 <_IIter
>::difference_type __n
)
95 typedef std::iterator_traits
<_IIter
> _TraitsType
;
96 typedef typename
_TraitsType::value_type _ValueType
;
97 typedef typename
_TraitsType::difference_type _DifferenceType
;
102 _ThreadIndex __num_threads
=
103 std::min
<_DifferenceType
>(__get_max_threads(), __n
- 1);
105 if (__num_threads
< 2)
107 *__result
= *__begin
;
108 return __parallel_partial_sum_basecase(
109 __begin
+ 1, __end
, __result
+ 1, __bin_op
, *__begin
);
112 _DifferenceType
* __borders
;
115 const _Settings
& __s
= _Settings::get();
117 # pragma omp parallel num_threads(__num_threads)
121 __num_threads
= omp_get_num_threads();
123 __borders
= new _DifferenceType
[__num_threads
+ 2];
125 if (__s
.partial_sum_dilation
== 1.0f
)
126 equally_split(__n
, __num_threads
+ 1, __borders
);
129 _DifferenceType __chunk_length
=
131 / ((double)__num_threads
+ __s
.partial_sum_dilation
)),
132 __borderstart
= __n
- __num_threads
* __chunk_length
;
134 for (int __i
= 1; __i
< (__num_threads
+ 1); ++__i
)
136 __borders
[__i
] = __borderstart
;
137 __borderstart
+= __chunk_length
;
139 __borders
[__num_threads
+ 1] = __n
;
142 __sums
= static_cast<_ValueType
*>(::operator new(sizeof(_ValueType
)
144 _OutputIterator __target_end
;
147 _ThreadIndex __iam
= omp_get_thread_num();
150 *__result
= *__begin
;
151 __parallel_partial_sum_basecase(__begin
+ 1, __begin
+ __borders
[1],
152 __result
+ 1, __bin_op
, *__begin
);
153 ::new(&(__sums
[__iam
])) _ValueType(*(__result
+ __borders
[1] - 1));
157 ::new(&(__sums
[__iam
]))
158 _ValueType(std::accumulate(__begin
+ __borders
[__iam
] + 1,
159 __begin
+ __borders
[__iam
+ 1],
160 *(__begin
+ __borders
[__iam
]),
162 __gnu_parallel::sequential_tag()));
168 __parallel_partial_sum_basecase(
169 __sums
+ 1, __sums
+ __num_threads
, __sums
+ 1, __bin_op
, __sums
[0]);
174 __parallel_partial_sum_basecase(__begin
+ __borders
[__iam
+ 1],
175 __begin
+ __borders
[__iam
+ 2],
176 __result
+ __borders
[__iam
+ 1], __bin_op
,
180 ::operator delete(__sums
);
183 return __result
+ __n
;
186 /** @brief Parallel partial sum front-__end.
187 * @param __begin Begin iterator of input sequence.
188 * @param __end End iterator of input sequence.
189 * @param __result Begin iterator of output sequence.
190 * @param __bin_op Associative binary function.
191 * @return End iterator of output sequence. */
192 template<typename _IIter
,
193 typename _OutputIterator
,
194 typename _BinaryOperation
>
196 __parallel_partial_sum(_IIter __begin
, _IIter __end
,
197 _OutputIterator __result
, _BinaryOperation __bin_op
)
199 _GLIBCXX_CALL(__begin
- __end
)
201 typedef std::iterator_traits
<_IIter
> _TraitsType
;
202 typedef typename
_TraitsType::value_type _ValueType
;
203 typedef typename
_TraitsType::difference_type _DifferenceType
;
205 _DifferenceType __n
= __end
- __begin
;
207 switch (_Settings::get().partial_sum_algorithm
)
210 // Need an initial offset.
211 return __parallel_partial_sum_linear(__begin
, __end
, __result
, __bin_op
, __n
);
213 // Partial_sum algorithm not implemented.
214 _GLIBCXX_PARALLEL_ASSERT(0);
215 return __result
+ __n
;
220 #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */