1 # a "yield" version of the Parallel Reduction/Prefix-Sum REMAP algorithm.
2 # the algorithm is in-place. it does not perform "MV" operations.
4 from copy import deepcopy
5 import operator
8 # first, we'll have a simpler prefix-sum algorithm so we can logically work our
9 # way up to the full algorithm:
10 def prefix_sum_work_efficient(item_count):
11 # this is based on:
12 # `nmutil.prefix_sum.prefix_sum_ops(..., work_efficient=True)`
14 # compute the partial sums using a set of binary trees
15 dist = 1
16 while dist < item_count:
17 start = dist * 2 - 1
18 step = dist * 2
19 for i in reversed(range(start, item_count, step)):
20 lhs = i - dist
21 # order is important for non-commutative ops -- don't swap lhs/rhs
22 yield f"items[{i}] = items[{lhs}] + items[{i}]"
23 dist <<= 1
24 # express all output items in terms of the computed partial sums.
25 dist >>= 1
26 while dist >= 1:
27 for i in reversed(range(dist * 3 - 1, item_count, dist * 2)):
28 lhs = i - dist
29 # order is important for non-commutative ops -- don't swap lhs/rhs
30 yield f"items[{i}] = items[{lhs}] + items[{i}]"
31 dist >>= 1
34 # python "yield" can be iterated. use this to make it clear how
35 # the indices are generated by using natural-looking nested loops
36 def iterate_indices2(SVSHAPE, pred=None):
37 # this is a copy of iterate_indices with reversing `steps` removed since
38 # that is useless afaict. scan/prefix-sum support is also added.
40 # get indices to iterate over, in the required order
41 xd = SVSHAPE.lims[0]
42 # create lists of indices to iterate over in each dimension
43 ix = list(range(xd))
44 # invert the indices if needed
45 if SVSHAPE.invxyz[0]:
46 ix.reverse()
47 if SVSHAPE.order != (0, 1, 2) or any(SVSHAPE.invxyz[1:]):
48 raise ValueError("undefined")
49 if SVSHAPE.skip & 0b10:
50 # this is a scan/prefix-sum rather than a reduction.
51 if pred is not None and any(not pred[i] for i in ix):
52 raise ValueError("undefined")
53 results = []
54 dist = 1
55 while dist < xd:
56 start = dist * 2 - 1
57 step = dist * 2
58 for i in reversed(range(start, xd, step)):
59 lhs = i - dist
60 # yield f"items[{i}] = items[{lhs}] + items[{i}]"
61 v = ix[i if SVSHAPE.skip & 0b1 else lhs]
62 results.append([v + SVSHAPE.offset, 0])
63 if results:
64 results[-1][1] |= 0b001 # end of inner loop
65 dist <<= 1
66 if results:
67 results[-1][1] |= 0b010 # end of first loop nest
68 # express all output items in terms of the computed partial sums.
69 dist >>= 1
70 while dist >= 1:
71 for i in reversed(range(dist * 3 - 1, xd, dist * 2)):
72 lhs = i - dist
73 # yield f"items[{i}] = items[{lhs}] + items[{i}]"
74 v = ix[i if SVSHAPE.skip & 0b1 else lhs]
75 results.append([v + SVSHAPE.offset, 0])
76 if results:
77 results[-1][1] |= 0b001 # end of inner loop
78 dist >>= 1
79 if results:
80 results[-1][1] = 0b111 # end of full algorithm
81 yield from results
82 return
83 # start a loop from the lowest step
84 step = 1
85 while step < xd:
86 step *= 2
87 stepend = step >= xd # note end of steps
88 results = []
89 for i in range(0, xd, step):
90 other = i + step // 2
91 ci = ix[i]
92 oi = ix[other] if other < xd else None
93 other_pred = other < xd and (pred is None or pred[oi])
94 if (pred is None or pred[ci]) and other_pred:
95 if SVSHAPE.skip == 0b00: # submode 00
96 result = ci
97 elif SVSHAPE.skip == 0b01: # submode 01
98 result = oi
99 results.append([result + SVSHAPE.offset, 0])
100 elif other_pred:
101 ix[i] = oi
102 if results:
103 results[-1][1] = (stepend << 1) | 1 # notify end of loops
104 yield from results
107 def demo_prefix_sum():
108 # set the dimension sizes here
109 xdim = 9
111 # set up an SVSHAPE
112 class SVSHAPE:
113 pass
114 SVSHAPE0 = SVSHAPE()
115 SVSHAPE0.lims = [xdim, 0, 0]
116 SVSHAPE0.order = 0, 1, 2
117 SVSHAPE0.mode = 0b10
118 SVSHAPE0.skip = 0b10 # prefix-sum lhs
119 SVSHAPE0.offset = 0 # experiment with different offset, here
120 SVSHAPE0.invxyz = [0, 0, 0] # inversion if desired
122 SVSHAPE1 = SVSHAPE()
123 SVSHAPE1.lims = [xdim, 0, 0]
124 SVSHAPE1.order = 0, 1, 2
125 SVSHAPE1.mode = 0b10
126 SVSHAPE1.skip = 0b11 # prefix-sum rhs
127 SVSHAPE1.offset = 0 # experiment with different offset, here
128 SVSHAPE1.invxyz = [0, 0, 0] # inversion if desired
130 # enumerate over the iterator function, getting new indices
131 shapes = list(iterate_indices2(SVSHAPE0)), list(iterate_indices2(SVSHAPE1))
132 for idx in range(len(shapes[0])):
133 l_idx, lend = shapes[0][idx]
134 r_idx, rend = shapes[1][idx]
135 # note RT is r_idx, not l_idx -- this is different than reduction
136 print(f"{idx}: r{r_idx} = r{l_idx} + r{r_idx} "
137 f"lend={lend:03b} rend={rend:03b}")
140 # python "yield" can be iterated. use this to make it clear how
141 # the indices are generated by using natural-looking nested loops
142 def iterate_indices(SVSHAPE, pred=None):
143 # get indices to iterate over, in the required order
144 xd = SVSHAPE.lims[0]
145 # create lists of indices to iterate over in each dimension
146 ix = list(range(xd))
147 # invert the indices if needed
148 if SVSHAPE.invxyz[0]:
149 ix.reverse()
150 # start a loop from the lowest step
151 step = 1
152 steps = []
153 while step < xd:
154 step *= 2
155 steps.append(step)
156 # invert the indices if needed
157 if SVSHAPE.invxyz[1]:
158 steps.reverse()
159 for step in steps:
160 stepend = (step == steps[-1]) # note end of steps
161 idxs = list(range(0, xd, step))
162 results = []
163 for i in idxs:
164 other = i + step // 2
165 ci = ix[i]
166 oi = ix[other] if other < xd else None
167 other_pred = other < xd and (pred is None or pred[oi])
168 if (pred is None or pred[ci]) and other_pred:
169 if SVSHAPE.skip == 0b00: # submode 00
170 result = ci
171 elif SVSHAPE.skip == 0b01: # submode 01
172 result = oi
173 results.append([result + SVSHAPE.offset, 0])
174 elif other_pred:
175 ix[i] = oi
176 if results:
177 results[-1][1] = (stepend << 1) | 1 # notify end of loops
178 yield from results
181 def demo():
182 # set the dimension sizes here
183 xdim = 9
185 # set up an SVSHAPE
186 class SVSHAPE:
187 pass
188 SVSHAPE0 = SVSHAPE()
189 SVSHAPE0.lims = [xdim, 0, 0]
190 SVSHAPE0.order = [0, 1, 2]
191 SVSHAPE0.mode = 0b10
192 SVSHAPE0.skip = 0b00
193 SVSHAPE0.offset = 0 # experiment with different offset, here
194 SVSHAPE0.invxyz = [0, 0, 0] # inversion if desired
196 SVSHAPE1 = SVSHAPE()
197 SVSHAPE1.lims = [xdim, 0, 0]
198 SVSHAPE1.order = [0, 1, 2]
199 SVSHAPE1.mode = 0b10
200 SVSHAPE1.skip = 0b01
201 SVSHAPE1.offset = 0 # experiment with different offset, here
202 SVSHAPE1.invxyz = [0, 0, 0] # inversion if desired
204 # enumerate over the iterator function, getting new indices
205 shapes = list(iterate_indices(SVSHAPE0)), list(iterate_indices(SVSHAPE1))
206 for idx in range(len(shapes[0])):
207 l = shapes[0][idx]
208 r = shapes[1][idx]
209 (l_idx, lend) = l
210 (r_idx, rend) = r
211 print("%d->%d:%d" % (idx, l_idx, r_idx),
212 "end", bin(lend)[2:], bin(rend)[2:])
215 def preduce_y(vec, pred=None, operation=None):
216 if operation is None:
219 res = deepcopy(vec)
220 xdim = len(vec)
221 # set up an SVSHAPE
223 class SVSHAPE:
224 pass
225 SVSHAPE0 = SVSHAPE()
226 SVSHAPE0.lims = [xdim, 0, 0]
227 SVSHAPE0.order = [0, 1, 2]
228 SVSHAPE0.mode = 0b10
229 SVSHAPE0.skip = 0b00
230 SVSHAPE0.offset = 0 # experiment with different offset, here
231 SVSHAPE0.invxyz = [0, 0, 0] # inversion if desired
233 SVSHAPE1 = SVSHAPE()
234 SVSHAPE1.lims = [xdim, 0, 0]
235 SVSHAPE1.order = [0, 1, 2]
236 SVSHAPE1.mode = 0b10
237 SVSHAPE1.skip = 0b01
238 SVSHAPE1.offset = 0 # experiment with different offset, here
239 SVSHAPE1.invxyz = [0, 0, 0] # inversion if desired
241 # enumerate over the iterator function, getting new indices
242 shapes = list(iterate_indices(SVSHAPE0)), list(iterate_indices(SVSHAPE1))
243 for idx in range(len(shapes[0])):
244 l = shapes[0][idx]
245 r = shapes[1][idx]
246 (l_idx, lend) = l
247 (r_idx, rend) = r
248 res[l_idx] = operation(res[l_idx], res[r_idx])
249 return res
252 # run the demo
253 if __name__ == '__main__':
254 print("reduction:")
255 demo()
256 print("prefix-sum:")
257 demo_prefix_sum()
258 print("reduction results:")
259 vec = [1, 2, 3, 4, 9, 5, 6]
260 res = preduce_y(vec)
261 print(vec)
262 print(res)