add yield version of parallel-reduce
[libreriscv.git] / openpower / sv / preduce.py
1 from copy import copy
2
def preduce(vl, vec, pred):
    """Predicated pairwise (tree) sum-reduction that moves data.

    Works on shallow copies of both *vec* and *pred*, so the caller's
    lists are left untouched.  On every pass the stride doubles and
    each slot at a multiple of the stride is combined with its partner
    half a stride further on:

    * both predicated        -> partner is added into the slot
    * only partner predicated -> partner's value is copied into the slot
    * partner not predicated (or beyond *vl*) -> slot is left alone

    The slot's predicate bit is then OR-ed with the partner's, so a
    slot is marked live as soon as it holds any partial result.

    Progress is printed before the first pass and after every pass.

    Returns the working copy of the vector.
    """
    data = copy(vec)
    mask = copy(pred)  # private copy: the caller's predicate is preserved
    half = 1
    print(" start", half, mask, data)
    while half < vl:
        stride = 2 * half
        for lo in range(0, vl, stride):
            hi = lo + half
            # False when the partner is off the end of the vector
            hi_live = hi < vl and mask[hi]
            if hi_live:
                if mask[lo]:
                    data[lo] += data[hi]
                else:
                    data[lo] = data[hi]
            mask[lo] |= hi_live
        print(" row", stride, mask, data)
        half = stride
    return data
20
21
def preducei(vl, vec, pred):
    """Predicated pairwise (tree) sum-reduction that moves indices.

    Same schedule as a data-moving tree reduction, except that when
    only the partner is predicated the partner's *index* is recorded
    for the slot instead of copying its value; the vector element
    itself stays where it is.  Additions are therefore performed
    directly on whichever elements the slots currently refer to.

    Works on shallow copies of *vec* and *pred*; progress (including
    the slot-to-index table) is printed before the first pass and
    after every pass.

    Returns the working copy of the vector.
    """
    data = copy(vec)
    mask = copy(pred)  # private copy: the caller's predicate is preserved
    half = 1
    slot = list(range(vl))  # slot -> index of the element it refers to
    print(" start", half, mask, data)
    while half < vl:
        stride = 2 * half
        for lo in range(0, vl, stride):
            hi = lo + half
            dst = slot[lo]
            if hi < vl:
                src = slot[hi]
                src_live = mask[src]
            else:
                # partner is off the end of the vector
                src = None
                src_live = False
            if src_live:
                if mask[dst]:
                    data[dst] += data[src]
                else:
                    # nothing to add to: redirect the slot, data stays put
                    slot[lo] = src
            mask[dst] |= src_live
        print(" row", stride, mask, data, slot)
        half = stride
    return data
42
43
def preduce_yield(vl, vec, pred):
    """Generate the schedule of additions for an index-moving reduction.

    Yields ``(dst, src)`` index pairs, in order; the consumer is
    expected to perform ``vec[dst] += vec[src]`` for each pair.  When
    only the partner of a slot is predicated, no pair is yielded and
    the slot is simply redirected to the partner's index.

    *vec* is accepted for signature compatibility but is never read
    here.  *pred* is shallow-copied, so the caller's predicate is not
    modified.
    """
    mask = copy(pred)  # private copy: the caller's predicate is preserved
    slot = list(range(vl))  # slot -> index of the element it refers to
    half = 1
    while half < vl:
        stride = 2 * half
        for lo in range(0, vl, stride):
            hi = lo + half
            dst = slot[lo]
            if hi < vl:
                src = slot[hi]
                src_live = mask[src]
            else:
                # partner is off the end of the vector
                src = None
                src_live = False
            if src_live:
                if mask[dst]:
                    yield dst, src
                else:
                    slot[lo] = src
            mask[dst] |= src_live
        half = stride
60
61
def preduce_y(vl, vec, pred):
    """Reduce *vec* in place by replaying the preduce_yield schedule.

    Each ``(dst, src)`` pair produced by the generator is applied as
    ``vec[dst] += vec[src]``.  Unlike the copying variants, this
    modifies the caller's *vec* directly and returns nothing.
    """
    schedule = preduce_yield(vl, vec, pred)
    for dst, src in schedule:
        vec[dst] += vec[src]
65
66
if __name__ == '__main__':
    # Self-test: run the same input vector through all three reduction
    # variants under two different predicate masks.  The in-place
    # yield-driven result must match the index-moving result exactly.
    masks = (
        [0, 1, 1, 1, 0, 0, 1],
        [1, 0, 0, 1, 1, 0, 1],
    )
    for prd in masks:
        vec = [1, 2, 3, 4, 9, 5, 6]  # fresh input: preduce_y mutates it
        print(vec)
        res = preduce(len(vec), vec, prd)
        print(res)
        res2 = preducei(len(vec), vec, prd)
        print(res2)
        print()
        preduce_y(len(vec), vec, prd)
        print(vec)
        print()
        assert vec == res2
93