j <- j + newstep
i <- i + 1
step <- newstep
+ # VL in Parallel-Reduce is the number of operations
vlen[0:6] <- i
- # VL in Matrix Multiply is xd*yd*zd
- n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
- vlen[0:6] <- n[14:20]
- # set up template in SVSHAPE0, then copy to 1-3
+ # set up template in SVSHAPE0, then copy to SVSHAPE1 (only two shapes are needed)
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
- SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
- SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
- SVSHAPE0[28:29] <- 0b11 # skip z
+ SVSHAPE0[30:31] <- 0b10 # parallel reduce submode
# copy
SVSHAPE1[0:31] <- SVSHAPE0[0:31]
- SVSHAPE2[0:31] <- SVSHAPE0[0:31]
- SVSHAPE3[0:31] <- SVSHAPE0[0:31]
- # set up FRA
- SVSHAPE1[18:20] <- 0b001 # permute x,z,y
- SVSHAPE1[28:29] <- 0b01 # skip z
- # FRC
- SVSHAPE2[18:20] <- 0b001 # permute x,z,y
- SVSHAPE2[28:29] <- 0b11 # skip y
+ # set up right operand (left operand 28:29 is zero)
+ SVSHAPE1[28:29] <- 0b01 # right operand
# set VL, MVL and Vertical-First
SVSTATE[0:6] <- vlen
SVSTATE[7:13] <- vlen
# a "yield" version of the Parallel Reduction REMAP algorithm.
# the algorithm is in-place. it does not perform "MV" operations.
# instead, where a masked-out value *should* be read from is tracked
+from copy import deepcopy
# python "yield" can be iterated. use this to make it clear how
# the indices are generated by using natural-looking nested loops
"end", bin(lend)[2:], bin(rend)[2:])
-def preduce_y(vl, vec, pred):
- for i, other in preduce_yield(vl, vec, pred):
- vec[i] += vec[other]
+def preduce_y(vec, pred=None): # pred is accepted but never read in this body
+ res = deepcopy(vec) # accumulate into a copy; the caller's vec is not modified
+ xdim = len(vec) # the vector length becomes the x limit of both shapes
+ # set up two SVSHAPE descriptors, identical except for "skip"
+ class SVSHAPE:
+ pass
+ SVSHAPE0 = SVSHAPE() # index schedule for the left (accumulated-into) operand
+ SVSHAPE0.lims = [xdim, 0, 0]
+ SVSHAPE0.order = [0,1,2]
+ SVSHAPE0.mode = 0b10 # parallel reduce submode
+ SVSHAPE0.skip = 0b00 # zero selects the left operand
+ SVSHAPE0.offset = 0 # experiment with different offset, here
+ SVSHAPE0.invxyz = [0,0,0] # inversion if desired
+ SVSHAPE1 = SVSHAPE() # index schedule for the right (read-only) operand
+ SVSHAPE1.lims = [xdim, 0, 0]
+ SVSHAPE1.order = [0,1,2]
+ SVSHAPE1.mode = 0b10 # parallel reduce submode
+ SVSHAPE1.skip = 0b01 # 0b01 selects the right operand
+ SVSHAPE1.offset = 0 # experiment with different offset, here
+ SVSHAPE1.invxyz = [0,0,0] # inversion if desired
+
+ # run both index generators to completion: one list of entries per shape
+ shapes = list(iterate_indices(SVSHAPE0)), \
+ list(iterate_indices(SVSHAPE1))
+ for idx in range(len(shapes[0])): # NOTE(review): assumes both lists have equal length
+ l = shapes[0][idx]
+ r = shapes[1][idx]
+ (l_idx, lend) = l # each entry unpacks as a pair; lend is not used here
+ (r_idx, rend) = r # rend is likewise not used here
+ res[l_idx] += res[r_idx] # fold the right element into the left one, in place on res
+ return res # the reduced copy
# run the demo, then reduce a sample vector and print the input and the result
if __name__ == '__main__':
demo()
-
+ vec = [1, 2, 3, 4, 9, 5, 6]
+ res = preduce_y(vec)
+ print (vec) # the input list passed to preduce_y
+ print (res) # the list returned by preduce_y
from openpower.consts import SVP64CROffs
from copy import deepcopy
from openpower.decoder.helpers import fp64toselectable
+from openpower.decoder.isa.remap_preduce_yield import preduce_y
from functools import reduce
import operator
self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
def test_sv_remap1(self):
- """>>> lst = ["svshape 2, 2, 0, 7, 0",
- "svremap 31, 1, 2, 3, 0, 0, 0",
- #"sv.fadd *0, *8, *16"
+ """>>> lst = ["svshape 7, 0, 0, 7, 0",
+ "svremap 31, 0, 1, 0, 0, 0, 0",
+ "sv.add *0, *0, *0"
]
REMAP sv.add, in-place over GPRs 0-6; result is compared with preduce_y(vec)
"""
- lst = SVP64Asm(["svshape 2, 2, 0, 7, 0",
- #"svremap 31, 1, 2, 3, 0, 0, 0",
- #"sv.fmadds *0, *16, *32, *0"
+ lst = SVP64Asm(["svshape 7, 0, 0, 7, 0",
+ "svremap 31, 0, 1, 0, 0, 0, 0",
+ "sv.add *0, *0, *0"
])
lst = list(lst)
- fprs = [0] * 64
- # 3x2 matrix
- X1 = [[1, 2, 3],
- [3, 4, 5],
- ]
- # 2x3 matrix
- Y1 = [[6, 7],
- [8, 9],
- [10, 11],
- ]
-
- X = X1
- Y = Y1
-
- xf = reduce(operator.add, X)
- yf = reduce(operator.add, Y)
- print ("flattened X,Y")
- print ("\t", xf)
- print ("\t", yf)
+ gprs = [0] * 64
+ vec = [1, 2, 3, 4, 9, 5, 6] # input vector; also passed to preduce_y below
# res collects the GPR values read back after the program has run
#result1 = [0] * (ydim1*xdim2)
res = []
- # store FPs
- for i, x in enumerate(xf):
- fprs[i+16] = fp64toselectable(float(x)) # X matrix
- for i, y in enumerate(yf):
- fprs[i+32] = fp64toselectable(float(y)) # Y matrix
- continue
- #t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
- #u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
- #res.append((t, u))
- #print ("FFT", i, "in", a, b, "coeff", c, "mul",
- # mul, "res", t, u)
+ # store vec in GPRs 0..len(vec)-1 as the initial register state
+ for i, x in enumerate(vec):
+ gprs[i] = x
with Program(lst, bigendian=False) as program:
- sim = self.run_tst_program(program, initial_fprs=fprs)
+ sim = self.run_tst_program(program, initial_regs=gprs)
print ("spr svshape0", sim.spr['SVSHAPE0'])
print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
print ("spr svshape1", sim.spr['SVSHAPE1'])
print ("spr svshape2", sim.spr['SVSHAPE2'])
print ("spr svshape3", sim.spr['SVSHAPE3'])
- for i in range(4):
- print ("i", i, float(sim.fpr(i)))
+ for i in range(7): # one GPR per element of vec (len(vec) is 7)
+ val = sim.gpr(i).value
+ res.append(val)
+ print ("i", i, val)
# confirm that the results are as expected
- #for i, (t, u) in enumerate(res):
- # self.assertEqual(sim.fpr(i+2), t)
- # self.assertEqual(sim.fpr(i+6), u)
+ expected = preduce_y(vec) # reference result from the python model
+ for i, v in enumerate(res):
+ self.assertEqual(v, expected[i])
def run_tst_program(self, prog, initial_regs=None,