From: Luke Kenneth Casson Leighton Date: Tue, 6 Sep 2022 14:50:35 +0000 (+0100) Subject: add first functional confirmed unit test for parallel reduce REMAP X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e88b945123f2c3f047748a44fdf446775517076a;p=openpower-isa.git add first functional confirmed unit test for parallel reduce REMAP https://bugs.libre-soc.org/show_bug.cgi?id=864 Uses remap_preduce_yield directly; confirmed operational through ISACaller. The parallel-reduce iterator is added through decoder/isa/svshape.py, where SVSHAPE.get_iterator() is responsible for returning the appropriate yield-iterator. --- diff --git a/openpower/isa/simplev.mdwn b/openpower/isa/simplev.mdwn index 61dabd98..32ad1314 100644 --- a/openpower/isa/simplev.mdwn +++ b/openpower/isa/simplev.mdwn @@ -279,25 +279,15 @@ Pseudo-code: j <- j + newstep i <- i + 1 step <- newstep + # VL in Parallel-Reduce is the number of operations vlen[0:6] <- i - # VL in Matrix Multiply is xd*yd*zd - n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd) - vlen[0:6] <- n[14:20] - # set up template in SVSHAPE0, then copy to 1-3 + # set up template in SVSHAPE0, then copy to 1. 
only 2 needed SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim - SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim - SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - SVSHAPE0[28:29] <- 0b11 # skip z + SVSHAPE0[30:31] <- 0b10 # parallel reduce submode # copy SVSHAPE1[0:31] <- SVSHAPE0[0:31] - SVSHAPE2[0:31] <- SVSHAPE0[0:31] - SVSHAPE3[0:31] <- SVSHAPE0[0:31] - # set up FRA - SVSHAPE1[18:20] <- 0b001 # permute x,z,y - SVSHAPE1[28:29] <- 0b01 # skip z - # FRC - SVSHAPE2[18:20] <- 0b001 # permute x,z,y - SVSHAPE2[28:29] <- 0b11 # skip y + # set up right operand (left operand 28:29 is zero) + SVSHAPE1[28:29] <- 0b01 # right operand # set VL, MVL and Vertical-First SVSTATE[0:6] <- vlen SVSTATE[7:13] <- vlen diff --git a/src/openpower/decoder/isa/remap_preduce_yield.py b/src/openpower/decoder/isa/remap_preduce_yield.py index 8c9e506a..b9433fb8 100644 --- a/src/openpower/decoder/isa/remap_preduce_yield.py +++ b/src/openpower/decoder/isa/remap_preduce_yield.py @@ -1,6 +1,7 @@ # a "yield" version of the Parallel Reduction REMAP algorithm. # the algorithm is in-place. it does not perform "MV" operations. # instead, where a masked-out value *should* be read from is tracked +from copy import deepcopy # python "yield" can be iterated. 
use this to make it clear how # the indices are generated by using natural-looking nested loops @@ -75,12 +76,43 @@ def demo(): "end", bin(lend)[2:], bin(rend)[2:]) -def preduce_y(vl, vec, pred): - for i, other in preduce_yield(vl, vec, pred): - vec[i] += vec[other] +def preduce_y(vec, pred=None): + res = deepcopy(vec) + xdim = len(vec) + # set up an SVSHAPE + class SVSHAPE: + pass + SVSHAPE0 = SVSHAPE() + SVSHAPE0.lims = [xdim, 0, 0] + SVSHAPE0.order = [0,1,2] + SVSHAPE0.mode = 0b10 + SVSHAPE0.skip = 0b00 + SVSHAPE0.offset = 0 # experiment with different offset, here + SVSHAPE0.invxyz = [0,0,0] # inversion if desired + SVSHAPE1 = SVSHAPE() + SVSHAPE1.lims = [xdim, 0, 0] + SVSHAPE1.order = [0,1,2] + SVSHAPE1.mode = 0b10 + SVSHAPE1.skip = 0b01 + SVSHAPE1.offset = 0 # experiment with different offset, here + SVSHAPE1.invxyz = [0,0,0] # inversion if desired + + # enumerate over the iterator function, getting new indices + shapes = list(iterate_indices(SVSHAPE0)), \ + list(iterate_indices(SVSHAPE1)) + for idx in range(len(shapes[0])): + l = shapes[0][idx] + r = shapes[1][idx] + (l_idx, lend) = l + (r_idx, rend) = r + res[l_idx] += res[r_idx] + return res # run the demo if __name__ == '__main__': demo() - + vec = [1, 2, 3, 4, 9, 5, 6] + res = preduce_y(vec) + print (vec) + print (res) diff --git a/src/openpower/decoder/isa/svshape.py b/src/openpower/decoder/isa/svshape.py index 5530b28d..2bdf530b 100644 --- a/src/openpower/decoder/isa/svshape.py +++ b/src/openpower/decoder/isa/svshape.py @@ -9,6 +9,8 @@ https://libre-soc.org/openpower/sv/remap from openpower.decoder.selectable_int import (FieldSelectableInt, SelectableInt, selectconcat) from openpower.decoder.isa.remapyield import iterate_indices +from openpower.decoder.isa.remap_preduce_yield import (iterate_indices as + iterate_preduce_indices) from openpower.decoder.isa.remap_fft_yield import iterate_butterfly_indices from openpower.decoder.isa.remap_dct_yield import ( iterate_dct_inner_butterfly_indices, @@ -163,6 
+165,8 @@ class SVSHAPE(SelectableInt): log ("SVSHAPE get_iterator", self.mode, self.ydimsz, self.is_indexed()) if self.mode == 0b00: iterate_fn = iterate_indices + elif self.mode == 0b10: + iterate_fn = iterate_preduce_indices elif self.mode in [0b01, 0b11]: # further sub-selection if self.ydimsz == 1: diff --git a/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py b/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py index 5e9b7c6b..7b607ab8 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py +++ b/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py @@ -15,6 +15,7 @@ from openpower.sv.trans.svp64 import SVP64Asm from openpower.consts import SVP64CROffs from copy import deepcopy from openpower.decoder.helpers import fp64toselectable +from openpower.decoder.isa.remap_preduce_yield import preduce_y from functools import reduce import operator @@ -26,57 +27,32 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64)) def test_sv_remap1(self): - """>>> lst = ["svshape 2, 2, 0, 7, 0", - "svremap 31, 1, 2, 3, 0, 0, 0", - #"sv.fadd *0, *8, *16" + """>>> lst = ["svshape 7, 0, 0, 7, 0", + "svremap 31, 0, 1, 0, 0, 0, 0", + "sv.add *0, *8, *16" ] REMAP fmadds FRT, FRA, FRC, FRB """ - lst = SVP64Asm(["svshape 2, 2, 0, 7, 0", - #"svremap 31, 1, 2, 3, 0, 0, 0", - #"sv.fmadds *0, *16, *32, *0" + lst = SVP64Asm(["svshape 7, 0, 0, 7, 0", + "svremap 31, 0, 1, 0, 0, 0, 0", + "sv.add *0, *0, *0" ]) lst = list(lst) - fprs = [0] * 64 - # 3x2 matrix - X1 = [[1, 2, 3], - [3, 4, 5], - ] - # 2x3 matrix - Y1 = [[6, 7], - [8, 9], - [10, 11], - ] - - X = X1 - Y = Y1 - - xf = reduce(operator.add, X) - yf = reduce(operator.add, Y) - print ("flattened X,Y") - print ("\t", xf) - print ("\t", yf) + gprs = [0] * 64 + vec = [1, 2, 3, 4, 9, 5, 6] # and create a linear result2, same scheme #result1 = [0] * (ydim1*xdim2) res = [] - # store FPs - for i, x in enumerate(xf): - fprs[i+16] = 
fp64toselectable(float(x)) # X matrix - for i, y in enumerate(yf): - fprs[i+32] = fp64toselectable(float(y)) # Y matrix - continue - #t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single - #u = DOUBLE2SINGLE(fp64toselectable(u)) # from double - #res.append((t, u)) - #print ("FFT", i, "in", a, b, "coeff", c, "mul", - # mul, "res", t, u) + # store GPRs + for i, x in enumerate(vec): + gprs[i] = x with Program(lst, bigendian=False) as program: - sim = self.run_tst_program(program, initial_fprs=fprs) + sim = self.run_tst_program(program, initial_regs=gprs) print ("spr svshape0", sim.spr['SVSHAPE0']) print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz) print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz) @@ -84,12 +60,14 @@ class DecoderTestCase(FHDLTestCase): print ("spr svshape1", sim.spr['SVSHAPE1']) print ("spr svshape2", sim.spr['SVSHAPE2']) print ("spr svshape3", sim.spr['SVSHAPE3']) - for i in range(4): - print ("i", i, float(sim.fpr(i))) + for i in range(7): + val = sim.gpr(i).value + res.append(val) + print ("i", i, val) # confirm that the results are as expected - #for i, (t, u) in enumerate(res): - # self.assertEqual(sim.fpr(i+2), t) - # self.assertEqual(sim.fpr(i+6), u) + expected = preduce_y(vec) + for i, v in enumerate(res): + self.assertEqual(v, expected[i]) def run_tst_program(self, prog, initial_regs=None,