From c2814018538c5f0761f1fe2d435e4fd7da6d4488 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Mon, 5 Jul 2021 19:36:01 +0100 Subject: [PATCH] debugging SVSHAPE for REMAP --- src/openpower/decoder/isa/caller.py | 22 +++++-- src/openpower/decoder/isa/svshape.py | 31 +++++++-- .../decoder/isa/test_caller_svp64_matrix.py | 66 +++++++++++++------ src/openpower/sv/trans/svp64.py | 10 +-- 4 files changed, 90 insertions(+), 39 deletions(-) diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index 40e5ef28..d327d05a 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -1204,16 +1204,25 @@ class ISACaller: # get four SVSHAPEs. here we are hard-coding # SVSHAPE0 to FRT, SVSHAPE1 to FRA, SVSHAPE2 to FRC and # SVSHAPE3 to FRB, assuming "fmadd FRT, FRA, FRC, FRB." - remaps = [self.spr['SVSHAPE0'].get_iterator(), - self.spr['SVSHAPE1'].get_iterator(), - self.spr['SVSHAPE2'].get_iterator(), - self.spr['SVSHAPE3'].get_iterator(), + SVSHAPE0 = self.spr['SVSHAPE0'] + SVSHAPE1 = self.spr['SVSHAPE1'] + SVSHAPE2 = self.spr['SVSHAPE2'] + SVSHAPE3 = self.spr['SVSHAPE3'] + print ("svshape0", bin(SVSHAPE0.value)) + print (" xdim", SVSHAPE0.xdimsz) + print (" ydim", SVSHAPE0.ydimsz) + print (" zdim", SVSHAPE0.zdimsz) + remaps = [SVSHAPE0.get_iterator(), + SVSHAPE1.get_iterator(), + SVSHAPE2.get_iterator(), + SVSHAPE3.get_iterator(), ] for i, remap in enumerate(remaps): # XXX hardcoded! pick dststep for out (i==0) else srcstep step = dststep if (i == 0) else srcstep # this is terrible. O(N^2) looking for the match. but hey. 
- for idx, remap_idx in remap: + print ("remap", i, step) + for idx, remap_idx in enumerate(remap): if idx == step: break if i == 0: @@ -1225,6 +1234,7 @@ class ISACaller: yield self.dec2.in2_step.eq(step) elif i == 3: yield self.dec2.in3_step.eq(step) + print ("\t", idx, remap_idx) # after that, settle down (combinatorial) to let Vector reg numbers # work themselves out yield Settle() @@ -1482,7 +1492,7 @@ class ISACaller: # to be able to know if it should apply in the next instruction. # also (if going to use this instruction) should disable ability # to interrupt in between. sigh. - self.last_op_svshape = name == 'svshape' + self.last_op_svshape = asmop == 'svremap' self.update_pc_next() diff --git a/src/openpower/decoder/isa/svshape.py b/src/openpower/decoder/isa/svshape.py index 16f90a0a..8b133935 100644 --- a/src/openpower/decoder/isa/svshape.py +++ b/src/openpower/decoder/isa/svshape.py @@ -29,27 +29,27 @@ class SVSHAPE(SelectableInt): @property def xdimsz(self): - return self.fsi['xdimsz'].asint(msb0=True) + return self.fsi['xdimsz'].asint(msb0=True)+1 @xdimsz.setter def xdimsz(self, value): - self.fsi['xdimsz'].eq(value) + self.fsi['xdimsz'].eq(value-1) @property def ydimsz(self): - return self.fsi['ydimsz'].asint(msb0=True) + return self.fsi['ydimsz'].asint(msb0=True)+1 @ydimsz.setter def ydimsz(self, value): - self.fsi['ydimsz'].eq(value) + self.fsi['ydimsz'].eq(value-1) @property def zdimsz(self): - return self.fsi['zdimsz'].asint(msb0=True) + return self.fsi['zdimsz'].asint(msb0=True)+1 @zdimsz.setter def zdimsz(self, value): - self.fsi['zdimsz'].eq(value) + self.fsi['zdimsz'].eq(value-1) @property def lims(self): @@ -60,7 +60,16 @@ class SVSHAPE(SelectableInt): self.xdimsz = value[0] self.ydimsz = value[1] self.zdimsz = value[2] - + + @property + def invxyz(self): + inv = self.fsi['invxyz'].asint(msb0=True) + return [(inv & 0b1), (inv & 0b10) >> 1, (inv & 0b100) >> 2] + + @invxyz.setter + def invxyz(self, value): + self.fsi['invxyz'].eq(value[0] | 
(value[1]<<1) | (value[2]<<2)) + @property def mode(self): return self.fsi['mode'].asint(msb0=True) @@ -69,6 +78,14 @@ class SVSHAPE(SelectableInt): def mode(self, value): self.fsi['mode'].eq(value) + @property + def skip(self): + return self.fsi['skip'].asint(msb0=True) + + @skip.setter + def skip(self, value): + self.fsi['skip'].eq(value) + @property def offset(self): return self.fsi['offset'].asint(msb0=True) diff --git a/src/openpower/decoder/isa/test_caller_svp64_matrix.py b/src/openpower/decoder/isa/test_caller_svp64_matrix.py index 4a9c3f38..6e86d168 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_matrix.py +++ b/src/openpower/decoder/isa/test_caller_svp64_matrix.py @@ -16,6 +16,9 @@ from openpower.consts import SVP64CROffs from copy import deepcopy from openpower.decoder.helpers import fp64toselectable from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE +from functools import reduce +import operator + class DecoderTestCase(FHDLTestCase): @@ -24,32 +27,51 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64)) def test_sv_remap(self): - """>>> lst = ["svremap 2, 2, 3, 0" + """>>> lst = ["svremap 2, 2, 3, 0", + "sv.fmadds 0.v, 8.v, 16.v, 0.v" ] + REMAP fmadds FRT, FRA, FRC, FRB """ - lst = SVP64Asm(["svremap 2, 2, 3, 0" + lst = SVP64Asm(["svremap 2, 2, 3, 0", + "sv.fmadds 0.v, 8.v, 16.v, 0.v" ]) lst = list(lst) - fprs = [0] * 32 - if False: - av = [7.0, -9.8, 2.0, -32.3] # first half of array 0..3 - bv = [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7 - coe = [-1.0, 4.0, 3.1, 6.2] # coefficients - res = [] - # work out the results with the twin mul/add-sub - for i, (a, b, c) in enumerate(zip(av, bv, coe)): - fprs[i+2] = fp64toselectable(a) - fprs[i+6] = fp64toselectable(b) - fprs[i+10] = fp64toselectable(c) - mul = a * c - t = a + mul - u = b - mul - t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single - u = DOUBLE2SINGLE(fp64toselectable(u)) # from double - 
res.append((t, u)) - print ("FFT", i, "in", a, b, "coeff", c, "mul", - mul, "res", t, u) + fprs = [0] * 64 + # 3x2 matrix + X1 = [[1, 2, 3], + [3, 4, 5], + ] + # 2x3 matrix + Y1 = [[6, 7], + [8, 9], + [10, 11], + ] + + X = X1 + Y = Y1 + + xf = reduce(operator.add, X) + yf = reduce(operator.add, Y) + print ("flattened X,Y") + print ("\t", xf) + print ("\t", yf) + + # and create a linear result2, same scheme + #result2 = [0] * (ydim1*xdim2) + + + res = [] + # store FPs + for i, (x, y) in enumerate(zip(xf, yf)): + fprs[i+8] = fp64toselectable(float(x)) # X matrix + fprs[i+16] = fp64toselectable(float(y)) # Y matrix + continue + #t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single + #u = DOUBLE2SINGLE(fp64toselectable(u)) # from double + #res.append((t, u)) + #print ("FFT", i, "in", a, b, "coeff", c, "mul", + # mul, "res", t, u) # SVSTATE (in this case, VL=12, to cover all of matrix) svstate = SVP64State() @@ -64,6 +86,8 @@ class DecoderTestCase(FHDLTestCase): print ("spr svshape1", sim.spr['SVSHAPE1']) print ("spr svshape2", sim.spr['SVSHAPE2']) print ("spr svshape3", sim.spr['SVSHAPE3']) + for i in range(4): + print ("i", i, float(sim.fpr(i))) # confirm that the results are as expected #for i, (t, u) in enumerate(res): # self.assertEqual(sim.fpr(i+2), t) diff --git a/src/openpower/sv/trans/svp64.py b/src/openpower/sv/trans/svp64.py index bf192028..e233eb22 100644 --- a/src/openpower/sv/trans/svp64.py +++ b/src/openpower/sv/trans/svp64.py @@ -196,14 +196,14 @@ class SVP64Asm: yield ".long 0x%x" % insn return - # and svremap + # and svremap. 
note that the dimension fields have one subtracted from each if opcode == 'svremap': insn = 22 << (31-5) # opcode 22, bits 0-5 fields = list(map(int, fields)) - insn |= fields[0] << (31-10) # SVxd , bits 6-10 - insn |= fields[1] << (31-15) # SVyd , bits 11-15 - insn |= fields[2] << (31-16) # SVzd , bits 16-20 - insn |= fields[3] << (31-21) # SVRM , bits 21-25 + insn |= (fields[0]-1) << (31-10) # SVxd , bits 6-10 + insn |= (fields[1]-1) << (31-15) # SVyd , bits 11-15 + insn |= (fields[2]-1) << (31-16) # SVzd , bits 16-20 + insn |= (fields[3]) << (31-21) # SVRM , bits 21-25 insn |= 0b00001 << (31-30) # XO , bits 26..30 log ("svremap", bin(insn)) yield ".long 0x%x" % insn -- 2.30.2