From 9f2c5d0f0fabe763643f08875e6adaa14cf8d763 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Tue, 20 Jul 2021 14:06:04 +0100 Subject: [PATCH] add inner sub-loop testing from svstep Rc=1 --- src/openpower/decoder/isa/caller.py | 69 +++++---- src/openpower/decoder/isa/remap_fft_yield.py | 2 +- .../decoder/isa/test_caller_setvl.py | 139 +++++++++++++++++- 3 files changed, 174 insertions(+), 36 deletions(-) diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index 7206e108..ab0f8ce9 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -1019,6 +1019,41 @@ class ISACaller: asmop = 'mtcrf' return asmop + def get_remap_indices(self): + # go through all iterators in lock-step, advance to next remap_idx + srcstep, dststep = self.new_srcstep, self.new_dststep + # get four SVSHAPEs. here we are hard-coding + SVSHAPE0 = self.spr['SVSHAPE0'] + SVSHAPE1 = self.spr['SVSHAPE1'] + SVSHAPE2 = self.spr['SVSHAPE2'] + SVSHAPE3 = self.spr['SVSHAPE3'] + # set up the iterators + remaps = [(SVSHAPE0, SVSHAPE0.get_iterator()), + (SVSHAPE1, SVSHAPE1.get_iterator()), + (SVSHAPE2, SVSHAPE2.get_iterator()), + (SVSHAPE3, SVSHAPE3.get_iterator()), + ] + + remap_idxs = [] + self.remap_loopends = [0] * 4 + dbg = [] + for i, (shape, remap) in enumerate(remaps): + # zero is "disabled" + if shape.value == 0x0: + remap_idxs.append(0) + # pick src or dststep depending on reg num (0-2=in, 3-4=out) + step = dststep if (i in [3, 4]) else srcstep + # this is terrible. O(N^2) looking for the match. but hey. 
+ for idx, (remap_idx, loopends) in enumerate(remap): + if idx == step: + break + remap_idxs.append(remap_idx) + self.remap_loopends[i] = loopends + dbg.append((i, step, remap_idx, loopends)) + for (i, step, remap_idx, loopends) in dbg: + log ("SVSHAPE %d idx, end" % i, step, remap_idx, bin(loopends)) + return remap_idxs, remaps + def get_spr_msb(self): dec_insn = yield self.dec2.e.do.insn return dec_insn & (1 << 20) != 0 # sigh - XFF.spr[-1]? @@ -1169,11 +1204,6 @@ class ISACaller: yield self.dec2.remap_active.eq(remap_en if active else 0) yield Settle() if self.is_svp64_mode and (persist or self.last_op_svshape): - # get four SVSHAPEs. here we are hard-coding - SVSHAPE0 = self.spr['SVSHAPE0'] - SVSHAPE1 = self.spr['SVSHAPE1'] - SVSHAPE2 = self.spr['SVSHAPE2'] - SVSHAPE3 = self.spr['SVSHAPE3'] # just some convenient debug info for i in range(4): sname = 'SVSHAPE%d' % i @@ -1195,28 +1225,7 @@ class ISACaller: (self.dec2.o_step, mo0), # RT (self.dec2.o2_step, mo1), # EA ] - # set up the iterators - remaps = [(SVSHAPE0, SVSHAPE0.get_iterator()), - (SVSHAPE1, SVSHAPE1.get_iterator()), - (SVSHAPE2, SVSHAPE2.get_iterator()), - (SVSHAPE3, SVSHAPE3.get_iterator()), - ] - # go through all iterators in lock-step, advance to next remap_idx - remap_idxs = [] - self.remap_loopends = [] - for i, (shape, remap) in enumerate(remaps): - # zero is "disabled" - if shape.value == 0x0: - remap_idxs.append(0) - # pick src or dststep depending on reg num (0-2=in, 3-4=out) - step = dststep if (i in [3, 4]) else srcstep - # this is terrible. O(N^2) looking for the match. but hey. 
- for idx, (remap_idx, loopends) in enumerate(remap): - if idx == step: - break - remap_idxs.append(remap_idx) - self.remap_loopends.append(loopends) - + remap_idxs, remaps = self.get_remap_indices() rremaps = [] # now cross-index the required SHAPE for each of 3-in 2-out regs rnames = ['RA', 'RB', 'RC', 'RT', 'EA'] @@ -1230,7 +1239,7 @@ class ISACaller: yield dstep.eq(remap_idx) # debug printout info - rremaps.append((shape.mode, i, rnames[i], step, shape_idx, + rremaps.append((shape.mode, i, rnames[i], shape_idx, remap_idx)) for x in rremaps: log ("shape remap", x) @@ -1471,6 +1480,7 @@ class ISACaller: else: log ("SVSTATE_NEXT: post-inc") srcstep, dststep = self.new_srcstep, self.new_dststep + remap_idxs, remaps = self.get_remap_indices() vl = self.svstate.vl end_src = srcstep == vl-1 end_dst = dststep == vl-1 @@ -1490,7 +1500,8 @@ class ISACaller: # see if svstep was requested, if so, which SVSTATE endings = 0b111 if self.svstate_next_mode > 0: - endings = self.remap_loopends[self.svstate_nextmode-1] + shape_idx = self.svstate_next_mode.value-1 + endings = self.remap_loopends[shape_idx] cr_field = SelectableInt((~endings)<<1 | endtest, 4) print ("svstep Rc=1, CR0", cr_field) self.crl[0].eq(cr_field) # CR0 diff --git a/src/openpower/decoder/isa/remap_fft_yield.py b/src/openpower/decoder/isa/remap_fft_yield.py index ea9421b5..422c2187 100644 --- a/src/openpower/decoder/isa/remap_fft_yield.py +++ b/src/openpower/decoder/isa/remap_fft_yield.py @@ -82,7 +82,7 @@ def iterate_butterfly_indices(SVSHAPE): def demo(): # set the dimension sizes here - xdim = 16 + xdim = 8 ydim = 0 # not needed zdim = 0 # again, not needed diff --git a/src/openpower/decoder/isa/test_caller_setvl.py b/src/openpower/decoder/isa/test_caller_setvl.py index 2706332c..d1adae8c 100644 --- a/src/openpower/decoder/isa/test_caller_setvl.py +++ b/src/openpower/decoder/isa/test_caller_setvl.py @@ -23,7 +23,7 @@ class DecoderTestCase(FHDLTestCase): for i in range(32): self.assertEqual(sim.gpr(i), 
SelectableInt(expected[i], 64)) - def test__svstep_1(self): + def test_svstep_1(self): lst = SVP64Asm(["setvl 0, 0, 10, 1, 1, 1", # actual setvl (VF mode) "setvl 0, 0, 1, 1, 0, 0", # svstep "setvl 0, 0, 1, 1, 0, 0" # svstep @@ -52,7 +52,7 @@ class DecoderTestCase(FHDLTestCase): print(" gpr1", sim.gpr(0)) self.assertEqual(sim.gpr(0), SelectableInt(0, 64)) - def test__svstep_2(self): + def test_svstep_2(self): """tests svstep when it reaches VL """ lst = SVP64Asm(["setvl 0, 0, 2, 1, 1, 1", # actual setvl (VF mode) @@ -90,7 +90,7 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(CR0[CRFields.GT], 0) self.assertEqual(CR0[CRFields.SO], 1) - def test__svstep_3(self): + def test_svstep_3(self): """tests svstep when it *doesn't* reach VL """ lst = SVP64Asm(["setvl 0, 0, 3, 1, 1, 1", # actual setvl (VF mode) @@ -129,7 +129,7 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(CR0[CRFields.SO], 0) - def test__setvl_1(self): + def test_setvl_1(self): """straight setvl, testing if VL and MVL are over-ridden """ lst = SVP64Asm(["setvl 1, 0, 10, 0, 1, 1", @@ -153,7 +153,134 @@ class DecoderTestCase(FHDLTestCase): print(" gpr1", sim.gpr(1)) self.assertEqual(sim.gpr(1), SelectableInt(10, 64)) - def test__sv_add(self): + def test_svstep_inner_loop_6(self): + """tests svstep inner loop, running 6 times, looking for "k" + """ + lst = SVP64Asm([ + # set triple butterfly mode with persistent "REMAP" + "svshape 8, 1, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1, 1", + "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1) + "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1) + "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1) + "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1) + "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1) + "setvl. 
0, 0, 2, 1, 0, 0",# svstep (Rc=1) + ]) + lst = list(lst) + + # SVSTATE + svstate = SVP64State() + #svstate.vl = 2 # VL + #svstate.maxvl = 2 # MAXVL + print ("SVSTATE", bin(svstate.asint())) + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, svstate=svstate) + print ("SVSTATE after", bin(sim.svstate.asint())) + print (" vl", bin(sim.svstate.vl)) + print (" mvl", bin(sim.svstate.maxvl)) + print (" srcstep", bin(sim.svstate.srcstep)) + print (" dststep", bin(sim.svstate.dststep)) + print (" vfirst", bin(sim.svstate. vfirst)) + self.assertEqual(sim.svstate.vl, 12) + self.assertEqual(sim.svstate.maxvl, 12) + # svstep called six times, didn't reach VL (12), so srcstep/dststep both 6 + self.assertEqual(sim.svstate.srcstep, 6) + self.assertEqual(sim.svstate.dststep, 6) + self.assertEqual(sim.gpr(0), SelectableInt(0, 64)) + self.assertEqual(sim.svstate.vfirst, 1) + CR0 = sim.crl[0] + print(" CR0", bin(CR0.get_range().value)) + self.assertEqual(CR0[CRFields.EQ], 0) + self.assertEqual(CR0[CRFields.LT], 1) + self.assertEqual(CR0[CRFields.GT], 1) + self.assertEqual(CR0[CRFields.SO], 0) + + def test_svstep_inner_loop_3(self): + """tests svstep inner loop, running 3 times + """ + lst = SVP64Asm([ + # set triple butterfly mode with persistent "REMAP" + "svshape 8, 1, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1, 1", + "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1) + "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1) + "setvl. 
0, 0, 2, 1, 0, 0", # svstep (Rc=1) + ]) + lst = list(lst) + + # SVSTATE + svstate = SVP64State() + #svstate.vl = 2 # VL + #svstate.maxvl = 2 # MAXVL + print ("SVSTATE", bin(svstate.asint())) + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, svstate=svstate) + print ("SVSTATE after", bin(sim.svstate.asint())) + print (" vl", bin(sim.svstate.vl)) + print (" mvl", bin(sim.svstate.maxvl)) + print (" srcstep", bin(sim.svstate.srcstep)) + print (" dststep", bin(sim.svstate.dststep)) + print (" vfirst", bin(sim.svstate. vfirst)) + self.assertEqual(sim.svstate.vl, 12) + self.assertEqual(sim.svstate.maxvl, 12) + # svstep called three times, didn't reach VL (12), so srcstep/dststep both 3 + self.assertEqual(sim.svstate.srcstep, 3) + self.assertEqual(sim.svstate.dststep, 3) + self.assertEqual(sim.gpr(0), SelectableInt(0, 64)) + self.assertEqual(sim.svstate.vfirst, 1) + CR0 = sim.crl[0] + print(" CR0", bin(CR0.get_range().value)) + self.assertEqual(CR0[CRFields.EQ], 0) + self.assertEqual(CR0[CRFields.LT], 1) + self.assertEqual(CR0[CRFields.GT], 1) + self.assertEqual(CR0[CRFields.SO], 0) + + def test_svstep_inner_loop_4(self): + """tests svstep inner loop, running 4 times + """ + lst = SVP64Asm([ + # set triple butterfly mode with persistent "REMAP" + "svshape 8, 1, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1, 1", + "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1) + "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1) + "setvl. 0, 0, 2, 1, 0, 0", # svstep (Rc=1) + "setvl. 
0, 0, 2, 1, 0, 0", # svstep (Rc=1) + ]) + lst = list(lst) + + # SVSTATE + svstate = SVP64State() + #svstate.vl = 2 # VL + #svstate.maxvl = 2 # MAXVL + print ("SVSTATE", bin(svstate.asint())) + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, svstate=svstate) + print ("SVSTATE after", bin(sim.svstate.asint())) + print (" vl", bin(sim.svstate.vl)) + print (" mvl", bin(sim.svstate.maxvl)) + print (" srcstep", bin(sim.svstate.srcstep)) + print (" dststep", bin(sim.svstate.dststep)) + print (" vfirst", bin(sim.svstate. vfirst)) + self.assertEqual(sim.svstate.vl, 12) + self.assertEqual(sim.svstate.maxvl, 12) + # svstep called four times, didn't reach VL (12), so srcstep/dststep both 4 + self.assertEqual(sim.svstate.srcstep, 4) + self.assertEqual(sim.svstate.dststep, 4) + self.assertEqual(sim.gpr(0), SelectableInt(0, 64)) + self.assertEqual(sim.svstate.vfirst, 1) + CR0 = sim.crl[0] + print(" CR0", bin(CR0.get_range().value)) + self.assertEqual(CR0[CRFields.EQ], 0) + self.assertEqual(CR0[CRFields.LT], 1) + self.assertEqual(CR0[CRFields.GT], 0) + self.assertEqual(CR0[CRFields.SO], 0) + + def test_sv_add(self): + """sets VL=2 then adds: * 1 = 5 + 9 => 0x5555 = 0x4321+0x1234 * 2 = 6 + 10 => 0x3334 = 0x2223+0x1111 @@ -181,7 +308,7 @@ class DecoderTestCase(FHDLTestCase): sim = self.run_tst_program(program, initial_regs) self._check_regs(sim, expected_regs) - def test__svstep_add_1(self): + def test_svstep_add_1(self): """tests svstep with an add, when it reaches VL lst = SVP64Asm(["setvl 3, 0, 2, 1, 1, 1", 'sv.add 1.v, 5.v, 9.v', -- 2.30.2