From 192ed43e8dc84c7f72e0b1a695ee3b23ed7b79f4 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 11 Jul 2021 20:17:51 +0100 Subject: [PATCH] add svremap instruction into ISACaller alter FFT and Matrix-Multiply SVP64 tests to use new svremap generic redirect of any register to a SVSHAPE(0-3) rather than hard-coded --- src/openpower/decoder/isa/caller.py | 102 ++++++++++-------- .../decoder/isa/test_caller_setvl.py | 24 +++++ .../decoder/isa/test_caller_svp64_fft.py | 14 ++- .../decoder/isa/test_caller_svp64_matrix.py | 4 + src/openpower/decoder/power_decoder2.py | 15 +-- 5 files changed, 101 insertions(+), 58 deletions(-) diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index 3adc94bd..c6fbbd04 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -743,6 +743,7 @@ class ISACaller: # then "yield" fields only from op_fields rather than hard-coded # list, here. fields = self.decoder.sigforms[formname] + log("prep_namespace", formname, op_fields) for name in op_fields: if name == 'spr': sig = getattr(fields, name.upper()) @@ -1098,6 +1099,11 @@ class ISACaller: illegal = False name = 'setvl' + # and svremap not being supported by binutils (.long) + if asmop.startswith('svremap'): + illegal = False + name = 'svremap' + # and svshape not being supported by binutils (.long) if asmop.startswith('svshape'): illegal = False @@ -1136,7 +1142,7 @@ class ISACaller: # preserve order of register names input_names = create_args(list(info.read_regs) + list(info.uninit_regs)) - log(input_names) + log("input names", input_names) # get SVP64 entry for the current instruction sv_rm = self.svp64rm.instrs.get(name) @@ -1167,77 +1173,78 @@ class ISACaller: self.namespace['NIA']) return - # for when SVSHAPE is active, a very bad hack here (to be replaced) - # using pre-arranged schedule. all of this is awful but it is a - # start. next job will be to put the proper activation in place - yield self.dec2.remap_active.eq(1 if self.last_op_svshape else 0) + # get the REMAP SPR + SVREMAP = self.spr['SVREMAP'] + # for when SVREMAP is active, using pre-arranged schedule. + # note: modifying PowerDecoder2 needs to "settle" + remap_en = SVREMAP.men + active = self.last_op_svshape and remap_en != 0 + yield self.dec2.remap_active.eq(remap_en if active else 0) yield Settle() if self.is_svp64_mode and self.last_op_svshape: # get four SVSHAPEs. here we are hard-coding - # SVSHAPE0 to FRT, SVSHAPE1 to FRA, SVSHAPE2 to FRC and - # SVSHAPE3 to FRB, assuming "fmadd FRT, FRA, FRC, FRB." SVSHAPE0 = self.spr['SVSHAPE0'] SVSHAPE1 = self.spr['SVSHAPE1'] SVSHAPE2 = self.spr['SVSHAPE2'] SVSHAPE3 = self.spr['SVSHAPE3'] + # just some convenient debug info for i in range(4): sname = 'SVSHAPE%d' % i shape = self.spr[sname] - print (sname, bin(shape.value)) - print (" lims", shape.lims) - print (" mode", shape.mode) - print (" skip", shape.skip) - + log (sname, bin(shape.value)) + log (" lims", shape.lims) + log (" mode", shape.mode) + log (" skip", shape.skip) + + # set up the list of steps to remap + steps = [(self.dec2.in1_step, SVREMAP.mi0), # RA + (self.dec2.in2_step, SVREMAP.mi1), # RB + (self.dec2.in3_step, SVREMAP.mi2), # RC + (self.dec2.o_step, SVREMAP.mo0), # RT + (self.dec2.o2_step, SVREMAP.mo1), # EA + ] + # set up the iterators remaps = [(SVSHAPE0, SVSHAPE0.get_iterator()), (SVSHAPE1, SVSHAPE1.get_iterator()), (SVSHAPE2, SVSHAPE2.get_iterator()), (SVSHAPE3, SVSHAPE3.get_iterator()), ] - rremaps = [] + # go through all iterators in lock-step, advance to next remap_idx + remap_idxs = [] for i, (shape, remap) in enumerate(remaps): # zero is "disabled" if shape.value == 0x0: - continue - # XXX hardcoded! pick dststep for out (i==0) else srcstep - if shape.mode == 0b00: # multiply mode - step = dststep if (i == 0) else srcstep - if shape.mode == 0b01: # FFT butterfly mode - step = srcstep # XXX HACK - for now only use srcstep + remap_idxs.append(0) + # pick src or dststep depending on reg num (0-2=in, 3-4=out) + step = dststep if (i in [3, 4]) else srcstep # this is terrible. O(N^2) looking for the match. but hey. for idx, remap_idx in enumerate(remap): if idx == step: break - # multiply mode - if shape.mode == 0b00: - if i == 0: - yield self.dec2.o_step.eq(remap_idx) # RT - yield self.dec2.o2_step.eq(remap_idx) # EA - elif i == 1: - yield self.dec2.in1_step.eq(remap_idx) # RA - elif i == 2: - yield self.dec2.in3_step.eq(remap_idx) # RB - elif i == 3: - yield self.dec2.in2_step.eq(remap_idx) # RC - # FFT butterfly mode - if shape.mode == 0b01: - if i == 0: - yield self.dec2.o_step.eq(remap_idx) # RT - yield self.dec2.in2_step.eq(remap_idx) # RB - elif i == 1: - yield self.dec2.in1_step.eq(remap_idx) # RA - yield self.dec2.o2_step.eq(remap_idx) # EA (FRS) - elif i == 2: - yield self.dec2.in3_step.eq(remap_idx) # RC - elif i == 3: - pass # no SVSHAPE3 - rremaps.append((shape.mode, i, idx, remap_idx)) # debug printing + remap_idxs.append(remap_idx) + + rremaps = [] + # now cross-index the required SHAPE for each of 3-in 2-out regs + rnames = ['RA', 'RB', 'RC', 'RT', 'EA'] + for i, (dstep, shape_idx) in enumerate(steps): + (shape, remap) = remaps[shape_idx] + remap_idx = remap_idxs[shape_idx] + # zero is "disabled" + if shape.value == 0x0: + continue + # now set the actual requested step to the current index + yield dstep.eq(remap_idx) + + # debug printout info + rremaps.append((shape.mode, i, rnames[i], step, shape_idx, + remap_idx)) for x in rremaps: - print ("shape remap", x) + log ("shape remap", x) # after that, settle down (combinatorial) to let Vector reg numbers # work themselves out yield Settle() remap_active = yield self.dec2.remap_active - print ("remap active", remap_active) + log ("remap active", bin(remap_active)) # main input registers (RT, RA ...) inputs = [] @@ -1284,7 +1291,7 @@ class ISACaller: if ldstmode == SVP64LDSTmode.BITREVERSE.value: imm = yield self.dec2.dec.fields.FormSVD.SVD[0:11] imm = exts(imm, 11) # sign-extend to integer - print ("bitrev SVD", imm) + log ("bitrev SVD", imm) replace_d = True else: imm = yield self.dec2.dec.fields.FormD.D[0:16] @@ -1499,7 +1506,7 @@ class ISACaller: # to be able to know if it should apply in the next instruction. # also (if going to use this instruction) should disable ability # to interrupt in between. sigh. - self.last_op_svshape = asmop == 'svshape' + self.last_op_svshape = asmop == 'svremap' self.update_pc_next() @@ -1688,6 +1695,7 @@ def inject(): context = args[0].namespace # variables to be injected saved_values = func_globals.copy() # Shallow copy of dict. + log("globals before", context.keys()) func_globals.update(context) result = func(*args, **kwargs) log("globals after", func_globals['CIA'], func_globals['NIA']) diff --git a/src/openpower/decoder/isa/test_caller_setvl.py b/src/openpower/decoder/isa/test_caller_setvl.py index aece841b..8cbd8174 100644 --- a/src/openpower/decoder/isa/test_caller_setvl.py +++ b/src/openpower/decoder/isa/test_caller_setvl.py @@ -328,6 +328,30 @@ class DecoderTestCase(FHDLTestCase): # check registers as expected self._check_regs(sim, expected_regs) + def test_svremap(self): + """svremap, see if values get set + """ + lst = SVP64Asm(["svremap 11, 0, 1, 2, 3, 3", + ]) + lst = list(lst) + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program) + svremap = sim.spr('SVREMAP') + print ("SVREMAP after", bin(svremap.value)) + print (" men", bin(svremap.men)) + print (" mi0", bin(svremap.mi0)) + print (" mi1", bin(svremap.mi1)) + print (" mi2", bin(svremap.mi2)) + print (" mo0", bin(svremap.mo0)) + print (" mo1", bin(svremap.mo1)) + self.assertEqual(svremap.men, 11) + self.assertEqual(svremap.mi0, 0) + self.assertEqual(svremap.mi1, 1) + self.assertEqual(svremap.mi2, 2) + self.assertEqual(svremap.mo0, 3) + self.assertEqual(svremap.mo1, 3) + def run_tst_program(self, prog, initial_regs=None, svstate=None): if initial_regs is None: diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py index 605a8b3d..c7854ee8 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_fft.py +++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py @@ -123,6 +123,7 @@ class FFTTestCase(FHDLTestCase): def test_sv_remap_fpmadds_fft(self): """>>> lst = ["svshape 8, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1", "sv.ffmadds 2.v, 2.v, 2.v, 10.v" ] runs a full in-place O(N log2 N) butterfly schedule for @@ -138,6 +139,7 @@ class FFTTestCase(FHDLTestCase): (3 inputs, 2 outputs) """ lst = SVP64Asm( ["svshape 8, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1", "sv.ffmadds 0.v, 0.v, 0.v, 8.v" ]) lst = list(lst) @@ -203,6 +205,7 @@ class FFTTestCase(FHDLTestCase): def test_sv_remap_fpmadds_fft_svstep(self): """>>> lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1", "svshape 8, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1", "sv.ffmadds 0.v, 0.v, 0.v, 8.v", "setvl. 0, 0, 0, 1, 0, 0", "bc 4, 2, -16" @@ -217,6 +220,7 @@ class FFTTestCase(FHDLTestCase): """ lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1", "svshape 8, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1", "sv.ffmadds 0.v, 0.v, 0.v, 8.v", "setvl. 0, 0, 0, 1, 0, 0", "bc 4, 2, -16" @@ -412,23 +416,25 @@ class FFTTestCase(FHDLTestCase): vec[jl] = temp2 + temp1 """ lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1", - # tpre "svshape 8, 1, 1, 1", + # tpre + "svremap 31, 1, 0, 2, 0, 1", "sv.fmuls 24, 0.v, 16.v", "svshape 8, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1", "sv.fmuls 25, 8.v, 20.v", "fadds 24, 24, 25", # tpim - "svshape 8, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1", "sv.fmuls 26, 0.v, 20.v", "svshape 8, 1, 1, 1", "sv.fmuls 26, 8.v, 16.v", "fsubs 26, 26, 27", # vec_r jh/jl - "svshape 8, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1", "sv.ffadds 0.v, 24, 25", # vec_i jh/jl - "svshape 8, 1, 1, 1", + "svremap 31, 1, 0, 2, 0, 1", "sv.ffadds 8.v, 26, 27", # svstep loop diff --git a/src/openpower/decoder/isa/test_caller_svp64_matrix.py b/src/openpower/decoder/isa/test_caller_svp64_matrix.py index 0821769b..4f5519af 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_matrix.py +++ b/src/openpower/decoder/isa/test_caller_svp64_matrix.py @@ -28,11 +28,13 @@ class DecoderTestCase(FHDLTestCase): def test_sv_remap1(self): """>>> lst = ["svshape 2, 2, 3, 0", + "svremap 31, 1, 2, 3, 0, 0", "sv.fmadds 0.v, 8.v, 16.v, 0.v" ] REMAP fmadds FRT, FRA, FRC, FRB """ lst = SVP64Asm(["svshape 2, 2, 3, 0", + "svremap 31, 1, 2, 3, 0, 0", "sv.fmadds 0.v, 16.v, 32.v, 0.v" ]) lst = list(lst) @@ -99,11 +101,13 @@ class DecoderTestCase(FHDLTestCase): def test_sv_remap2(self): """>>> lst = ["svshape 5, 4, 3, 0", + "svremap 31, 1, 2, 3, 0, 0", "sv.fmadds 0.v, 8.v, 16.v, 0.v" ] REMAP fmadds FRT, FRA, FRC, FRB """ lst = SVP64Asm(["svshape 4, 3, 3, 0", + "svremap 31, 1, 2, 3, 0, 0", "sv.fmadds 0.v, 16.v, 32.v, 0.v" ]) lst = list(lst) diff --git a/src/openpower/decoder/power_decoder2.py b/src/openpower/decoder/power_decoder2.py index 2d315522..e0a49447 100644 --- a/src/openpower/decoder/power_decoder2.py +++ b/src/openpower/decoder/power_decoder2.py @@ -1116,7 +1116,7 @@ class PowerDecode2(PowerDecodeSubset): self.in3_step = Signal(7, name="reg_c_step") self.o_step = Signal(7, name="reg_o_step") self.o2_step = Signal(7, name="reg_o2_step") - self.remap_active = Signal(1, name="remap_active") + self.remap_active = Signal(5, name="remap_active") # per reg self.no_in_vec = Signal(1, name="no_in_vec") # no inputs vector self.no_out_vec = Signal(1, name="no_out_vec") # no outputs vector self.loop_continue = Signal(1, name="loop_continue") @@ -1264,12 +1264,13 @@ class PowerDecode2(PowerDecodeSubset): # registers a, b, c and out and out2 (LD/ST EA) sv_etype = self.op_get("SV_Etype") - for rname, to_reg, fromreg, svdec, remapstep, out in ( + for i, stuff in enumerate(( ("RA", e.read_reg1, dec_a.reg_out, in1_svdec, in1_step, False), ("RB", e.read_reg2, dec_b.reg_out, in2_svdec, in2_step, False), ("RC", e.read_reg3, dec_c.reg_out, in3_svdec, in3_step, False), ("RT", e.write_reg, dec_o.reg_out, o_svdec, o_step, True), - ("EA", e.write_ea, dec_o2.reg_out, o2_svdec, o2_step, True)): + ("EA", e.write_ea, dec_o2.reg_out, o2_svdec, o2_step, True))): + rname, to_reg, fromreg, svdec, remapstep, out = stuff comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM comb += svdec.etype.eq(sv_etype) # EXTRA2/3 for this insn comb += svdec.reg_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) @@ -1284,7 +1285,7 @@ class PowerDecode2(PowerDecodeSubset): # however when REMAP is active, the FFT REMAP # schedule takes care of this offset. with m.If(dec_o2.reg_out.ok & dec_o2.fp_madd_en): - with m.If(~self.remap_active): + with m.If(~self.remap_active[i]): with m.If(svdec.isvec): comb += offs.eq(vl) # VL for Vectors # detect if Vectorised: add srcstep/dststep if yes. @@ -1292,7 +1293,7 @@ class PowerDecode2(PowerDecodeSubset): with m.If(svdec.isvec): selectstep = dststep if out else srcstep step = Signal(7, name="step_%s" % rname.lower()) - with m.If(self.remap_active): + with m.If(self.remap_active[i]): comb += step.eq(remapstep) with m.Else(): comb += step.eq(selectstep) @@ -1327,7 +1328,7 @@ class PowerDecode2(PowerDecodeSubset): # same trick is applied to FRA, above, but it's a lot cleaner, there with m.If(dec_o2.reg_out.ok & dec_o2.fp_madd_en): comb += offs.eq(0) - with m.If(~self.remap_active): + with m.If(~self.remap_active[4]): with m.If(o2_svdec.isvec): comb += offs.eq(vl) # VL for Vectors with m.Else(): @@ -1335,7 +1336,7 @@ class PowerDecode2(PowerDecodeSubset): svdec = o_svdec # yes take source as o_svdec... with m.If(svdec.isvec): step = Signal(7, name="step_%s" % rname.lower()) - with m.If(self.remap_active): + with m.If(self.remap_active[4]): comb += step.eq(o2_step) with m.Else(): comb += step.eq(dststep) -- 2.30.2