add svremap instruction into ISACaller
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 11 Jul 2021 19:17:51 +0000 (20:17 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 11 Jul 2021 19:17:51 +0000 (20:17 +0100)
alter the FFT and Matrix-Multiply SVP64 tests to use the new svremap instruction:
a generic redirect of any register to one of SVSHAPE0-3, rather than a hard-coded mapping

src/openpower/decoder/isa/caller.py
src/openpower/decoder/isa/test_caller_setvl.py
src/openpower/decoder/isa/test_caller_svp64_fft.py
src/openpower/decoder/isa/test_caller_svp64_matrix.py
src/openpower/decoder/power_decoder2.py

index 3adc94bddc5fb3e75c0b65cb9c644c8b89f61535..c6fbbd0454658a05dd36224214bb94a10d3a93f5 100644 (file)
@@ -743,6 +743,7 @@ class ISACaller:
         # then "yield" fields only from op_fields rather than hard-coded
         # list, here.
         fields = self.decoder.sigforms[formname]
+        log("prep_namespace", formname, op_fields)
         for name in op_fields:
             if name == 'spr':
                 sig = getattr(fields, name.upper())
@@ -1098,6 +1099,11 @@ class ISACaller:
             illegal = False
             name = 'setvl'
 
+        # and svremap not being supported by binutils (.long)
+        if asmop.startswith('svremap'):
+            illegal = False
+            name = 'svremap'
+
         # and svshape not being supported by binutils (.long)
         if asmop.startswith('svshape'):
             illegal = False
@@ -1136,7 +1142,7 @@ class ISACaller:
         # preserve order of register names
         input_names = create_args(list(info.read_regs) +
                                   list(info.uninit_regs))
-        log(input_names)
+        log("input names", input_names)
 
         # get SVP64 entry for the current instruction
         sv_rm = self.svp64rm.instrs.get(name)
@@ -1167,77 +1173,78 @@ class ISACaller:
                                        self.namespace['NIA'])
             return
 
-        # for when SVSHAPE is active, a very bad hack here (to be replaced)
-        # using pre-arranged schedule.  all of this is awful but it is a
-        # start.  next job will be to put the proper activation in place
-        yield self.dec2.remap_active.eq(1 if self.last_op_svshape else 0)
+        # get the REMAP SPR
+        SVREMAP = self.spr['SVREMAP']
+        # for when SVREMAP is active, using pre-arranged schedule.
+        # note: modifying PowerDecoder2 needs to "settle"
+        remap_en = SVREMAP.men
+        active = self.last_op_svshape and remap_en != 0
+        yield self.dec2.remap_active.eq(remap_en if active else 0)
         yield Settle()
         if self.is_svp64_mode and self.last_op_svshape:
             # get four SVSHAPEs. here we are hard-coding
-            # SVSHAPE0 to FRT, SVSHAPE1 to FRA, SVSHAPE2 to FRC and
-            # SVSHAPE3 to FRB, assuming "fmadd FRT, FRA, FRC, FRB."
             SVSHAPE0 = self.spr['SVSHAPE0']
             SVSHAPE1 = self.spr['SVSHAPE1']
             SVSHAPE2 = self.spr['SVSHAPE2']
             SVSHAPE3 = self.spr['SVSHAPE3']
+            # just some convenient debug info
             for i in range(4):
                 sname = 'SVSHAPE%d' % i
                 shape = self.spr[sname]
-                print (sname, bin(shape.value))
-                print ("    lims", shape.lims)
-                print ("    mode", shape.mode)
-                print ("    skip", shape.skip)
-
+                log (sname, bin(shape.value))
+                log ("    lims", shape.lims)
+                log ("    mode", shape.mode)
+                log ("    skip", shape.skip)
+
+            # set up the list of steps to remap
+            steps = [(self.dec2.in1_step, SVREMAP.mi0), # RA
+                     (self.dec2.in2_step, SVREMAP.mi1), # RB
+                     (self.dec2.in3_step, SVREMAP.mi2), # RC
+                     (self.dec2.o_step, SVREMAP.mo0),   # RT
+                     (self.dec2.o2_step, SVREMAP.mo1),   # EA
+                    ]
+            # set up the iterators
             remaps = [(SVSHAPE0, SVSHAPE0.get_iterator()),
                       (SVSHAPE1, SVSHAPE1.get_iterator()),
                       (SVSHAPE2, SVSHAPE2.get_iterator()),
                       (SVSHAPE3, SVSHAPE3.get_iterator()),
                      ]
-            rremaps = []
+            # go through all iterators in lock-step, advance to next remap_idx
+            remap_idxs = []
             for i, (shape, remap) in enumerate(remaps):
                 # zero is "disabled"
                 if shape.value == 0x0:
-                    continue
-                # XXX hardcoded! pick dststep for out (i==0) else srcstep
-                if shape.mode == 0b00: # multiply mode
-                    step = dststep if (i == 0) else srcstep
-                if shape.mode == 0b01: # FFT butterfly mode
-                    step = srcstep # XXX HACK - for now only use srcstep
+                    remap_idxs.append(0)
+                # pick src or dststep depending on reg num (0-2=in, 3-4=out)
+                step = dststep if (i in [3, 4]) else srcstep
                 # this is terrible.  O(N^2) looking for the match. but hey.
                 for idx, remap_idx in enumerate(remap):
                     if idx == step:
                         break
-                # multiply mode
-                if shape.mode == 0b00:
-                    if i == 0:
-                        yield self.dec2.o_step.eq(remap_idx)   # RT
-                        yield self.dec2.o2_step.eq(remap_idx)  # EA
-                    elif i == 1:
-                        yield self.dec2.in1_step.eq(remap_idx) # RA
-                    elif i == 2:
-                        yield self.dec2.in3_step.eq(remap_idx) # RB
-                    elif i == 3:
-                        yield self.dec2.in2_step.eq(remap_idx) # RC
-                # FFT butterfly mode
-                if shape.mode == 0b01:
-                    if i == 0:
-                        yield self.dec2.o_step.eq(remap_idx)   # RT
-                        yield self.dec2.in2_step.eq(remap_idx) # RB
-                    elif i == 1:
-                        yield self.dec2.in1_step.eq(remap_idx) # RA
-                        yield self.dec2.o2_step.eq(remap_idx)  # EA (FRS)
-                    elif i == 2:
-                        yield self.dec2.in3_step.eq(remap_idx) # RC
-                    elif i == 3:
-                        pass # no SVSHAPE3
-                rremaps.append((shape.mode, i, idx, remap_idx)) # debug printing
+                remap_idxs.append(remap_idx)
+
+            rremaps = []
+            # now cross-index the required SHAPE for each of 3-in 2-out regs
+            rnames = ['RA', 'RB', 'RC', 'RT', 'EA']
+            for i, (dstep, shape_idx) in enumerate(steps):
+                (shape, remap) = remaps[shape_idx]
+                remap_idx = remap_idxs[shape_idx]
+                # zero is "disabled"
+                if shape.value == 0x0:
+                    continue
+                # now set the actual requested step to the current index
+                yield dstep.eq(remap_idx)
+
+                # debug printout info
+                rremaps.append((shape.mode, i, rnames[i], step, shape_idx,
+                                remap_idx))
             for x in rremaps:
-                print ("shape remap", x)
+                log ("shape remap", x)
         # after that, settle down (combinatorial) to let Vector reg numbers
         # work themselves out
         yield Settle()
         remap_active = yield self.dec2.remap_active
-        print ("remap active", remap_active)
+        log ("remap active", bin(remap_active))
 
         # main input registers (RT, RA ...)
         inputs = []
@@ -1284,7 +1291,7 @@ class ISACaller:
             if ldstmode == SVP64LDSTmode.BITREVERSE.value:
                 imm = yield self.dec2.dec.fields.FormSVD.SVD[0:11]
                 imm = exts(imm, 11) # sign-extend to integer
-                print ("bitrev SVD", imm)
+                log ("bitrev SVD", imm)
                 replace_d = True
             else:
                 imm = yield self.dec2.dec.fields.FormD.D[0:16]
@@ -1499,7 +1506,7 @@ class ISACaller:
             # to be able to know if it should apply in the next instruction.
             # also (if going to use this instruction) should disable ability
             # to interrupt in between. sigh.
-            self.last_op_svshape = asmop == 'svshape'
+            self.last_op_svshape = asmop == 'svremap'
 
         self.update_pc_next()
 
@@ -1688,6 +1695,7 @@ def inject():
 
             context = args[0].namespace  # variables to be injected
             saved_values = func_globals.copy()  # Shallow copy of dict.
+            log("globals before", context.keys())
             func_globals.update(context)
             result = func(*args, **kwargs)
             log("globals after", func_globals['CIA'], func_globals['NIA'])
index aece841b510aecfba04df05f441e93aee3954fda..8cbd817418f14e512ad2acb10b563d6e589013d5 100644 (file)
@@ -328,6 +328,30 @@ class DecoderTestCase(FHDLTestCase):
             # check registers as expected
             self._check_regs(sim, expected_regs)
 
+    def test_svremap(self):
+        """Run one svremap instruction and check that each SVREMAP SPR
+        field (men, mi0-mi2, mo0-mo1) holds the expected value."""
+        lst = SVP64Asm(["svremap 11, 0, 1, 2, 3, 3",  # one instruction only
+                        ])
+        lst = list(lst)
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program)
+            svremap = sim.spr('SVREMAP')  # SPR as left by the simulator run
+            print ("SVREMAP after", bin(svremap.value))  # debug dump, raw value
+            print ("        men", bin(svremap.men))
+            print ("        mi0", bin(svremap.mi0))
+            print ("        mi1", bin(svremap.mi1))
+            print ("        mi2", bin(svremap.mi2))
+            print ("        mo0", bin(svremap.mo0))
+            print ("        mo1", bin(svremap.mo1))
+            # expected values follow the operand list "11, 0, 1, 2, 3, 3"
+            # in order (presumably men, mi0, mi1, mi2, mo0, mo1 -- confirm)
+            self.assertEqual(svremap.men, 11)
+            self.assertEqual(svremap.mi0, 0)
+            self.assertEqual(svremap.mi1, 1)
+            self.assertEqual(svremap.mi2, 2)
+            self.assertEqual(svremap.mo0, 3)
+            self.assertEqual(svremap.mo1, 3)
+
     def run_tst_program(self, prog, initial_regs=None,
                               svstate=None):
         if initial_regs is None:
index 605a8b3da2693f9a3c8b03aa5670bc0272902ecb..c7854ee8f2c18d71662d0a5a89833a40b171c848 100644 (file)
@@ -123,6 +123,7 @@ class FFTTestCase(FHDLTestCase):
 
     def test_sv_remap_fpmadds_fft(self):
         """>>> lst = ["svshape 8, 1, 1, 1",
+                     "svremap 31, 1, 0, 2, 0, 1",
                       "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
                      ]
             runs a full in-place O(N log2 N) butterfly schedule for
@@ -138,6 +139,7 @@ class FFTTestCase(FHDLTestCase):
             (3 inputs, 2 outputs)
         """
         lst = SVP64Asm( ["svshape 8, 1, 1, 1",
+                         "svremap 31, 1, 0, 2, 0, 1",
                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
                         ])
         lst = list(lst)
@@ -203,6 +205,7 @@ class FFTTestCase(FHDLTestCase):
     def test_sv_remap_fpmadds_fft_svstep(self):
         """>>> lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
                             "svshape 8, 1, 1, 1",
+                             "svremap 31, 1, 0, 2, 0, 1",
                             "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
                             "setvl. 0, 0, 0, 1, 0, 0",
                             "bc 4, 2, -16"
@@ -217,6 +220,7 @@ class FFTTestCase(FHDLTestCase):
         """
         lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
                         "svshape 8, 1, 1, 1",
+                         "svremap 31, 1, 0, 2, 0, 1",
                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
                         "setvl. 0, 0, 0, 1, 0, 0",
                         "bc 4, 2, -16"
@@ -412,23 +416,25 @@ class FFTTestCase(FHDLTestCase):
                 vec[jl] = temp2 + temp1
         """
         lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
-                        # tpre
                         "svshape 8, 1, 1, 1",
+                        # tpre
+                         "svremap 31, 1, 0, 2, 0, 1",
                         "sv.fmuls 24, 0.v, 16.v",
                         "svshape 8, 1, 1, 1",
+                         "svremap 31, 1, 0, 2, 0, 1",
                         "sv.fmuls 25, 8.v, 20.v",
                         "fadds 24, 24, 25",
                         # tpim
-                        "svshape 8, 1, 1, 1",
+                         "svremap 31, 1, 0, 2, 0, 1",
                         "sv.fmuls 26, 0.v, 20.v",
                         "svshape 8, 1, 1, 1",
                         "sv.fmuls 26, 8.v, 16.v",
                         "fsubs 26, 26, 27",
                         # vec_r jh/jl
-                        "svshape 8, 1, 1, 1",
+                         "svremap 31, 1, 0, 2, 0, 1",
                         "sv.ffadds 0.v, 24, 25",
                         # vec_i jh/jl
-                        "svshape 8, 1, 1, 1",
+                         "svremap 31, 1, 0, 2, 0, 1",
                         "sv.ffadds 8.v, 26, 27",
 
                         # svstep loop
index 0821769bbb4daf29bb99e56ef300f862e32c7e79..4f5519afea849e97aa9ba72539009c2c7a7e7f0d 100644 (file)
@@ -28,11 +28,13 @@ class DecoderTestCase(FHDLTestCase):
 
     def test_sv_remap1(self):
         """>>> lst = ["svshape 2, 2, 3, 0",
+                        "svremap 31, 1, 2, 3, 0, 0",
                        "sv.fmadds 0.v, 8.v, 16.v, 0.v"
                         ]
                 REMAP fmadds FRT, FRA, FRC, FRB
         """
         lst = SVP64Asm(["svshape 2, 2, 3, 0",
+                        "svremap 31, 1, 2, 3, 0, 0",
                        "sv.fmadds 0.v, 16.v, 32.v, 0.v"
                         ])
         lst = list(lst)
@@ -99,11 +101,13 @@ class DecoderTestCase(FHDLTestCase):
 
     def test_sv_remap2(self):
         """>>> lst = ["svshape 5, 4, 3, 0",
+                        "svremap 31, 1, 2, 3, 0, 0",
                        "sv.fmadds 0.v, 8.v, 16.v, 0.v"
                         ]
                 REMAP fmadds FRT, FRA, FRC, FRB
         """
         lst = SVP64Asm(["svshape 4, 3, 3, 0",
+                        "svremap 31, 1, 2, 3, 0, 0",
                        "sv.fmadds 0.v, 16.v, 32.v, 0.v"
                         ])
         lst = list(lst)
index 2d315522f839eab3b2d904fe642a47c688f9b89f..e0a494477ef3ce040c1a03d4ee4c3c4fec397fc9 100644 (file)
@@ -1116,7 +1116,7 @@ class PowerDecode2(PowerDecodeSubset):
             self.in3_step = Signal(7, name="reg_c_step")
             self.o_step = Signal(7, name="reg_o_step")
             self.o2_step = Signal(7, name="reg_o2_step")
-            self.remap_active = Signal(1, name="remap_active")
+            self.remap_active = Signal(5, name="remap_active") # per reg
             self.no_in_vec = Signal(1, name="no_in_vec") # no inputs vector
             self.no_out_vec = Signal(1, name="no_out_vec") # no outputs vector
             self.loop_continue = Signal(1, name="loop_continue")
@@ -1264,12 +1264,13 @@ class PowerDecode2(PowerDecodeSubset):
 
             # registers a, b, c and out and out2 (LD/ST EA)
             sv_etype = self.op_get("SV_Etype")
-            for rname, to_reg, fromreg, svdec, remapstep, out in (
+            for i, stuff in enumerate((
                 ("RA", e.read_reg1, dec_a.reg_out, in1_svdec, in1_step, False),
                 ("RB", e.read_reg2, dec_b.reg_out, in2_svdec, in2_step, False),
                 ("RC", e.read_reg3, dec_c.reg_out, in3_svdec, in3_step, False),
                 ("RT", e.write_reg, dec_o.reg_out, o_svdec, o_step, True),
-                ("EA", e.write_ea, dec_o2.reg_out, o2_svdec, o2_step, True)):
+                ("EA", e.write_ea, dec_o2.reg_out, o2_svdec, o2_step, True))):
+                rname, to_reg, fromreg, svdec, remapstep, out = stuff
                 comb += svdec.extra.eq(extra)     # EXTRA field of SVP64 RM
                 comb += svdec.etype.eq(sv_etype)  # EXTRA2/3 for this insn
                 comb += svdec.reg_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA)
@@ -1284,7 +1285,7 @@ class PowerDecode2(PowerDecodeSubset):
                     # however when REMAP is active, the FFT REMAP
                     # schedule takes care of this offset.
                     with m.If(dec_o2.reg_out.ok & dec_o2.fp_madd_en):
-                        with m.If(~self.remap_active):
+                        with m.If(~self.remap_active[i]):
                             with m.If(svdec.isvec):
                                 comb += offs.eq(vl) # VL for Vectors
                 # detect if Vectorised: add srcstep/dststep if yes.
@@ -1292,7 +1293,7 @@ class PowerDecode2(PowerDecodeSubset):
                 with m.If(svdec.isvec):
                     selectstep = dststep if out else srcstep
                     step = Signal(7, name="step_%s" % rname.lower())
-                    with m.If(self.remap_active):
+                    with m.If(self.remap_active[i]):
                         comb += step.eq(remapstep)
                     with m.Else():
                         comb += step.eq(selectstep)
@@ -1327,7 +1328,7 @@ class PowerDecode2(PowerDecodeSubset):
             # same trick is applied to FRA, above, but it's a lot cleaner, there
             with m.If(dec_o2.reg_out.ok & dec_o2.fp_madd_en):
                 comb += offs.eq(0)
-                with m.If(~self.remap_active):
+                with m.If(~self.remap_active[4]):
                     with m.If(o2_svdec.isvec):
                         comb += offs.eq(vl) # VL for Vectors
                     with m.Else():
@@ -1335,7 +1336,7 @@ class PowerDecode2(PowerDecodeSubset):
                 svdec = o_svdec # yes take source as o_svdec...
                 with m.If(svdec.isvec):
                     step = Signal(7, name="step_%s" % rname.lower())
-                    with m.If(self.remap_active):
+                    with m.If(self.remap_active[4]):
                         comb += step.eq(o2_step)
                     with m.Else():
                         comb += step.eq(dststep)