add ability to get current SVSHAPE indices into a register,
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 24 Jul 2021 10:43:23 +0000 (11:43 +0100)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 24 Jul 2021 10:43:23 +0000 (11:43 +0100)
using setvl "Vertical First" test mode

openpower/isa/simplev.mdwn
src/openpower/decoder/isa/caller.py
src/openpower/decoder/isa/test_caller_setvl.py

index 159da8c9c488f8fd201e301bf1a499bb93784c02..eb82778625c4c0c7bd374397edbd7b88d40f6887 100644 (file)
@@ -11,7 +11,9 @@ SVL-Form
 Pseudo-code:
 
     if (vf & (¬vs) & ¬(ms)) = 1 then
-        SVSTATE_NEXT(SVi)
+        step <- SVSTATE_NEXT(SVi)
+        if _RT != 0b00000 then
+           GPR(_RT) <- [0]*57 || step
     else
         VLimm <- SVi + 1
         if vs = 1 then
index c6b51b15eef0e554b6b8bb1d6cc1508f608886fe..b3d4d8cc3074efd2c2310448d8ea99c399ff2df3 100644 (file)
@@ -1034,25 +1034,25 @@ class ISACaller:
                   (SVSHAPE3, SVSHAPE3.get_iterator()),
                  ]
 
-        remap_idxs = []
         self.remap_loopends = [0] * 4
+        self.remap_idxs = [0] * 4
         dbg = []
         for i, (shape, remap) in enumerate(remaps):
             # zero is "disabled"
             if shape.value == 0x0:
-                remap_idxs.append(0)
+                self.remap_idxs[i] = 0
             # pick src or dststep depending on reg num (0-2=in, 3-4=out)
             step = dststep if (i in [3, 4]) else srcstep
             # this is terrible.  O(N^2) looking for the match. but hey.
             for idx, (remap_idx, loopends) in enumerate(remap):
                 if idx == step:
                     break
-            remap_idxs.append(remap_idx)
+            self.remap_idxs[i] = remap_idx
             self.remap_loopends[i] = loopends
             dbg.append((i, step, remap_idx, loopends))
         for (i, step, remap_idx, loopends) in dbg:
             log ("SVSHAPE %d idx, end" % i, step, remap_idx, bin(loopends))
-        return remap_idxs, remaps
+        return remaps
 
     def get_spr_msb(self):
         dec_insn = yield self.dec2.e.do.insn
@@ -1064,15 +1064,15 @@ class ISACaller:
         self.last_st_addr = None # reset the last known store address
         self.last_ld_addr = None # etc.
 
-        name = name.strip()  # remove spaces if not already done so
+        ins_name = name.strip()  # remove spaces if not already done so
         if self.halted:
-            log("halted - not executing", name)
+            log("halted - not executing", ins_name)
             return
 
         # TODO, asmregs is from the spec, e.g. add RT,RA,RB
         # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
         asmop = yield from self.get_assembly_name()
-        log("call", name, asmop)
+        log("call", ins_name, asmop)
 
         # check privileged
         int_op = yield self.dec2.dec.op.internal_op
@@ -1098,58 +1098,58 @@ class ISACaller:
             return
 
         # check halted condition
-        if name == 'attn':
+        if ins_name == 'attn':
             self.halted = True
             return
 
         # check illegal instruction
         illegal = False
-        if name not in ['mtcrf', 'mtocrf']:
-            illegal = name != asmop
+        if ins_name not in ['mtcrf', 'mtocrf']:
+            illegal = ins_name != asmop
 
         # sigh deal with setvl not being supported by binutils (.long)
         if asmop.startswith('setvl'):
             illegal = False
-            name = 'setvl'
+            ins_name = 'setvl'
 
         # and svremap not being supported by binutils (.long)
         if asmop.startswith('svremap'):
             illegal = False
-            name = 'svremap'
+            ins_name = 'svremap'
 
         # and svshape not being supported by binutils (.long)
         if asmop.startswith('svshape'):
             illegal = False
-            name = 'svshape'
+            ins_name = 'svshape'
 
         # and fsin and fcos
         if asmop == 'fsins':
             illegal = False
-            name = 'fsins'
+            ins_name = 'fsins'
         if asmop == 'fcoss':
             illegal = False
-            name = 'fcoss'
+            ins_name = 'fcoss'
 
         # sigh also deal with ffmadds not being supported by binutils (.long)
         if asmop == 'ffmadds':
             illegal = False
-            name = 'ffmadds'
+            ins_name = 'ffmadds'
 
         # and fdmadds not being supported by binutils (.long)
         if asmop == 'fdmadds':
             illegal = False
-            name = 'fdmadds'
+            ins_name = 'fdmadds'
 
         # and ffadds not being supported by binutils (.long)
         if asmop == 'ffadds':
             illegal = False
-            name = 'ffadds'
+            ins_name = 'ffadds'
 
         if illegal:
-            print("illegal", name, asmop)
+            print("illegal", ins_name, asmop)
             self.call_trap(0x700, PIb.ILLEG)
             print("name %s != %s - calling ILLEGAL trap, PC: %x" %
-                  (name, asmop, self.pc.CIA.value))
+                  (ins_name, asmop, self.pc.CIA.value))
             return
 
         # this is for setvl "Vertical" mode: if set true,
@@ -1160,11 +1160,11 @@ class ISACaller:
 
         # nop has to be supported, we could let the actual op calculate
         # but PowerDecoder has a pattern for nop
-        if name is 'nop':
+        if ins_name is 'nop':
             self.update_pc_next()
             return
 
-        info = self.instrs[name]
+        info = self.instrs[ins_name]
         yield from self.prep_namespace(info.form, info.op_fields)
 
         # preserve order of register names
@@ -1173,7 +1173,7 @@ class ISACaller:
         log("input names", input_names)
 
         # get SVP64 entry for the current instruction
-        sv_rm = self.svp64rm.instrs.get(name)
+        sv_rm = self.svp64rm.instrs.get(ins_name)
         if sv_rm is not None:
             dest_cr, src_cr, src_byname, dest_byname = decode_extra(sv_rm)
         else:
@@ -1181,8 +1181,10 @@ class ISACaller:
         log ("sv rm", sv_rm, dest_cr, src_cr, src_byname, dest_byname)
 
         # see if srcstep/dststep need skipping over masked-out predicate bits
-        if self.is_svp64_mode:
+        if (self.is_svp64_mode or ins_name == 'setvl' or
+           ins_name.startswith("sv")):
             yield from self.svstate_pre_inc()
+        if self.is_svp64_mode:
             pre = yield from self.update_new_svstate_steps()
             if pre:
                 self.svp64_reset_loop()
@@ -1208,6 +1210,8 @@ class ISACaller:
         active = (persist or self.last_op_svshape) and remap_en != 0
         yield self.dec2.remap_active.eq(remap_en if active else 0)
         yield Settle()
+        if persist or self.last_op_svshape:
+            remaps = self.get_remap_indices()
         if self.is_svp64_mode and (persist or self.last_op_svshape):
             # just some convenient debug info
             for i in range(4):
@@ -1230,7 +1234,7 @@ class ISACaller:
                      (self.dec2.o_step, mo0),   # RT
                      (self.dec2.o2_step, mo1),   # EA
                     ]
-            remap_idxs, remaps = self.get_remap_indices()
+            remap_idxs = self.remap_idxs
             rremaps = []
             # now cross-index the required SHAPE for each of 3-in 2-out regs
             rnames = ['RA', 'RB', 'RC', 'RT', 'EA']
@@ -1282,7 +1286,7 @@ class ISACaller:
                 reg_val = 0
             inputs.append(reg_val)
         # arrrrgh, awful hack, to get _RT into namespace
-        if asmop == 'setvl':
+        if ins_name == 'setvl':
             regname = "_RT"
             RT = yield self.dec2.dec.RT
             self.namespace[regname] = SelectableInt(RT, 5)
@@ -1485,7 +1489,8 @@ class ISACaller:
             else:
                 log ("SVSTATE_NEXT: post-inc")
                 srcstep, dststep = self.new_srcstep, self.new_dststep
-                remap_idxs, remaps = self.get_remap_indices()
+                remaps = self.get_remap_indices()
+                remap_idxs = self.remap_idxs
                 vl = self.svstate.vl
                 end_src = srcstep == vl-1
                 end_dst = dststep == vl-1
@@ -1536,6 +1541,10 @@ class ISACaller:
         log("SVSTATE_NEXT mode", mode)
         self.allow_next_step_inc = True
         self.svstate_next_mode = mode
+        if self.svstate_next_mode > 0:
+            shape_idx = self.svstate_next_mode.value-1
+            return SelectableInt(self.remap_idxs[shape_idx], 7)
+        return SelectableInt(0, 7)
 
     def svstate_pre_inc(self):
         """check if srcstep/dststep need to skip over masked-out predicate bits
index d1adae8c26878286fd30232263cb9c4d7747492f..0d710148f372c24958a9f0dfb773f0be0fb3e8f6 100644 (file)
@@ -154,7 +154,8 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(sim.gpr(1), SelectableInt(10, 64))
 
     def test_svstep_inner_loop_6(self):
-        """tests svstep inner loop, running 6 times, looking for "k"
+        """tests svstep inner loop, running 6 times, looking for "k".
+        also sees if k is actually output into reg 2 (RT=2)
         """
         lst = SVP64Asm([
                         # set triple butterfly mode with persistent "REMAP"
@@ -165,7 +166,7 @@ class DecoderTestCase(FHDLTestCase):
                         "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
                         "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
                         "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
-                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 2, 0, 2, 1, 0, 0",# svstep (Rc=1)
                         ])
         lst = list(lst)
 
@@ -188,7 +189,7 @@ class DecoderTestCase(FHDLTestCase):
             # svstep called twice, didn't reach VL, so srcstep/dststep both 2
             self.assertEqual(sim.svstate.srcstep, 6)
             self.assertEqual(sim.svstate.dststep, 6)
-            self.assertEqual(sim.gpr(0), SelectableInt(0, 64))
+            self.assertEqual(sim.gpr(2), SelectableInt(1, 64))
             self.assertEqual(sim.svstate.vfirst, 1)
             CR0 = sim.crl[0]
             print("      CR0", bin(CR0.get_range().value))
@@ -280,6 +281,49 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(CR0[CRFields.GT], 0)
             self.assertEqual(CR0[CRFields.SO], 0)
 
+    def test_svstep_inner_loop_4_jl(self):
+        """tests svstep inner loop, running 4 times, checking
+           "jl" is returned after 4th iteration
+        """
+        lst = SVP64Asm([
+                        # set triple butterfly mode with persistent "REMAP"
+                        "svshape 8, 1, 1, 1, 1",
+                        "svremap 31, 1, 0, 2, 0, 1, 1",
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0",# svstep (Rc=1)
+                        "setvl. 0, 0, 2, 1, 0, 0", # svstep (Rc=1)
+                        "setvl. 2, 0, 2, 1, 0, 0", # svstep (Rc=1)
+                        ])
+        lst = list(lst)
+
+        # SVSTATE
+        svstate = SVP64State()
+        #svstate.vl = 2 # VL
+        #svstate.maxvl = 2 # MAXVL
+        print ("SVSTATE", bin(svstate.asint()))
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, svstate=svstate)
+            print ("SVSTATE after", bin(sim.svstate.asint()))
+            print ("        vl", bin(sim.svstate.vl))
+            print ("        mvl", bin(sim.svstate.maxvl))
+            print ("    srcstep", bin(sim.svstate.srcstep))
+            print ("    dststep", bin(sim.svstate.dststep))
+            print ("     vfirst", bin(sim.svstate. vfirst))
+            self.assertEqual(sim.svstate.vl, 12)
+            self.assertEqual(sim.svstate.maxvl, 12)
+            # svstep called 4 times, didn't reach VL, so srcstep/dststep both 4
+            self.assertEqual(sim.svstate.srcstep, 4)
+            self.assertEqual(sim.svstate.dststep, 4)
+            self.assertEqual(sim.gpr(2), SelectableInt(6, 64))
+            self.assertEqual(sim.svstate.vfirst, 1)
+            CR0 = sim.crl[0]
+            print("      CR0", bin(CR0.get_range().value))
+            self.assertEqual(CR0[CRFields.EQ], 0)
+            self.assertEqual(CR0[CRFields.LT], 1)
+            self.assertEqual(CR0[CRFields.GT], 0)
+            self.assertEqual(CR0[CRFields.SO], 0)
+
     def test_sv_add(self):
         """sets VL=2 then adds:
            * 1 = 5 + 9   => 0x5555 = 0x4321+0x1234