messy resolution of sv.bc testing, early-out detection.
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 14 Aug 2021 10:43:46 +0000 (11:43 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 14 Aug 2021 10:43:46 +0000 (11:43 +0100)
src/openpower/decoder/isa/caller.py
src/openpower/decoder/isa/test_caller_svp64_bc.py
src/openpower/decoder/power_decoder2.py
src/openpower/decoder/pseudo/pywriter.py

index 7d8497daad430e2fff7765f0c6cc0ac6bb73bab4..0026f332cf79e3f1b6ee1c04e1683d1fcc5389da 100644 (file)
@@ -21,7 +21,7 @@ from openpower.decoder.selectable_int import (FieldSelectableInt, SelectableInt,
                                         selectconcat)
 from openpower.decoder.power_enums import (spr_dict, spr_byname, XER_bits,
                                      insns, MicrOp, In1Sel, In2Sel, In3Sel,
-                                     OutSel, CROutSel, LDSTMode,
+                                     OutSel, CRInSel, CROutSel, LDSTMode,
                                      SVP64RMMode, SVP64PredMode,
                                      SVP64PredInt, SVP64PredCR,
                                      SVP64LDSTmode)
@@ -72,6 +72,7 @@ REG_SORT_ORDER = {
     "RB": 0,
     "RC": 0,
     "RS": 0,
+    "BI": 0,
     "CR": 0,
     "LR": 0,
     "CTR": 0,
@@ -441,6 +442,30 @@ def get_pdecode_idx_in(dec2, name):
     return None, False
 
 
+# TODO, really should just be using PowerDecoder2
+def get_pdecode_cr_in(dec2, name):  # generator: returns (read_cr1 value, isvec) or (None, False)
+    op = dec2.dec.op
+    in_sel = yield op.cr_in
+    in_bitfield = yield dec2.dec_cr_in.cr_bitfield.data  # only used for logging below
+    sv_cr_in = yield op.sv_cr_in  # only used for logging below
+    spec = yield dec2.crin_svdec.spec  # only used for logging below
+    sv_override = yield dec2.dec_cr_in.sv_override  # only used for logging below
+    # get CR read port 1 and its isvec flag (presumably after SVP64 remap)
+    in1 = yield dec2.e.read_cr1.data
+    cr_isvec = yield dec2.cr_in_isvec
+    log ("get_pdecode_cr_in", in_sel, CROutSel.CR0.value, in1, cr_isvec)  # NOTE(review): CRInSel intended?
+    log ("    sv_cr_in", sv_cr_in)
+    log ("    cr_bf", in_bitfield)
+    log ("    spec", spec)
+    log ("    override", sv_override)
+    # identify whether regname maps to the CR input (only BI handled so far)
+    if name == 'BI':
+        if in_sel == CRInSel.BI.value:
+            return in1, cr_isvec
+    log ("get_pdecode_cr_in not found", name)
+    return None, False
+
+
 # TODO, really should just be using PowerDecoder2
 def get_pdecode_cr_out(dec2, name):
     op = dec2.dec.op
@@ -731,11 +756,22 @@ class ISACaller:
         fields = self.decoder.sigforms[formname]
         log("prep_namespace", formname, op_fields)
         for name in op_fields:
-            if name == 'spr':
-                sig = getattr(fields, name.upper())
-            else:
+            # CR immediates. deal with separately.  needs modifying
+            # pseudocode 
+            if self.is_svp64_mode and name in ['BI']: # TODO, more CRs
+                # BI is a 5-bit, must reconstruct the value
+                regnum, is_vec = yield from get_pdecode_cr_in(self.dec2, name)
                 sig = getattr(fields, name)
-            val = yield sig
+                val = yield sig
+                # low 2 bits (bit within CR field) remain same, CR num extended
+                assert regnum <= 7, "sigh, TODO, 128 CR fields"
+                val = (val & 0b11) | (regnum<<2)
+            else:
+                if name == 'spr':
+                    sig = getattr(fields, name.upper())
+                else:
+                    sig = getattr(fields, name)
+                val = yield sig
             # these are all opcode fields involved in index-selection of CR,
             # and need to do "standard" arithmetic.  CR[BA+32] for example
             # would, if using SelectableInt, only be 5-bit.
@@ -1557,7 +1593,7 @@ class ISACaller:
             else:
                 if self.allow_next_step_inc == 2:
                     log ("SVSTATE_NEXT: read")
-                    yield from self.svstate_post_inc()
+                    yield from self.svstate_post_inc(ins_name)
                 else:
                     log ("SVSTATE_NEXT: post-inc")
                 # use actual src/dst-step here to check end, do NOT
@@ -1597,7 +1633,7 @@ class ISACaller:
                     self.svstate.vfirst = 0
 
         elif self.is_svp64_mode:
-            yield from self.svstate_post_inc()
+            yield from self.svstate_post_inc(ins_name)
         else:
             # XXX only in non-SVP64 mode!
             # record state of whether the current operation was an svshape,
@@ -1718,7 +1754,7 @@ class ISACaller:
         # nothing needs doing (TODO zeroing): just do next instruction
         return srcstep == vl or dststep == vl
 
-    def svstate_post_inc(self, vf=0):
+    def svstate_post_inc(self, insn_name, vf=0):
         # check if SV "Vertical First" mode is enabled
         vfirst = self.svstate.vfirst
         log ("    SV Vertical First", vf, vfirst)
@@ -1758,9 +1794,21 @@ class ISACaller:
         if svp64_is_vector and srcstep != vl-1 and dststep != vl-1:
             self.svstate.srcstep += SelectableInt(1, 7)
             self.svstate.dststep += SelectableInt(1, 7)
+            self.namespace['SVSTATE'] = self.svstate
+            # check if this was an sv.bc* and if so did it succeed
+            if self.is_svp64_mode and insn_name.startswith("sv.bc"):
+                ctr_ok = self.namespace['ctr_ok']
+                cond_ok = self.namespace['cond_ok']
+                log("branch ctr/cond", ctr_ok, cond_ok)
+                if ctr_ok.value and cond_ok.value :
+                    self.svp64_reset_loop()
+                    self.update_pc_next()
+                    return True
+            # not a successful SVP64 branch, so fix PC (NIA==CIA) for next loop
+            # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64)
+            # this way we keep repeating the same instruction (with new steps)
             self.pc.NIA.value = self.pc.CIA.value
             self.namespace['NIA'] = self.pc.NIA
-            self.namespace['SVSTATE'] = self.svstate
             log("end of sub-pc call", self.namespace['CIA'],
                                  self.namespace['NIA'])
             return False # DO NOT allow PC update whilst Sub-PC loop running
@@ -1789,6 +1837,7 @@ class ISACaller:
         self.pc.update_nia(self.is_svp64_mode)
         self.namespace['NIA'] = self.pc.NIA
 
+
 def inject():
     """Decorator factory.
 
@@ -1819,6 +1868,10 @@ def inject():
                   args[0].namespace['NIA'],
                   args[0].namespace['SVSTATE'])
             args[0].namespace = func_globals
+            if 'cond_ok' in args[0].namespace:
+                log("args[0] cond_ok ctr_ok",
+                      args[0].namespace['cond_ok'],
+                      args[0].namespace['ctr_ok'])
             #exec (func.__code__, func_globals)
 
             # finally:
index 983cde26c2f3a88b2ca527ad57a7848284797e4d..65ba1b70b72c862d5df0d346f25f5b26e83f7d9b 100644 (file)
@@ -51,13 +51,15 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64))
 
     def test_sv_branch_cond(self):
-        for i in [0]: #[0, 10]:
+        for i in [0, 10]: #[0, 10]:
             lst = SVP64Asm(
                 [f"addi 1, 0, {i}",  # set r1 to i
+                 f"addi 2, 0, {i}",  # set r2 to i
                 "cmpi cr0, 1, 1, 10",  # compare r1 with 10 and store to cr0
-                "sv.bc 12, 2, 0x8",    # beq 0x8 -
+                "cmpi cr1, 1, 2, 10",  # compare r2 with 10 and store to cr1
+                "sv.bc 12, 2.v, 0x8",    # beq 0x8 -
                                        # branch if r1 equals 10 to the nop below
-                "addi 2, 0, 0x1234",   # if r1 == 10 this shouldn't execute
+                "addi 3, 0, 0x1234",   # if r1 == 10 this shouldn't execute
                 "or 0, 0, 0"]          # branch target
                 )
             lst = list(lst)
@@ -71,9 +73,9 @@ class DecoderTestCase(FHDLTestCase):
             with Program(lst, bigendian=False) as program:
                 sim = self.run_tst_program(program, svstate=svstate)
                 if i == 10:
-                    self.assertEqual(sim.gpr(2), SelectableInt(0, 64))
+                    self.assertEqual(sim.gpr(3), SelectableInt(0, 64))
                 else:
-                    self.assertEqual(sim.gpr(2), SelectableInt(0x1234, 64))
+                    self.assertEqual(sim.gpr(3), SelectableInt(0x1234, 64))
 
     def tst_sv_add_cr(self):
         """>>> lst = ['sv.add. 1.v, 5.v, 9.v'
index b20a014a7bff169828608694b18bf69fd668b26c..6db5c61511a79a22b6f0eccc570c9018a434d32e 100644 (file)
@@ -1191,8 +1191,9 @@ class PowerDecode2(PowerDecodeSubset):
             m.submodules.o_svdec = o_svdec = SVP64RegExtra()
             m.submodules.o2_svdec = o2_svdec = SVP64RegExtra()
 
-            # debug access to crout_svdec (used in get_pdecode_cr_out)
+            # debug access to cr svdec (used in get_pdecode_cr_in/out)
             self.crout_svdec = crout_svdec
+            self.crin_svdec = crin_svdec
 
         # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec
         reg = Signal(5, reset_less=True)
index 8cf2ed9bfdd0fd4ab308b093eaa6af8020e0e98e..8cde6d1964e3c7e0098154d9f0172919fdaf0dfb 100644 (file)
@@ -93,8 +93,12 @@ class PyISAWriter(ISA):
                 op_fname = "op_%s" % page.replace(".", "_")
                 f.write("    @inject()\n")
                 f.write("    def %s(%s):\n" % (op_fname, args))
-                if 'NIA' in pycode:  # HACK - TODO fix
-                    f.write("        global NIA\n")
+                # blech! this works in combination with ISACaller
+                # @inject decorator, which works by injecting
+                # global variables into the function namespace.
+                for blech in ['NIA', 'cond_ok', 'ctr_ok']:
+                    if blech in pycode:  # HACK - TODO fix
+                        f.write("        global %s\n" % blech)
                 pycode = pycode.split("\n")
                 pycode = '\n'.join(map(lambda x: "        %s" % x, pycode))
                 pycode = pycode.rstrip()