sort out CROPs fail-first in ISACaller. needed to take a copy of CR
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 6 Oct 2022 17:34:38 +0000 (18:34 +0100)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 6 Oct 2022 17:34:38 +0000 (18:34 +0100)
for when sv.cmp (and other pseudocode) *overwrites* CR and it needs
restoring (when VLI=0).  also needed to identify 3-bit and 5-bit
ffirst mode, and extract bottom 2 bits of BF

src/openpower/decoder/isa/caller.py
src/openpower/decoder/isa/test_caller_svp64_inssort.py
src/openpower/decoder/power_decoder2.py
src/openpower/decoder/power_svp64_rm.py

index c4cb0c3978d589eeb58668b1244f65ab1236bbdb..078a2f0b6271536b4d39646768a646c0e2189d3e 100644 (file)
@@ -1089,6 +1089,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
         # create CR then allow portions of it to be "selectable" (below)
         self.cr_fields = CRFields(initial_cr)
         self.cr = self.cr_fields.cr
+        self.cr_backup = 0 # sigh, dreadful hack: for fail-first (VLi)
 
         # "undefined", just set to variable-bit-width int (use exts "max")
         # self.undefined = SelectableInt(0, 256)  # TODO, not hard-code 256!
@@ -1230,6 +1231,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
         self.namespace['VL'] = vl
         self.namespace['srcstep'] = srcstep
 
+        # take a copy of the CR field value: if non-VLi fail-first fails
+        # this is because the pseudocode writes *directly* to CR. sigh
+        self.cr_backup = self.cr.value
+
         # sv.bc* need some extra fields
         if self.is_svp64_mode and insn_name.startswith("sv.bc"):
             # blegh grab bits manually
@@ -1851,7 +1856,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
             yield from self.do_rc_ov(ins_name, results[0], overflow, cr0)
 
         # check failfirst
-        ffirst_hit = False
+        ffirst_hit = False, False
         if self.is_svp64_mode:
             sv_mode = yield self.dec2.rm_dec.sv_mode
             is_cr = sv_mode == SVMode.CROP.value
@@ -1869,15 +1874,15 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
         ff_inv = yield self.dec2.rm_dec.inv
         cr_bit = yield self.dec2.rm_dec.cr_sel
         RC1 = yield self.dec2.rm_dec.RC1
-        vli = yield self.dec2.rm_dec.vli # VL inclusive if truncated
+        vli_ = yield self.dec2.rm_dec.vli # VL inclusive if truncated
         log(" ff rm_mode", rc_en, rm_mode, SVP64RMMode.FFIRST.value)
         log("        inv", ff_inv)
         log("        RC1", RC1)
-        log("        vli", vli)
+        log("        vli", vli_)
         log("     cr_bit", cr_bit)
         log("      rc_en", rc_en)
         if not rc_en or rm_mode != SVP64RMMode.FFIRST.value:
-            return False
+            return False, False
         # get the CR vevtor, do BO-test
         crf = "CR0"
         log("asmregs", info.asmregs[0], info.write_regs)
@@ -1889,13 +1894,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
         log("cr test", crf, regnum, int(crtest), crtest, cr_bit, ff_inv)
         log("cr test?", ffirst_hit)
         if not ffirst_hit:
-            return False
+            return False, False
         # Fail-first activated, truncate VL
-        vli = SelectableInt(int(vli), 7)
+        vli = SelectableInt(int(vli_), 7)
         self.svstate.vl = srcstep + vli
         yield self.dec2.state.svstate.eq(self.svstate.value)
         yield Settle()  # let decoder update
-        return True
+        return True, vli_
 
     def do_rc_ov(self, ins_name, result, overflow, cr0):
         if ins_name.startswith("f"):
@@ -1920,9 +1925,15 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
 
     def do_outregs_nia(self, asmop, ins_name, info, outs,
                        carry_en, rc_en, ffirst_hit):
-        # write out any regs for this instruction
-        for name, output in outs.items():
-            yield from self.check_write(info, name, output, carry_en)
+        ffirst_hit, vli = ffirst_hit
+        if not ffirst_hit or vli:
+            # write out any regs for this instruction
+            for name, output in outs.items():
+                yield from self.check_write(info, name, output, carry_en)
+        # restore the CR value on non-VLI failfirst (from sv.cmp and others
+        # which write directly to CR in the pseudocode (gah, what a mess)
+        if ffirst_hit and not vli:
+            self.cr.value = self.cr_backup
 
         if ffirst_hit:
             self.svp64_reset_loop()
index 9703cebbe1af2b6d7e032b7c8b37246ac36b555d..9b0490fc469adfbb08512931b9c90e698816a7b7 100644 (file)
@@ -33,6 +33,120 @@ class DecoderTestCase(FHDLTestCase):
         for i in range(32):
             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
 
+    def test_sv_cmp_ff_vli(self):
+        lst = SVP64Asm(["sv.cmp/ff=eq/vli *0, 1, *16, 0",
+                        ])
+        lst = list(lst)
+
+        # SVSTATE vl=10
+        svstate = SVP64State()
+        svstate.vl = 3 # VL
+        svstate.maxvl = 3 # MAXVL
+        print ("SVSTATE", bin(svstate.asint()))
+
+        gprs = [0] * 64
+        vec = [1, 2, 3]
+        crs_expected = [8, 2, 0] # LT EQ GT
+
+        res = []
+        # store GPRs
+        for i, x in enumerate(vec):
+            gprs[i+16] = x
+
+        gprs[0] = 2 # middle value of vec
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_regs=gprs,
+                                                svstate=svstate)
+            print ("spr svstate ", sim.svstate)
+            print ("          vl", sim.svstate.vl)
+            for i in range(len(vec)):
+                val = sim.gpr(16+i).value
+                res.append(val)
+                crf = sim.crl[i].get_range().value
+                print ("i", i, val, crf)
+            for i in range(len(vec)):
+                crf = sim.crl[i].get_range().value
+                assert crf == crs_expected[i], "cr %d %s expect %s" % \
+                        (i, crf, crs_expected[i])
+            assert sim.svstate.vl == 2
+
+    def test_sv_cmp_ff(self):
+        lst = SVP64Asm(["sv.cmp/ff=eq *0, 1, *16, 0",
+                        ])
+        lst = list(lst)
+
+        # SVSTATE vl=10
+        svstate = SVP64State()
+        svstate.vl = 3 # VL
+        svstate.maxvl = 3 # MAXVL
+        print ("SVSTATE", bin(svstate.asint()))
+
+        gprs = [0] * 64
+        vec = [1, 2, 3]
+        crs_expected = [8, 0, 0] # LT EQ GT
+
+        res = []
+        # store GPRs
+        for i, x in enumerate(vec):
+            gprs[i+16] = x
+
+        gprs[0] = 2 # middle value of vec
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_regs=gprs,
+                                                svstate=svstate)
+            print ("spr svstate ", sim.svstate)
+            print ("          vl", sim.svstate.vl)
+            for i in range(len(vec)):
+                val = sim.gpr(16+i).value
+                res.append(val)
+                crf = sim.crl[i].get_range().value
+                print ("i", i, val, crf)
+            for i in range(len(vec)):
+                crf = sim.crl[i].get_range().value
+                assert crf == crs_expected[i], "cr %d %s expect %s" % \
+                        (i, crf, crs_expected[i])
+            assert sim.svstate.vl == 1
+
+    def test_sv_cmp_ff_lt(self):
+        lst = SVP64Asm(["sv.cmp/ff=gt *0, 1, *16, 0",
+                        ])
+        lst = list(lst)
+
+        # SVSTATE vl=10
+        svstate = SVP64State()
+        svstate.vl = 3 # VL
+        svstate.maxvl = 3 # MAXVL
+        print ("SVSTATE", bin(svstate.asint()))
+
+        gprs = [0] * 64
+        vec = [1, 2, 3]
+        crs_expected = [8, 2, 0] # LT EQ GT
+
+        res = []
+        # store GPRs
+        for i, x in enumerate(vec):
+            gprs[i+16] = x
+
+        gprs[0] = 2 # middle value of vec
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_regs=gprs,
+                                                svstate=svstate)
+            print ("spr svstate ", sim.svstate)
+            print ("          vl", sim.svstate.vl)
+            for i in range(len(vec)):
+                val = sim.gpr(16+i).value
+                res.append(val)
+                crf = sim.crl[i].get_range().value
+                print ("i", i, val, crf)
+            for i in range(len(vec)):
+                crf = sim.crl[i].get_range().value
+                assert crf == crs_expected[i], "cr %d %s expect %s" % \
+                        (i, crf, crs_expected[i])
+            assert sim.svstate.vl == 2
+
     def test_sv_cmp(self):
         lst = SVP64Asm(["sv.cmp *0, 1, *16, 0",
                         ])
@@ -125,7 +239,7 @@ class DecoderTestCase(FHDLTestCase):
                 crf = sim.crl[i].get_range().value
                 print ("i", i, val, crf)
             # confirm that the results are as expected
-            expected = reversed(sorted(vec))
+            expected = list(reversed(sorted(vec)))
             for i, v in enumerate(res):
                 self.assertEqual(v, expected[i])
 
index 2f981f35fc786a1a140f32cfe872bb2893b3be69..1ad9a52fdb3b19a81ec4905cdaf8e5e490a52c09 100644 (file)
@@ -678,6 +678,8 @@ class DecodeCROut(Elaboratable):
         self.cr_bitfield = Data(3, "cr_bitfield")
         self.whole_reg = Data(8,  "cr_fxm")
         self.sv_override = Signal(2, reset_less=True)  # do not do EXTRA spec
+        self.cr_5bit = Signal(reset_less=True)  # set True for 5-bit
+        self.cr_2bit = Signal(2, reset_less=True)  # get lowest 2 bits
 
     def elaborate(self, platform):
         m = Module()
@@ -689,6 +691,7 @@ class DecodeCROut(Elaboratable):
         comb += self.cr_bitfield.ok.eq(0)
         comb += self.whole_reg.ok.eq(0)
         comb += self.sv_override.eq(0)
+        comb += self.cr_5bit.eq(0)
 
         # please note these MUST match (setting of cr_bitfield.ok) exactly
         # with write_cr0 below in PowerDecoder2.  the reason it's separated
@@ -713,6 +716,8 @@ class DecodeCROut(Elaboratable):
             with m.Case(CROutSel.BT):
                 comb += self.cr_bitfield.data.eq(self.dec.FormXL.BT[2:5])
                 comb += self.cr_bitfield.ok.eq(1)
+                comb += self.cr_5bit.eq(1)
+                comb += self.cr_2bit.eq(self.dec.FormXL.BT[0:2])
             with m.Case(CROutSel.WHOLE_REG):
                 comb += self.whole_reg.ok.eq(1)
                 move_one = Signal(reset_less=True)
@@ -1153,6 +1158,7 @@ class PowerDecode2(PowerDecodeSubset):
                          regreduce_en=False, fp_en=fp_en)
         self.ldst_exc = LDSTException("dec2_exc")  # rewrites as OP_TRAP
         self.instr_fault = Signal()  # rewrites instruction as OP_FETCH_FAILED
+        self.crout_5bit = Signal()  # CR out is 5-bit
 
         if self.svp64_en:
             self.cr_out_isvec = Signal(1, name="cr_out_isvec")
@@ -1226,6 +1232,7 @@ class PowerDecode2(PowerDecodeSubset):
         m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec, op)
         m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec, op)
         comb += dec_a.sv_nz.eq(self.sv_a_nz)
+        comb += self.crout_5bit.eq(self.dec_cr_out.cr_5bit)
 
         if self.svp64_en:
             # and SVP64 Extra decoders
@@ -1517,6 +1524,10 @@ class PowerDecode2(PowerDecodeSubset):
 
         if self.svp64_en:
             comb += self.rm_dec.ldst_ra_vec.eq(self.in1_isvec)  # RA is vector
+            comb += self.rm_dec.cr_5bit_in.eq(self.crout_5bit)  # CR is 5-bit
+            # take bottom 2 bits of CR out (CR field selector)
+            with m.If(self.crout_5bit):
+                comb += self.rm_dec.cr_2bit_in.eq(self.dec_cr_out.cr_2bit)
 
         # SPRs out
         comb += e.read_spr1.eq(dec_a.spr_out)
index c46e39fb9b194ecb4719fe993d6a4755f87070c8..f1f86e651f9c791cdcdf7d889d9824b5235c0262 100644 (file)
@@ -103,6 +103,8 @@ class SVP64RMModeDecode(Elaboratable):
         self.svp64_vf_in = Signal()  # Vertical-First Mode
         self.ptype_in = Signal(SVPtype)
         self.rc_in = Signal()
+        self.cr_5bit_in = Signal()  # if CR field was 5-bit
+        self.cr_2bit_in = Signal()  # bottom 2 bits of CR field
         self.ldst_ra_vec = Signal() # set when RA is vec, indicate Index mode
         self.ldst_imz_in = Signal() # set when LD/ST immediate is zero
 
@@ -180,6 +182,15 @@ class SVP64RMModeDecode(Elaboratable):
                 with m.Case(2,3):
                     comb += self.mode.eq(SVP64RMMode.FFIRST) # fail-first
 
+            # extract failfirst
+            with m.If(self.mode == SVP64RMMode.FFIRST): # fail-first
+                comb += self.inv.eq(mode[SVP64MODE.INV])
+                comb += self.vli.eq(mode[SVP64MODE.BC_VLSET])
+                with m.If(self.cr_5bit_in):
+                    comb += self.cr_sel.eq(0b10) # EQ bit index is implicit
+                with m.Else():
+                    comb += self.cr_sel.eq(cr)
+
         with m.Else():
             # combined arith / ldst decoding due to similarity
             with m.Switch(mode2):