From f1ff815e0bc75ef1319ae350a969e7325065e12b Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 6 Oct 2022 18:34:38 +0100 Subject: [PATCH] sort out CROPs fail-first in ISACaller. needed to take a copy of CR for when sv.cmp (and other pseudocode) *overwrites* CR and it needs restoring (when VLI=0). also needed to identify 3-bit and 5-bit ffirst mode, and extract bottom 2 bits of BF --- src/openpower/decoder/isa/caller.py | 31 +++-- .../decoder/isa/test_caller_svp64_inssort.py | 116 +++++++++++++++++- src/openpower/decoder/power_decoder2.py | 11 ++ src/openpower/decoder/power_svp64_rm.py | 11 ++ 4 files changed, 158 insertions(+), 11 deletions(-) diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index c4cb0c39..078a2f0b 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -1089,6 +1089,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # create CR then allow portions of it to be "selectable" (below) self.cr_fields = CRFields(initial_cr) self.cr = self.cr_fields.cr + self.cr_backup = 0 # sigh, dreadful hack: for fail-first (VLi) # "undefined", just set to variable-bit-width int (use exts "max") # self.undefined = SelectableInt(0, 256) # TODO, not hard-code 256! @@ -1230,6 +1231,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.namespace['VL'] = vl self.namespace['srcstep'] = srcstep + # take a copy of the CR field value: if non-VLi fail-first fails + # this is because the pseudocode writes *directly* to CR. 
sigh + self.cr_backup = self.cr.value + # sv.bc* need some extra fields if self.is_svp64_mode and insn_name.startswith("sv.bc"): # blegh grab bits manually @@ -1851,7 +1856,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): yield from self.do_rc_ov(ins_name, results[0], overflow, cr0) # check failfirst - ffirst_hit = False + ffirst_hit = False, False if self.is_svp64_mode: sv_mode = yield self.dec2.rm_dec.sv_mode is_cr = sv_mode == SVMode.CROP.value @@ -1869,15 +1874,15 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): ff_inv = yield self.dec2.rm_dec.inv cr_bit = yield self.dec2.rm_dec.cr_sel RC1 = yield self.dec2.rm_dec.RC1 - vli = yield self.dec2.rm_dec.vli # VL inclusive if truncated + vli_ = yield self.dec2.rm_dec.vli # VL inclusive if truncated log(" ff rm_mode", rc_en, rm_mode, SVP64RMMode.FFIRST.value) log(" inv", ff_inv) log(" RC1", RC1) - log(" vli", vli) + log(" vli", vli_) log(" cr_bit", cr_bit) log(" rc_en", rc_en) if not rc_en or rm_mode != SVP64RMMode.FFIRST.value: - return False + return False, False # get the CR vevtor, do BO-test crf = "CR0" log("asmregs", info.asmregs[0], info.write_regs) @@ -1889,13 +1894,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): log("cr test", crf, regnum, int(crtest), crtest, cr_bit, ff_inv) log("cr test?", ffirst_hit) if not ffirst_hit: - return False + return False, False # Fail-first activated, truncate VL - vli = SelectableInt(int(vli), 7) + vli = SelectableInt(int(vli_), 7) self.svstate.vl = srcstep + vli yield self.dec2.state.svstate.eq(self.svstate.value) yield Settle() # let decoder update - return True + return True, vli_ def do_rc_ov(self, ins_name, result, overflow, cr0): if ins_name.startswith("f"): @@ -1920,9 +1925,15 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): def do_outregs_nia(self, asmop, ins_name, info, outs, carry_en, rc_en, ffirst_hit): - # write out any regs for this instruction - for name, output in outs.items(): - yield from 
self.check_write(info, name, output, carry_en) + ffirst_hit, vli = ffirst_hit + if not ffirst_hit or vli: + # write out any regs for this instruction + for name, output in outs.items(): + yield from self.check_write(info, name, output, carry_en) + # restore the CR value on non-VLI failfirst (from sv.cmp and others + # which write directly to CR in the pseudocode: gah, what a mess) + if ffirst_hit and not vli: + self.cr.value = self.cr_backup if ffirst_hit: self.svp64_reset_loop() diff --git a/src/openpower/decoder/isa/test_caller_svp64_inssort.py b/src/openpower/decoder/isa/test_caller_svp64_inssort.py index 9703cebb..9b0490fc 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_inssort.py +++ b/src/openpower/decoder/isa/test_caller_svp64_inssort.py @@ -33,6 +33,120 @@ class DecoderTestCase(FHDLTestCase): for i in range(32): self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64)) + def test_sv_cmp_ff_vli(self): + lst = SVP64Asm(["sv.cmp/ff=eq/vli *0, 1, *16, 0", + ]) + lst = list(lst) + + # SVSTATE vl=3 + svstate = SVP64State() + svstate.vl = 3 # VL + svstate.maxvl = 3 # MAXVL + print ("SVSTATE", bin(svstate.asint())) + + gprs = [0] * 64 + vec = [1, 2, 3] + crs_expected = [8, 2, 0] # LT EQ GT + + res = [] + # store GPRs + for i, x in enumerate(vec): + gprs[i+16] = x + + gprs[0] = 2 # middle value of vec + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, initial_regs=gprs, + svstate=svstate) + print ("spr svstate ", sim.svstate) + print (" vl", sim.svstate.vl) + for i in range(len(vec)): + val = sim.gpr(16+i).value + res.append(val) + crf = sim.crl[i].get_range().value + print ("i", i, val, crf) + for i in range(len(vec)): + crf = sim.crl[i].get_range().value + assert crf == crs_expected[i], "cr %d %s expect %s" % \ + (i, crf, crs_expected[i]) + assert sim.svstate.vl == 2 + + def test_sv_cmp_ff(self): + lst = SVP64Asm(["sv.cmp/ff=eq *0, 1, *16, 0", + ]) + lst = list(lst) + + # SVSTATE vl=3 + svstate = SVP64State() + 
svstate.vl = 3 # VL + svstate.maxvl = 3 # MAXVL + print ("SVSTATE", bin(svstate.asint())) + + gprs = [0] * 64 + vec = [1, 2, 3] + crs_expected = [8, 0, 0] # LT EQ GT + + res = [] + # store GPRs + for i, x in enumerate(vec): + gprs[i+16] = x + + gprs[0] = 2 # middle value of vec + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, initial_regs=gprs, + svstate=svstate) + print ("spr svstate ", sim.svstate) + print (" vl", sim.svstate.vl) + for i in range(len(vec)): + val = sim.gpr(16+i).value + res.append(val) + crf = sim.crl[i].get_range().value + print ("i", i, val, crf) + for i in range(len(vec)): + crf = sim.crl[i].get_range().value + assert crf == crs_expected[i], "cr %d %s expect %s" % \ + (i, crf, crs_expected[i]) + assert sim.svstate.vl == 1 + + def test_sv_cmp_ff_lt(self): + lst = SVP64Asm(["sv.cmp/ff=gt *0, 1, *16, 0", + ]) + lst = list(lst) + + # SVSTATE vl=3 + svstate = SVP64State() + svstate.vl = 3 # VL + svstate.maxvl = 3 # MAXVL + print ("SVSTATE", bin(svstate.asint())) + + gprs = [0] * 64 + vec = [1, 2, 3] + crs_expected = [8, 2, 0] # LT EQ GT + + res = [] + # store GPRs + for i, x in enumerate(vec): + gprs[i+16] = x + + gprs[0] = 2 # middle value of vec + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, initial_regs=gprs, + svstate=svstate) + print ("spr svstate ", sim.svstate) + print (" vl", sim.svstate.vl) + for i in range(len(vec)): + val = sim.gpr(16+i).value + res.append(val) + crf = sim.crl[i].get_range().value + print ("i", i, val, crf) + for i in range(len(vec)): + crf = sim.crl[i].get_range().value + assert crf == crs_expected[i], "cr %d %s expect %s" % \ + (i, crf, crs_expected[i]) + assert sim.svstate.vl == 2 + def test_sv_cmp(self): lst = SVP64Asm(["sv.cmp *0, 1, *16, 0", ]) @@ -125,7 +239,7 @@ class DecoderTestCase(FHDLTestCase): crf = sim.crl[i].get_range().value print ("i", i, val, crf) # confirm that the results are as expected - expected = 
reversed(sorted(vec)) + expected = list(reversed(sorted(vec))) for i, v in enumerate(res): self.assertEqual(v, expected[i]) diff --git a/src/openpower/decoder/power_decoder2.py b/src/openpower/decoder/power_decoder2.py index 2f981f35..1ad9a52f 100644 --- a/src/openpower/decoder/power_decoder2.py +++ b/src/openpower/decoder/power_decoder2.py @@ -678,6 +678,8 @@ class DecodeCROut(Elaboratable): self.cr_bitfield = Data(3, "cr_bitfield") self.whole_reg = Data(8, "cr_fxm") self.sv_override = Signal(2, reset_less=True) # do not do EXTRA spec + self.cr_5bit = Signal(reset_less=True) # set True for 5-bit + self.cr_2bit = Signal(2, reset_less=True) # get lowest 2 bits def elaborate(self, platform): m = Module() @@ -689,6 +691,7 @@ class DecodeCROut(Elaboratable): comb += self.cr_bitfield.ok.eq(0) comb += self.whole_reg.ok.eq(0) comb += self.sv_override.eq(0) + comb += self.cr_5bit.eq(0) # please note these MUST match (setting of cr_bitfield.ok) exactly # with write_cr0 below in PowerDecoder2. the reason it's separated @@ -713,6 +716,8 @@ class DecodeCROut(Elaboratable): with m.Case(CROutSel.BT): comb += self.cr_bitfield.data.eq(self.dec.FormXL.BT[2:5]) comb += self.cr_bitfield.ok.eq(1) + comb += self.cr_5bit.eq(1) + comb += self.cr_2bit.eq(self.dec.FormXL.BT[0:2]) with m.Case(CROutSel.WHOLE_REG): comb += self.whole_reg.ok.eq(1) move_one = Signal(reset_less=True) @@ -1153,6 +1158,7 @@ class PowerDecode2(PowerDecodeSubset): regreduce_en=False, fp_en=fp_en) self.ldst_exc = LDSTException("dec2_exc") # rewrites as OP_TRAP self.instr_fault = Signal() # rewrites instruction as OP_FETCH_FAILED + self.crout_5bit = Signal() # CR out is 5-bit if self.svp64_en: self.cr_out_isvec = Signal(1, name="cr_out_isvec") @@ -1226,6 +1232,7 @@ class PowerDecode2(PowerDecodeSubset): m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec, op) m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec, op) comb += dec_a.sv_nz.eq(self.sv_a_nz) + comb += 
self.crout_5bit.eq(self.dec_cr_out.cr_5bit) if self.svp64_en: # and SVP64 Extra decoders @@ -1517,6 +1524,10 @@ class PowerDecode2(PowerDecodeSubset): if self.svp64_en: comb += self.rm_dec.ldst_ra_vec.eq(self.in1_isvec) # RA is vector + comb += self.rm_dec.cr_5bit_in.eq(self.crout_5bit) # CR is 5-bit + # take bottom 2 bits of CR out (CR field selector) + with m.If(self.crout_5bit): + comb += self.rm_dec.cr_2bit_in.eq(self.dec_cr_out.cr_2bit) # SPRs out comb += e.read_spr1.eq(dec_a.spr_out) diff --git a/src/openpower/decoder/power_svp64_rm.py b/src/openpower/decoder/power_svp64_rm.py index c46e39fb..f1f86e65 100644 --- a/src/openpower/decoder/power_svp64_rm.py +++ b/src/openpower/decoder/power_svp64_rm.py @@ -103,6 +103,8 @@ class SVP64RMModeDecode(Elaboratable): self.svp64_vf_in = Signal() # Vertical-First Mode self.ptype_in = Signal(SVPtype) self.rc_in = Signal() + self.cr_5bit_in = Signal() # if CR field was 5-bit + self.cr_2bit_in = Signal() # bottom 2 bits of CR field; NOTE(review): bare Signal() looks 1-bit but is driven from 2-bit cr_2bit - confirm Signal(2) intended self.ldst_ra_vec = Signal() # set when RA is vec, indicate Index mode self.ldst_imz_in = Signal() # set when LD/ST immediate is zero @@ -180,6 +182,15 @@ class SVP64RMModeDecode(Elaboratable): with m.Case(2,3): comb += self.mode.eq(SVP64RMMode.FFIRST) # fail-first + # extract failfirst + with m.If(self.mode == SVP64RMMode.FFIRST): # fail-first + comb += self.inv.eq(mode[SVP64MODE.INV]) + comb += self.vli.eq(mode[SVP64MODE.BC_VLSET]) + with m.If(self.cr_5bit_in): + comb += self.cr_sel.eq(0b10) # EQ bit index is implicit + with m.Else(): + comb += self.cr_sel.eq(cr) + with m.Else(): # combined arith / ldst decoding due to similarity with m.Switch(mode2): -- 2.30.2