From 2e48c54d08037c28e99f4337f40d24d8593f6f4e Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sat, 14 Aug 2021 11:43:46 +0100 Subject: [PATCH] messy resolution of sv.bc testing, early-out detection. --- src/openpower/decoder/isa/caller.py | 71 ++++++++++++++++--- .../decoder/isa/test_caller_svp64_bc.py | 12 ++-- src/openpower/decoder/power_decoder2.py | 3 +- src/openpower/decoder/pseudo/pywriter.py | 8 ++- 4 files changed, 77 insertions(+), 17 deletions(-) diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index 7d8497da..0026f332 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -21,7 +21,7 @@ from openpower.decoder.selectable_int import (FieldSelectableInt, SelectableInt, selectconcat) from openpower.decoder.power_enums import (spr_dict, spr_byname, XER_bits, insns, MicrOp, In1Sel, In2Sel, In3Sel, - OutSel, CROutSel, LDSTMode, + OutSel, CRInSel, CROutSel, LDSTMode, SVP64RMMode, SVP64PredMode, SVP64PredInt, SVP64PredCR, SVP64LDSTmode) @@ -72,6 +72,7 @@ REG_SORT_ORDER = { "RB": 0, "RC": 0, "RS": 0, + "BI": 0, "CR": 0, "LR": 0, "CTR": 0, @@ -441,6 +442,30 @@ def get_pdecode_idx_in(dec2, name): return None, False +# TODO, really should just be using PowerDecoder2 +def get_pdecode_cr_in(dec2, name): + op = dec2.dec.op + in_sel = yield op.cr_in + in_bitfield = yield dec2.dec_cr_in.cr_bitfield.data + sv_cr_in = yield op.sv_cr_in + spec = yield dec2.crin_svdec.spec + sv_override = yield dec2.dec_cr_in.sv_override + # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec) + in1 = yield dec2.e.read_cr1.data + cr_isvec = yield dec2.cr_in_isvec + log ("get_pdecode_cr_in", in_sel, CROutSel.CR0.value, in1, cr_isvec) + log (" sv_cr_in", sv_cr_in) + log (" cr_bf", in_bitfield) + log (" spec", spec) + log (" override", sv_override) + # identify which regnames map to in / o2 + if name == 'BI': + if in_sel == CRInSel.BI.value: + return in1, cr_isvec + log ("get_pdecode_cr_in not found", name) + return None, False + + # TODO, really should just be using PowerDecoder2 def get_pdecode_cr_out(dec2, name): op = dec2.dec.op @@ -731,11 +756,22 @@ class ISACaller: fields = self.decoder.sigforms[formname] log("prep_namespace", formname, op_fields) for name in op_fields: - if name == 'spr': - sig = getattr(fields, name.upper()) - else: + # CR immediates. deal with separately. needs modifying + # pseudocode + if self.is_svp64_mode and name in ['BI']: # TODO, more CRs + # BI is a 5-bit, must reconstruct the value + regnum, is_vec = yield from get_pdecode_cr_in(self.dec2, name) sig = getattr(fields, name) - val = yield sig + val = yield sig + # low 2 LSBs (CR field selector) remain same, CR num extended + assert regnum <= 7, "sigh, TODO, 128 CR fields" + val = (val & 0b11) | (regnum<<2) + else: + if name == 'spr': + sig = getattr(fields, name.upper()) + else: + sig = getattr(fields, name) + val = yield sig # these are all opcode fields involved in index-selection of CR, # and need to do "standard" arithmetic. CR[BA+32] for example # would, if using SelectableInt, only be 5-bit. @@ -1557,7 +1593,7 @@ class ISACaller: else: if self.allow_next_step_inc == 2: log ("SVSTATE_NEXT: read") - yield from self.svstate_post_inc() + yield from self.svstate_post_inc(ins_name) else: log ("SVSTATE_NEXT: post-inc") # use actual src/dst-step here to check end, do NOT @@ -1597,7 +1633,7 @@ class ISACaller: self.svstate.vfirst = 0 elif self.is_svp64_mode: - yield from self.svstate_post_inc() + yield from self.svstate_post_inc(ins_name) else: # XXX only in non-SVP64 mode! # record state of whether the current operation was an svshape, @@ -1718,7 +1754,7 @@ class ISACaller: # nothing needs doing (TODO zeroing): just do next instruction return srcstep == vl or dststep == vl - def svstate_post_inc(self, vf=0): + def svstate_post_inc(self, insn_name, vf=0): # check if SV "Vertical First" mode is enabled vfirst = self.svstate.vfirst log (" SV Vertical First", vf, vfirst) @@ -1758,9 +1794,21 @@ class ISACaller: if svp64_is_vector and srcstep != vl-1 and dststep != vl-1: self.svstate.srcstep += SelectableInt(1, 7) self.svstate.dststep += SelectableInt(1, 7) + self.namespace['SVSTATE'] = self.svstate + # check if this was an sv.bc* and if so did it succeed + if self.is_svp64_mode and insn_name.startswith("sv.bc"): + ctr_ok = self.namespace['ctr_ok'] + cond_ok = self.namespace['cond_ok'] + log("branch ctr/cond", ctr_ok, cond_ok) + if ctr_ok.value and cond_ok.value : + self.svp64_reset_loop() + self.update_pc_next() + return True + # not an SVP64 branch, so fix PC (NIA==CIA) for next loop + # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64) + # this way we keep repeating the same instruction (with new steps) self.pc.NIA.value = self.pc.CIA.value self.namespace['NIA'] = self.pc.NIA - self.namespace['SVSTATE'] = self.svstate log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA']) return False # DO NOT allow PC update whilst Sub-PC loop running @@ -1789,6 +1837,7 @@ class ISACaller: self.pc.update_nia(self.is_svp64_mode) self.namespace['NIA'] = self.pc.NIA + def inject(): """Decorator factory. @@ -1819,6 +1868,10 @@ def inject(): args[0].namespace['NIA'], args[0].namespace['SVSTATE']) args[0].namespace = func_globals + if 'cond_ok' in args[0].namespace: + log("args[0] cond_ok ctr_ok", + args[0].namespace['cond_ok'], + args[0].namespace['ctr_ok']) #exec (func.__code__, func_globals) # finally: diff --git a/src/openpower/decoder/isa/test_caller_svp64_bc.py b/src/openpower/decoder/isa/test_caller_svp64_bc.py index 983cde26..65ba1b70 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_bc.py +++ b/src/openpower/decoder/isa/test_caller_svp64_bc.py @@ -51,13 +51,15 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64)) def test_sv_branch_cond(self): - for i in [0]: #[0, 10]: + for i in [0, 10]: #[0, 10]: lst = SVP64Asm( [f"addi 1, 0, {i}", # set r1 to i + f"addi 2, 0, {i}", # set r2 to i "cmpi cr0, 1, 1, 10", # compare r1 with 10 and store to cr0 - "sv.bc 12, 2, 0x8", # beq 0x8 - + "cmpi cr1, 1, 2, 10", # compare r2 with 10 and store to cr1 + "sv.bc 12, 2.v, 0x8", # beq 0x8 - # branch if r1 equals 10 to the nop below - "addi 2, 0, 0x1234", # if r1 == 10 this shouldn't execute + "addi 3, 0, 0x1234", # if r1 == 10 this shouldn't execute "or 0, 0, 0"] # branch target ) lst = list(lst) @@ -71,9 +73,9 @@ class DecoderTestCase(FHDLTestCase): with Program(lst, bigendian=False) as program: sim = self.run_tst_program(program, svstate=svstate) if i == 10: - self.assertEqual(sim.gpr(2), SelectableInt(0, 64)) + self.assertEqual(sim.gpr(3), SelectableInt(0, 64)) else: - self.assertEqual(sim.gpr(2), SelectableInt(0x1234, 64)) + self.assertEqual(sim.gpr(3), SelectableInt(0x1234, 64)) def tst_sv_add_cr(self): """>>> lst = ['sv.add. 1.v, 5.v, 9.v' diff --git a/src/openpower/decoder/power_decoder2.py b/src/openpower/decoder/power_decoder2.py index b20a014a..6db5c615 100644 --- a/src/openpower/decoder/power_decoder2.py +++ b/src/openpower/decoder/power_decoder2.py @@ -1191,8 +1191,9 @@ class PowerDecode2(PowerDecodeSubset): m.submodules.o_svdec = o_svdec = SVP64RegExtra() m.submodules.o2_svdec = o2_svdec = SVP64RegExtra() - # debug access to crout_svdec (used in get_pdecode_cr_out) + # debug access to cr svdec (used in get_pdecode_cr_in/out) self.crout_svdec = crout_svdec + self.crin_svdec = crin_svdec # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec reg = Signal(5, reset_less=True) diff --git a/src/openpower/decoder/pseudo/pywriter.py b/src/openpower/decoder/pseudo/pywriter.py index 8cf2ed9b..8cde6d19 100644 --- a/src/openpower/decoder/pseudo/pywriter.py +++ b/src/openpower/decoder/pseudo/pywriter.py @@ -93,8 +93,12 @@ class PyISAWriter(ISA): op_fname = "op_%s" % page.replace(".", "_") f.write(" @inject()\n") f.write(" def %s(%s):\n" % (op_fname, args)) - if 'NIA' in pycode: # HACK - TODO fix - f.write(" global NIA\n") + # blech! this works in combination with ISACaller + # @inject decorator, which works by injecting + # global variables into the function namespace. + for blech in ['NIA', 'cond_ok', 'ctr_ok']: + if blech in pycode: # HACK - TODO fix + f.write(" global %s\n" % blech) pycode = pycode.split("\n") pycode = '\n'.join(map(lambda x: " %s" % x, pycode)) pycode = pycode.rstrip() -- 2.30.2