X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fopenpower%2Fdecoder%2Fisa%2Fcaller.py;h=94ac7c008366d5edc9a7fadd34e8033e135084df;hb=2ffb995ff4385262032c8e4090b9a2702a5ee01e;hp=a778e4355c0a2d7c304e1e6c179970ca226a4e8f;hpb=758625bc0eee28ab8cfec201097001e671df0546;p=openpower-isa.git diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index a778e435..94ac7c00 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -37,7 +37,7 @@ from openpower.decoder.power_enums import SVPtype from openpower.decoder.helpers import (exts, gtu, ltu, undefined, ISACallerHelper, ISAFPHelpers) from openpower.consts import PIb, MSRb # big-endian (PowerISA versions) -from openpower.consts import (SVP64MODE, +from openpower.consts import (SVP64MODE, SVP64MODEb, SVP64CROffs, ) from openpower.decoder.power_svp64 import SVP64RM, decode_extra @@ -66,6 +66,9 @@ special_sprs = { 'VRSAVE': 256} +# rrright. this is here basically because the compiler pywriter returns +# results in a specific priority order. to make sure regs match up they +# need partial sorting. sigh. REG_SORT_ORDER = { # TODO (lkcl): adjust other registers that should be in a particular order # probably CA, CA32, and CR @@ -95,6 +98,7 @@ REG_SORT_ORDER = { "CA32": 0, "overflow": 7, # should definitely be last + "CR0": 8, # likewise } fregs = ['FRA', 'FRB', 'FRC', 'FRS', 'FRT'] @@ -278,33 +282,32 @@ class CRFields: _cr = FieldSelectableInt(self.cr, bits) self.crl.append(_cr) -# decode SVP64 predicate integer to reg number and invert - +# decode SVP64 predicate integer to reg number and invert def get_predint(gpr, mask): + r3 = gpr(3) r10 = gpr(10) r30 = gpr(30) log("get_predint", mask, SVP64PredInt.ALWAYS.value) if mask == SVP64PredInt.ALWAYS.value: return 0xffff_ffff_ffff_ffff # 64 bits of 1 if mask == SVP64PredInt.R3_UNARY.value: - return 1 << (gpr(3).value & 0b111111) + return 1 << (r3.value & 0b111111) if mask == SVP64PredInt.R3.value: - return gpr(3).value + return r3.value if mask == SVP64PredInt.R3_N.value: - return ~gpr(3).value + return ~r3.value if mask == SVP64PredInt.R10.value: - return gpr(10).value + return r10.value if mask == SVP64PredInt.R10_N.value: - return ~gpr(10).value + return ~r10.value if mask == SVP64PredInt.R30.value: - return gpr(30).value + return r30.value if mask == SVP64PredInt.R30_N.value: - return ~gpr(30).value - -# decode SVP64 predicate CR to reg number and invert status + return ~r30.value +# decode SVP64 predicate CR to reg number and invert status def _get_predcr(mask): if mask == SVP64PredCR.LT.value: return 0, 1 @@ -323,10 +326,9 @@ def _get_predcr(mask): if mask == SVP64PredCR.NS.value: return 3, 0 + # read individual CR fields (0..VL-1), extract the required bit # and construct the mask - - def get_predcr(crl, mask, vl): idx, noninv = _get_predcr(mask) mask = 0 @@ -376,6 +378,8 @@ def get_pdecode_idx_in(dec2, name): return in3, in3_isvec # XXX TODO, RC doesn't exist yet! elif name == 'RC': + if in3_sel == In3Sel.RC.value: + return in3, in3_isvec assert False, "RC does not exist yet" elif name == 'RS': if in1_sel == In1Sel.RS.value: @@ -445,7 +449,7 @@ def get_pdecode_cr_out(dec2, name): if name == 'CR0': if out_sel == CROutSel.CR0.value: return out, o_isvec - if name == 'CR1': # these are not actually calculated correctly + if name == 'CR1': # these are not actually calculated correctly if out_sel == CROutSel.CR1.value: return out, o_isvec log("get_pdecode_cr_out not found", name) @@ -512,10 +516,14 @@ def get_pdecode_idx_out2(dec2, name): out, o_isvec) if upd == LDSTMode.update.value: return out, o_isvec + if name == 'RS': + fft_en = yield dec2.implicit_rs + if fft_en: + log("get_pdecode_idx_out2", out_sel, OutSel.RS.value, + out, o_isvec) + return out, o_isvec if name == 'FRS': - int_op = yield dec2.dec.op.internal_op - fft_en = yield dec2.use_svp64_fft - # if int_op == MicrOp.OP_FP_MADD.value and fft_en: + fft_en = yield dec2.implicit_rs if fft_en: log("get_pdecode_idx_out2", out_sel, OutSel.FRS.value, out, o_isvec) @@ -529,11 +537,19 @@ class StepLoop: def __init__(self, svstate): self.svstate = svstate + self.new_iterators() - def get_iterators(self): + def new_iterators(self): self.src_it = self.src_iterator() self.dst_it = self.dst_iterator() - + self.loopend = False + self.new_srcstep = 0 + self.new_dststep = 0 + self.new_ssubstep = 0 + self.new_dsubstep = 0 + self.pred_dst_zero = 0 + self.pred_src_zero = 0 + def src_iterator(self): """source-stepping iterator """ @@ -542,36 +558,68 @@ class StepLoop: # source step if pack: # pack advances subvl in *outer* loop - if end_src: - if not end_ssub: - self.svstate.ssubstep += SelectableInt(1, 2) - self.svstate.srcstep = SelectableInt(0, 7) # reset - else: - self.svstate.srcstep += SelectableInt(1, 7) # advance srcstep + while True: # outer subvl loop + while True: # inner vl loop + vl = self.svstate.vl + subvl = self.subvl + srcmask = self.srcmask + srcstep = self.svstate.srcstep + pred_src_zero = ((1 << srcstep) & srcmask) != 0 + if self.pred_sz or pred_src_zero: + self.pred_src_zero = not pred_src_zero + log(" advance src", srcstep, vl, + self.svstate.ssubstep, subvl) + # yield actual substep/srcstep + yield (self.svstate.ssubstep, srcstep) + # the way yield works these could have been modified. + vl = self.svstate.vl + subvl = self.subvl + srcstep = self.svstate.srcstep + log(" advance src check", srcstep, vl, + self.svstate.ssubstep, subvl, srcstep == vl-1, + self.svstate.ssubstep == subvl) + if srcstep == vl-1: # end-point + self.svstate.srcstep = SelectableInt(0, 7) # reset + if self.svstate.ssubstep == subvl: # end-point + log(" advance pack stop") + return + break # exit inner loop + self.svstate.srcstep += SelectableInt(1, 7) # advance ss + subvl = self.subvl + if self.svstate.ssubstep == subvl: # end-point + self.svstate.ssubstep = SelectableInt(0, 2) # reset + log(" advance pack stop") + return + self.svstate.ssubstep += SelectableInt(1, 2) + else: # these cannot be done as for-loops because SVSTATE may change # (srcstep/substep may be modified, interrupted, subvl/vl change) # but they *can* be done as while-loops as long as every SVSTATE # "thing" is re-read every single time a yield gives indices - while True: # outer vl loop - while True: # inner subvl loop + while True: # outer vl loop + while True: # inner subvl loop + vl = self.svstate.vl subvl = self.subvl srcmask = self.srcmask srcstep = self.svstate.srcstep - if self.pred_sz or ((1 << srcstep) & srcmask) != 0: - log(" advance src", srcstep, self.svstate.vl, - self.svstate.ssubstep, subvl) + pred_src_zero = ((1 << srcstep) & srcmask) != 0 + if self.pred_sz or pred_src_zero: + self.pred_src_zero = not pred_src_zero + log(" advance src", srcstep, vl, + self.svstate.ssubstep, subvl) # yield actual substep/srcstep yield (self.svstate.ssubstep, srcstep) - if self.svstate.ssubstep == subvl: # end-point + if self.svstate.ssubstep == subvl: # end-point self.svstate.ssubstep = SelectableInt(0, 2) # reset - break + break # exit inner loop self.svstate.ssubstep += SelectableInt(1, 2) vl = self.svstate.vl - if srcstep == vl-1: # end-point + if srcstep == vl-1: # end-point self.svstate.srcstep = SelectableInt(0, 7) # reset - break # trigger StopIteration - self.svstate.srcstep += SelectableInt(1, 7) # advance srcstep + self.loopend = True + return + self.svstate.srcstep += SelectableInt(1, 7) # advance srcstep def dst_iterator(self): """dest-stepping iterator @@ -581,103 +629,214 @@ class StepLoop: # dest step if unpack: # pack advances subvl in *outer* loop - pass # TODO + while True: # outer subvl loop + while True: # inner vl loop + vl = self.svstate.vl + subvl = self.subvl + dstmask = self.dstmask + dststep = self.svstate.dststep + pred_dst_zero = ((1 << dststep) & dstmask) != 0 + if self.pred_dz or pred_dst_zero: + self.pred_dst_zero = not pred_dst_zero + log(" advance dst", dststep, vl, + self.svstate.dsubstep, subvl) + # yield actual substep/dststep + yield (self.svstate.dsubstep, dststep) + # the way yield works these could have been modified. + vl = self.svstate.vl + dststep = self.svstate.dststep + log(" advance dst check", dststep, vl, + self.svstate.ssubstep, subvl) + if dststep == vl-1: # end-point + self.svstate.dststep = SelectableInt(0, 7) # reset + if self.svstate.dsubstep == subvl: # end-point + log(" advance unpack stop") + return + break + self.svstate.dststep += SelectableInt(1, 7) # advance ds + subvl = self.subvl + if self.svstate.dsubstep == subvl: # end-point + self.svstate.dsubstep = SelectableInt(0, 2) # reset + log(" advance unpack stop") + return + self.svstate.dsubstep += SelectableInt(1, 2) else: # these cannot be done as for-loops because SVSTATE may change # (dststep/substep may be modified, interrupted, subvl/vl change) # but they *can* be done as while-loops as long as every SVSTATE # "thing" is re-read every single time a yield gives indices - while True: # outer vl loop - while True: # inner subvl loop + while True: # outer vl loop + while True: # inner subvl loop subvl = self.subvl dstmask = self.dstmask dststep = self.svstate.dststep - if self.pred_dz or ((1 << dststep) & dstmask) != 0: + pred_dst_zero = ((1 << dststep) & dstmask) != 0 + if self.pred_dz or pred_dst_zero: + self.pred_dst_zero = not pred_dst_zero log(" advance dst", dststep, self.svstate.vl, - self.svstate.dsubstep, subvl) + self.svstate.dsubstep, subvl) # yield actual substep/dststep yield (self.svstate.dsubstep, dststep) - if self.svstate.dsubstep == subvl: # end-point + if self.svstate.dsubstep == subvl: # end-point self.svstate.dsubstep = SelectableInt(0, 2) # reset break self.svstate.dsubstep += SelectableInt(1, 2) + subvl = self.subvl vl = self.svstate.vl - if dststep == vl-1: # end-point + if dststep == vl-1: # end-point self.svstate.dststep = SelectableInt(0, 7) # reset - break # trigger StopIteration - self.svstate.dststep += SelectableInt(1, 7) # advance dststep + return + self.svstate.dststep += SelectableInt(1, 7) # advance dststep def src_iterate(self): """source-stepping iterator """ - end_src = self.end_src subvl = self.subvl + vl = self.svstate.vl pack = self.svstate.pack unpack = self.svstate.unpack ssubstep = self.svstate.ssubstep end_ssub = ssubstep == subvl + end_src = self.svstate.srcstep == vl-1 log(" pack/unpack/subvl", pack, unpack, subvl, - "end", end_src, - "sub", end_ssub) + "end", end_src, + "sub", end_ssub) # first source step srcstep = self.svstate.srcstep + srcmask = self.srcmask if pack: # pack advances subvl in *outer* loop - if end_src: - if not end_ssub: - self.svstate.ssubstep += SelectableInt(1, 2) - self.svstate.srcstep = SelectableInt(0, 7) # reset - else: - self.svstate.srcstep += SelectableInt(1, 7) # advance srcstep + while True: + assert srcstep <= vl-1 + end_src = srcstep == vl-1 + if end_src: + if end_ssub: + self.loopend = True + else: + self.svstate.ssubstep += SelectableInt(1, 2) + srcstep = 0 # reset + break + else: + srcstep += 1 # advance srcstep + if not self.srcstep_skip: + break + if ((1 << srcstep) & srcmask) != 0: + break + else: + log(" sskip", bin(srcmask), bin(1 << srcstep)) else: # advance subvl in *inner* loop if end_ssub: - if not end_src: - self.svstate.srcstep += SelectableInt(1, 7) + while True: + assert srcstep <= vl-1 + end_src = srcstep == vl-1 + if end_src: # end-point + self.loopend = True + srcstep = 0 + break + else: + srcstep += 1 + if not self.srcstep_skip: + break + if ((1 << srcstep) & srcmask) != 0: + break + else: + log(" sskip", bin(srcmask), bin(1 << srcstep)) self.svstate.ssubstep = SelectableInt(0, 2) # reset else: - self.svstate.ssubstep += SelectableInt(1, 2) # advance ssubstep + # advance ssubstep + self.svstate.ssubstep += SelectableInt(1, 2) - log(" advance src", self.svstate.srcstep, self.svstate.ssubstep) + self.svstate.srcstep = SelectableInt(srcstep, 7) + log(" advance src", self.svstate.srcstep, self.svstate.ssubstep, + self.loopend) def dst_iterate(self): """dest step iterator """ - end_dst = self.end_dst + vl = self.svstate.vl subvl = self.subvl pack = self.svstate.pack unpack = self.svstate.unpack dsubstep = self.svstate.dsubstep end_dsub = dsubstep == subvl + dststep = self.svstate.dststep + end_dst = dststep == vl-1 + dstmask = self.dstmask log(" pack/unpack/subvl", pack, unpack, subvl, - "end", end_dst, - "sub", end_dsub) + "end", end_dst, + "sub", end_dsub) # now dest step if unpack: # unpack advances subvl in *outer* loop - if end_dst: - if not end_dsub: - self.svstate.dsubstep += SelectableInt(1, 2) - self.svstate.dststep = SelectableInt(0, 7) # reset - else: - self.svstate.dststep += SelectableInt(1, 7) # advance dststep + while True: + assert dststep <= vl-1 + end_dst = dststep == vl-1 + if end_dst: + if end_dsub: + self.loopend = True + else: + self.svstate.dsubstep += SelectableInt(1, 2) + dststep = 0 # reset + break + else: + dststep += 1 # advance dststep + if not self.dststep_skip: + break + if ((1 << dststep) & dstmask) != 0: + break + else: + log(" dskip", bin(dstmask), bin(1 << dststep)) else: # advance subvl in *inner* loop if end_dsub: - if not end_dst: - self.svstate.dststep += SelectableInt(1, 7) + while True: + assert dststep <= vl-1 + end_dst = dststep == vl-1 + if end_dst: # end-point + self.loopend = True + dststep = 0 + break + else: + dststep += 1 + if not self.dststep_skip: + break + if ((1 << dststep) & dstmask) != 0: + break + else: + log(" dskip", bin(dstmask), bin(1 << dststep)) self.svstate.dsubstep = SelectableInt(0, 2) # reset else: - self.svstate.dsubstep += SelectableInt(1, 2) # advance ssubstep - log(" advance dst", self.svstate.dststep, self.svstate.dsubstep) + # advance ssubstep + self.svstate.dsubstep += SelectableInt(1, 2) - def advance_svstate_steps(self, end_src=False, end_dst=False): + self.svstate.dststep = SelectableInt(dststep, 7) + log(" advance dst", self.svstate.dststep, self.svstate.dsubstep, + self.loopend) + + def at_loopend(self): + """tells if this is the last possible element. uses the cached values + for src/dst-step and sub-steps + """ + subvl = self.subvl + vl = self.svstate.vl + srcstep, dststep = self.new_srcstep, self.new_dststep + ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep + end_ssub = ssubstep == subvl + end_dsub = dsubstep == subvl + if srcstep == vl-1 and end_ssub: + return True + if dststep == vl-1 and end_dsub: + return True + return False + + def advance_svstate_steps(self): """ advance sub/steps. note that Pack/Unpack *INVERTS* the order. TODO when Pack/Unpack is set, substep becomes the *outer* loop """ self.subvl = yield self.dec2.rm_dec.rm_in.subvl - self.end_src = end_src - self.end_dst = end_dst + if self.loopend: # huhn?? + return self.src_iterate() self.dst_iterate() @@ -720,11 +879,10 @@ class StepLoop: self.pred_sz = pred_sz self.new_ssubstep = ssubstep log(" new ssubstep", ssubstep) - if ssubstart: - # until the predicate mask has a "1" bit... or we run out of VL - # let srcstep==VL be the indicator to move to next instruction - if not pred_sz: - self.srcstep_skip = True + # until the predicate mask has a "1" bit... or we run out of VL + # let srcstep==VL be the indicator to move to next instruction + if not pred_sz: + self.srcstep_skip = True def read_dst_mask(self): """same as read_src_mask - check and record everything needed @@ -761,9 +919,8 @@ class StepLoop: self.pred_dz = pred_dz self.new_dsubstep = dsubstep log(" new dsubstep", dsubstep) - if dsubstart: - if not pred_dz: - self.dststep_skip = True + if not pred_dz: + self.dststep_skip = True def svstate_pre_inc(self): """check if srcstep/dststep need to skip over masked-out predicate bits @@ -771,6 +928,7 @@ class StepLoop: it is purely for skipping masked-out bits """ + self.subvl = yield self.dec2.rm_dec.rm_in.subvl yield from self.read_src_mask() yield from self.read_dst_mask() @@ -785,6 +943,9 @@ class StepLoop: vl = self.svstate.vl # srcstep-skipping opportunity identified if self.srcstep_skip: + # cannot do this with sv.bc - XXX TODO + if srcmask == 0: + self.loopend = True while (((1 << srcstep) & srcmask) == 0) and (srcstep != vl): log(" sskip", bin(1 << srcstep)) srcstep += 1 @@ -805,6 +966,9 @@ class StepLoop: pred_dst_zero = self.pred_dz vl = self.svstate.vl if self.dststep_skip: + # cannot do this with sv.bc - XXX TODO + if dstmask == 0: + self.loopend = True while (((1 << dststep) & dstmask) == 0) and (dststep != vl): log(" dskip", bin(1 << dststep)) dststep += 1 @@ -888,10 +1052,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # set up 4 dummy SVSHAPEs if they aren't already set up for i in range(4): sname = 'SVSHAPE%d' % i - if sname not in self.spr: - val = 0 - else: - val = self.spr[sname].value + val = self.spr.get(sname, 0) # make sure it's an SVSHAPE self.spr[sname] = SVSHAPE(val, self.gpr) self.last_op_svshape = False @@ -991,7 +1152,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.spr['SRR1'].value = msr if self.is_svp64_mode: self.spr['SVSRR0'] = self.namespace['SVSTATE'].value - self.trap_nia = SelectableInt(trap_addr | (kaivb&~0x1fff), 64) + self.trap_nia = SelectableInt(trap_addr | (kaivb & ~0x1fff), 64) self.spr['SRR1'][trap_bit] = 1 # change *copy* of MSR in SRR1 # set exception bits. TODO: this should, based on the address @@ -1063,13 +1224,15 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if self.is_svp64_mode and insn_name.startswith("sv.bc"): # blegh grab bits manually mode = yield self.dec2.rm_dec.rm_in.mode - bc_vlset = (mode & SVP64MODE.BC_VLSET) != 0 - bc_vli = (mode & SVP64MODE.BC_VLI) != 0 - bc_snz = (mode & SVP64MODE.BC_SNZ) != 0 + mode = SelectableInt(mode, 5) # convert to SelectableInt before test + bc_vlset = mode[SVP64MODEb.BC_VLSET] != 0 + bc_vli = mode[SVP64MODEb.BC_VLI] != 0 + bc_snz = mode[SVP64MODEb.BC_SNZ] != 0 bc_vsb = yield self.dec2.rm_dec.bc_vsb bc_lru = yield self.dec2.rm_dec.bc_lru bc_gate = yield self.dec2.rm_dec.bc_gate sz = yield self.dec2.rm_dec.pred_sz + self.namespace['mode'] = SelectableInt(mode, 5) self.namespace['ALL'] = SelectableInt(bc_gate, 1) self.namespace['VSb'] = SelectableInt(bc_vsb, 1) self.namespace['LRu'] = SelectableInt(bc_lru, 1) @@ -1078,7 +1241,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.namespace['sz'] = SelectableInt(sz, 1) self.namespace['SNZ'] = SelectableInt(bc_snz, 1) - def handle_carry_(self, inputs, outputs, already_done): + def handle_carry_(self, inputs, output, ca, ca32): inv_a = yield self.dec2.e.do.invert_in if inv_a: inputs[0] = ~inputs[0] @@ -1087,12 +1250,6 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if imm_ok: imm = yield self.dec2.e.do.imm_data.data inputs.append(SelectableInt(imm, 64)) - assert len(outputs) >= 1 - log("outputs", repr(outputs)) - if isinstance(outputs, list) or isinstance(outputs, tuple): - output = outputs[0] - else: - output = outputs gts = [] for x in inputs: log("gt input", x, output) @@ -1101,10 +1258,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): log(gts) cy = 1 if any(gts) else 0 log("CA", cy, gts) - if not (1 & already_done): + if ca is None: # already written self.spr['XER'][XER_bits['CA']] = cy - log("inputs", already_done, inputs) # 32 bit carry # ARGH... different for OP_ADD... *sigh*... op = yield self.dec2.e.do.insn_type @@ -1127,10 +1283,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): gts.append(gt) cy32 = 1 if any(gts) else 0 log("CA32", cy32, gts) - if not (2 & already_done): + if ca32 is None: # already written self.spr['XER'][XER_bits['CA32']] = cy32 - def handle_overflow(self, inputs, outputs, div_overflow): + def handle_overflow(self, inputs, output, div_overflow): if hasattr(self.dec2.e.do, "invert_in"): inv_a = yield self.dec2.e.do.invert_in if inv_a: @@ -1140,8 +1296,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if imm_ok: imm = yield self.dec2.e.do.imm_data.data inputs.append(SelectableInt(imm, 64)) - assert len(outputs) >= 1 - log("handle_overflow", inputs, outputs, div_overflow) + log("handle_overflow", inputs, output, div_overflow) if len(inputs) < 2 and div_overflow is None: return @@ -1151,8 +1306,6 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): ov, ov32 = div_overflow, div_overflow # arithmetic overflow can be done by analysing the input and output elif len(inputs) >= 2: - output = outputs[0] - # OV (64-bit) input_sgn = [exts(x.value, x.bits) < 0 for x in inputs] output_sgn = exts(output.value, output.bits) < 0 @@ -1167,14 +1320,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # now update XER OV/OV32/SO so = self.spr['XER'][XER_bits['SO']] - new_so = so | ov # sticky overflow ORs in old with new + new_so = so | ov # sticky overflow ORs in old with new self.spr['XER'][XER_bits['OV']] = ov self.spr['XER'][XER_bits['OV32']] = ov32 self.spr['XER'][XER_bits['SO']] = new_so log(" set overflow", ov, ov32, so, new_so) - def handle_comparison(self, outputs, cr_idx=0, overflow=None, no_so=False): - out = outputs[0] + def handle_comparison(self, out, cr_idx=0, overflow=None, no_so=False): assert isinstance(out, SelectableInt), \ "out zero not a SelectableInt %s" % repr(outputs) log("handle_comparison", out.bits, hex(out.value)) @@ -1269,18 +1421,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): """execute one instruction """ # get the disassembly code for this instruction - if self.is_svp64_mode: - if not self.disassembly: - code = yield from self.get_assembly_name() - else: - code = self.disassembly[self._pc+4] - log(" svp64 sim-execute", hex(self._pc), code) + if not self.disassembly: + code = yield from self.get_assembly_name() else: - if not self.disassembly: - code = yield from self.get_assembly_name() - else: - code = self.disassembly[self._pc] - log("sim-execute", hex(self._pc), code) + offs, dbg = 0, "" + if self.is_svp64_mode: + offs, dbg = 4, "svp64 " + code = self.disassembly[self._pc+offs] + log(" %s sim-execute" % dbg, hex(self._pc), code) opname = code.split(' ')[0] try: yield from self.call(opname) # execute the instruction @@ -1343,6 +1491,11 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): else: rc_en = False rc_ok = False + # annoying: ignore rc_ok if RC1 is set (for creating *assembly name*) + RC1 = yield self.dec2.rm_dec.RC1 + if RC1: + rc_en = False + rc_ok = False # grrrr have to special-case MUL op (see DecodeOE) log("ov %d en %d rc %d en %d op %d" % (ov_ok, ov_en, rc_ok, rc_en, int_op)) @@ -1379,6 +1532,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): asmop = 'mtcrf' return asmop + def reset_remaps(self): + self.remap_loopends = [0] * 4 + self.remap_idxs = [0, 1, 2, 3] + def get_remap_indices(self): """WARNING, this function stores remap_idxs and remap_loopends in the class for later use. this to avoid problems with yield @@ -1386,6 +1543,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # go through all iterators in lock-step, advance to next remap_idx srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps() # get four SVSHAPEs. here we are hard-coding + self.reset_remaps() SVSHAPE0 = self.spr['SVSHAPE0'] SVSHAPE1 = self.spr['SVSHAPE1'] SVSHAPE2 = self.spr['SVSHAPE2'] @@ -1397,8 +1555,6 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): (SVSHAPE3, SVSHAPE3.get_iterator()), ] - self.remap_loopends = [0] * 4 - self.remap_idxs = [0, 1, 2, 3] dbg = [] for i, (shape, remap) in enumerate(remaps): # zero is "disabled" @@ -1476,14 +1632,15 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # list of instructions not being supported by binutils (.long) dotstrp = asmop[:-1] if asmop[-1] == '.' else asmop if dotstrp in [*FPTRANS_INSNS, - 'ffmadds', 'fdmadds', 'ffadds', - 'mins', 'maxs', 'minu', 'maxu', - 'setvl', 'svindex', 'svremap', 'svstep', - 'svshape', 'svshape2', - 'grev', 'ternlogi', 'bmask', 'cprop', - 'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd', - 'fmvis', 'fishmv', - ]: + 'ffmadds', 'fdmadds', 'ffadds', + 'mins', 'maxs', 'minu', 'maxu', + 'setvl', 'svindex', 'svremap', 'svstep', + 'svshape', 'svshape2', + 'grev', 'ternlogi', 'bmask', 'cprop', + 'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd', + 'fmvis', 'fishmv', 'pcdec', "maddedu", "divmod2du", + "dsld", "dsrd", + ]: illegal = False ins_name = dotstrp @@ -1514,7 +1671,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): return # look up instruction in ISA.instrs, prepare namespace - info = self.instrs[ins_name] + if ins_name == 'pcdec': # grrrr yes there are others ("stbcx." etc.) + info = self.instrs[ins_name+"."] + else: + info = self.instrs[ins_name] yield from self.prep_namespace(ins_name, info.form, info.op_fields) # preserve order of register names @@ -1531,6 +1691,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): log("sv rm", sv_rm, dest_cr, src_cr, src_byname, dest_byname) # see if srcstep/dststep need skipping over masked-out predicate bits + self.reset_remaps() if (self.is_svp64_mode or ins_name in ['setvl', 'svremap', 'svstate']): yield from self.svstate_pre_inc() if self.is_svp64_mode: @@ -1613,14 +1774,17 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # the ALL/ANY mode we can early-exit if self.is_svp64_mode and ins_name.startswith("sv.bc"): no_in_vec = yield self.dec2.no_in_vec # BI is scalar - # XXX TODO - pack/unpack here end_loop = no_in_vec or srcstep == vl-1 or dststep == vl-1 self.namespace['end_loop'] = SelectableInt(end_loop, 1) # execute actual instruction here (finally) log("inputs", inputs) results = info.func(self, *inputs) - log("results", results) + output_names = create_args(info.write_regs) + outs = {} + for out, n in zip(results or [], output_names): + outs[n] = out + log("results", outs) # "inject" decorator takes namespace from function locals: we need to # overwrite NIA being overwritten (sigh) @@ -1641,78 +1805,106 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.last_st_addr, self.last_ld_addr) # detect if CA/CA32 already in outputs (sra*, basically) - already_done = 0 - if info.write_regs: - output_names = create_args(info.write_regs) - for name in output_names: - if name == 'CA': - already_done |= 1 - if name == 'CA32': - already_done |= 2 - - log("carry already done?", bin(already_done)) - if hasattr(self.dec2.e.do, "output_carry"): - carry_en = yield self.dec2.e.do.output_carry - else: - carry_en = False + ca = outs.get("CA") + ca32 = outs.get("CA32 ") + + log("carry already done?", ca, ca32, output_names) + carry_en = yield self.dec2.e.do.output_carry if carry_en: - yield from self.handle_carry_(inputs, results, already_done) + yield from self.handle_carry_(inputs, results[0], ca, ca32) # check if one of the regs was named "overflow" - overflow = None - if info.write_regs: - for name, output in zip(output_names, results): - if name == 'overflow': - overflow = output + overflow = outs.get('overflow') + # and one called CR0 + cr0 = outs.get('CR0') if not self.is_svp64_mode: # yeah just no. not in parallel processing # detect if overflow was in return result - if hasattr(self.dec2.e.do, "oe"): - ov_en = yield self.dec2.e.do.oe.oe - ov_ok = yield self.dec2.e.do.oe.ok - else: - ov_en = False - ov_ok = False + ov_en = yield self.dec2.e.do.oe.oe + ov_ok = yield self.dec2.e.do.oe.ok log("internal overflow", ins_name, overflow, "en?", ov_en, ov_ok) if ov_en & ov_ok: - yield from self.handle_overflow(inputs, results, overflow) + yield from self.handle_overflow(inputs, results[0], overflow) # only do SVP64 dest predicated Rc=1 if dest-pred is not enabled rc_en = False if not self.is_svp64_mode or not pred_dst_zero: if hasattr(self.dec2.e.do, "rc"): rc_en = yield self.dec2.e.do.rc.rc + # don't do Rc=1 for svstep it is handled explicitly. + # XXX TODO: now that CR0 is supported, sort out svstep's pseudocode + # to write directly to CR0 instead of in ISACaller. hooyahh. if rc_en and ins_name not in ['svstep']: - yield from self.do_rc_ov(ins_name, results, overflow) + yield from self.do_rc_ov(ins_name, results[0], overflow, cr0) + + # check failfirst + ffirst_hit = False + if self.is_svp64_mode: + ffirst_hit = (yield from self.check_ffirst(rc_en, srcstep)) # any modified return results? - yield from self.do_outregs_nia(asmop, ins_name, info, - output_names, results, - carry_en, rc_en) + yield from self.do_outregs_nia(asmop, ins_name, info, outs, + carry_en, rc_en, ffirst_hit) - def do_rc_ov(self, ins_name, results, overflow): + def check_ffirst(self, rc_en, srcstep): + rm_mode = yield self.dec2.rm_dec.mode + ff_inv = yield self.dec2.rm_dec.inv + cr_bit = yield self.dec2.rm_dec.cr_sel + RC1 = yield self.dec2.rm_dec.RC1 + vli = yield self.dec2.rm_dec.vli # VL inclusive if truncated + log(" ff rm_mode", rc_en, rm_mode, SVP64RMMode.FFIRST.value) + log(" inv", ff_inv) + log(" RC1", RC1) + log(" vli", vli) + log(" cr_bit", cr_bit) + if not rc_en or rm_mode != SVP64RMMode.FFIRST.value: + return False + regnum, is_vec = yield from get_pdecode_cr_out(self.dec2, "CR0") + crtest = self.crl[regnum] + ffirst_hit = crtest[cr_bit] != ff_inv + log("cr test", regnum, int(crtest), crtest, cr_bit, ff_inv) + log("cr test?", ffirst_hit) + if not ffirst_hit: + return False + vli = SelectableInt(int(vli), 7) + self.svstate.vl = srcstep + vli + yield self.dec2.state.svstate.eq(self.svstate.value) + yield Settle() # let decoder update + return True + + def do_rc_ov(self, ins_name, result, overflow, cr0): if ins_name.startswith("f"): - rc_reg = "CR1" # not calculated correctly yet (not FP compares) + rc_reg = "CR1" # not calculated correctly yet (not FP compares) else: rc_reg = "CR0" regnum, is_vec = yield from get_pdecode_cr_out(self.dec2, rc_reg) - cmps = results # hang on... for `setvl` actually you want to test SVSTATE.VL is_setvl = ins_name == 'setvl' if is_setvl: - vl = results[0].vl - cmps = (SelectableInt(vl, 64), overflow,) + result = SelectableInt(result.vl, 64) else: - overflow = None # do not override overflow except in setvl - self.handle_comparison(cmps, regnum, overflow, no_so=is_setvl) + overflow = None # do not override overflow except in setvl - def do_outregs_nia(self, asmop, ins_name, - info, output_names, results, carry_en, rc_en): - if info.write_regs: - for name, output in zip(output_names, results): - yield from self.check_write(info, name, output, carry_en) + # if there was not an explicit CR0 in the pseudocode, do implicit Rc=1 + if cr0 is None: + self.handle_comparison(result, regnum, overflow, no_so=is_setvl) + else: + # otherwise we just blat CR0 into the required regnum + log("explicit rc0", cr0) + self.crl[regnum].eq(cr0) + + def do_outregs_nia(self, asmop, ins_name, info, outs, + carry_en, rc_en, ffirst_hit): + # write out any regs for this instruction + for name, output in outs.items(): + yield from self.check_write(info, name, output, carry_en) - nia_update = (yield from self.check_step_increment(results, rc_en, + if ffirst_hit: + self.svp64_reset_loop() + nia_update = True + else: + # check advancement of src/dst/sub-steps and if PC needs updating + nia_update = (yield from self.check_step_increment(rc_en, asmop, ins_name)) if nia_update: self.update_pc_next() @@ -1845,6 +2037,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): def check_write(self, info, name, output, carry_en): if name == 'overflow': # ignore, done already (above) return + if name == 'CR0': # ignore, done already (above) + return if isinstance(output, int): output = SelectableInt(output, 256) # write carry flafs @@ -1884,7 +2078,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if self.is_svp64_mode and self.pred_dst_zero: log('zeroing reg %d %s' % (regnum, str(output)), is_vec) output = SelectableInt(0, 256) - log("write reg %s%d %0xx" % (reg_prefix, regnum, output.value)) + log("write reg %s%d 0x%x" % (reg_prefix, regnum, output.value), + kind=LogKind.InstrInOuts) # zero-extend tov64 bit begore storing (should use EXT oh well) if output.bits > 64: output = SelectableInt(output.value, 64) @@ -1893,7 +2088,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): else: self.gpr[regnum] = output - def check_step_increment(self, results, rc_en, asmop, ins_name): + def check_step_increment(self, rc_en, asmop, ins_name): # check if it is the SVSTATE.src/dest step that needs incrementing # this is our Sub-Program-Counter loop from 0 to VL-1 if not self.allow_next_step_inc: @@ -1925,30 +2120,24 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.update_nia() if not rc_en: return True - results = [SelectableInt(0, 64)] - self.handle_comparison(results) # CR0 + self.handle_comparison(SelectableInt(0, 64)) # CR0 return True if self.allow_next_step_inc == 2: log("SVSTATE_NEXT: read") nia_update = (yield from self.svstate_post_inc(ins_name)) else: log("SVSTATE_NEXT: post-inc") - # use actual src/dst-step here to check end, do NOT - # use bit-reversed version - srcstep, dststep = self.new_srcstep, self.new_dststep - ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep + # use actual (cached) src/dst-step here to check end remaps = self.get_remap_indices() remap_idxs = self.remap_idxs vl = self.svstate.vl subvl = yield self.dec2.rm_dec.rm_in.subvl - end_src = srcstep == vl-1 - end_dst = dststep == vl-1 if self.allow_next_step_inc != 2: - yield from self.advance_svstate_steps(end_src, end_dst) + yield from self.advance_svstate_steps() #self.namespace['SVSTATE'] = self.svstate.spr # set CR0 (if Rc=1) based on end + endtest = 1 if self.at_loopend() else 0 if rc_en: - endtest = 1 if (end_src or end_dst) else 0 #results = [SelectableInt(endtest, 64)] # self.handle_comparison(results) # CR0 @@ -1958,9 +2147,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): shape_idx = self.svstate_next_mode.value-1 endings = self.remap_loopends[shape_idx] cr_field = SelectableInt((~endings) << 1 | endtest, 4) - log("svstep Rc=1, CR0", cr_field) + log("svstep Rc=1, CR0", cr_field, endtest) self.crl[0].eq(cr_field) # CR0 - if end_src or end_dst: + if endtest: # reset at end of loop including exit Vertical Mode log("SVSTATE_NEXT: after increments, reset") self.svp64_reset_loop() @@ -1988,6 +2177,12 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if self.svstate_next_mode == 6: self.svstate_next_mode = 0 return SelectableInt(self.svstate.dststep, 7) + if self.svstate_next_mode == 7: + self.svstate_next_mode = 0 + return SelectableInt(self.svstate.ssubstep, 7) + if self.svstate_next_mode == 8: + self.svstate_next_mode = 0 + return SelectableInt(self.svstate.dsubstep, 7) return SelectableInt(0, 7) def get_src_dststeps(self): @@ -1996,19 +2191,23 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): return (self.new_srcstep, self.new_dststep, self.new_ssubstep, self.new_dsubstep) - def update_new_svstate_steps(self): - # note, do not get the bit-reversed srcstep here! - srcstep, dststep = self.new_srcstep, self.new_dststep - ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep - - # update SVSTATE with new srcstep - self.svstate.srcstep = srcstep - self.svstate.dststep = dststep - self.svstate.ssubstep = ssubstep - self.svstate.dsubstep = dsubstep + def update_svstate_namespace(self, overwrite_svstate=True): + if overwrite_svstate: + # note, do not get the bit-reversed srcstep here! + srcstep, dststep = self.new_srcstep, self.new_dststep + ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep + + # update SVSTATE with new srcstep + self.svstate.srcstep = srcstep + self.svstate.dststep = dststep + self.svstate.ssubstep = ssubstep + self.svstate.dsubstep = dsubstep self.namespace['SVSTATE'] = self.svstate yield self.dec2.state.svstate.eq(self.svstate.value) yield Settle() # let decoder update + + def update_new_svstate_steps(self, overwrite_svstate=True): + yield from self.update_svstate_namespace(overwrite_svstate) srcstep = self.svstate.srcstep dststep = self.svstate.dststep ssubstep = self.svstate.ssubstep @@ -2017,6 +2216,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): unpack = self.svstate.unpack vl = self.svstate.vl subvl = yield self.dec2.rm_dec.rm_in.subvl + rm_mode = yield self.dec2.rm_dec.mode + ff_inv = yield self.dec2.rm_dec.inv + cr_bit = yield self.dec2.rm_dec.cr_sel log(" srcstep", srcstep) log(" dststep", dststep) log(" pack", pack) @@ -2025,9 +2227,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): log(" dsubstep", dsubstep) log(" vl", vl) log(" subvl", subvl) + log(" rm_mode", rm_mode) + log(" inv", ff_inv) + log(" cr_bit", cr_bit) # check if end reached (we let srcstep overrun, above) # nothing needs doing (TODO zeroing): just do next instruction + if self.loopend: + return True return ((ssubstep == subvl and srcstep == vl) or (dsubstep == subvl and dststep == vl)) @@ -2086,12 +2293,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): else: svp64_is_vector = out_vec # loops end at the first "hit" (source or dest) - end_src = srcstep == vl-1 - end_dst = dststep == vl-1 - loopend = ((end_src and ssubstep == subvl) or - (end_dst and dsubstep == subvl)) - log("loopend", loopend, end_src, end_dst, - ssubstep == subvl, dsubstep == subvl) + yield from self.advance_svstate_steps() + loopend = self.loopend + log("loopend", svp64_is_vector, loopend) if not svp64_is_vector or loopend: # reset loop to zero and update NIA self.svp64_reset_loop() @@ -2100,7 +2304,6 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): return True # still looping, advance and update NIA - yield from self.advance_svstate_steps(end_src, end_dst) self.namespace['SVSTATE'] = self.svstate # not an SVP64 branch, so fix PC (NIA==CIA) for next loop @@ -2124,6 +2327,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.svstate.dststep = 0 self.svstate.ssubstep = 0 self.svstate.dsubstep = 0 + self.loopend = False log(" svstate.srcstep loop end (PC to update)") self.namespace['SVSTATE'] = self.svstate