X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fopenpower%2Fdecoder%2Fisa%2Fcaller.py;h=7f57d29397e47e1cae713da2497e4197346afee3;hb=4e908afaf2cc006c9b616acab873f9a98b36edc7;hp=d327d05a011379895d624947bae0166e9a334eb6;hpb=c2814018538c5f0761f1fe2d435e4fd7da6d4488;p=openpower-isa.git diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index d327d05a..7f57d293 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -13,29 +13,40 @@ related bugs: * https://bugs.libre-soc.org/show_bug.cgi?id=424 """ +import re from nmigen.back.pysim import Settle from functools import wraps -from copy import copy +from copy import copy, deepcopy from openpower.decoder.orderedset import OrderedSet -from openpower.decoder.selectable_int import (FieldSelectableInt, SelectableInt, - selectconcat) +from openpower.decoder.selectable_int import ( + SelectableIntMapping, + FieldSelectableInt, + SelectableInt, + selectconcat, +) from openpower.decoder.power_enums import (spr_dict, spr_byname, XER_bits, - insns, MicrOp, In1Sel, In2Sel, In3Sel, - OutSel, CROutSel, LDSTMode, - SVP64RMMode, SVP64PredMode, - SVP64PredInt, SVP64PredCR, - SVP64LDSTmode) + insns, MicrOp, + In1Sel, In2Sel, In3Sel, + OutSel, CRInSel, CROutSel, LDSTMode, + SVP64RMMode, SVP64PredMode, + SVP64PredInt, SVP64PredCR, + SVP64LDSTmode) from openpower.decoder.power_enums import SVPtype -from openpower.decoder.helpers import (exts, gtu, ltu, undefined, bitrev) +from openpower.decoder.helpers import (exts, gtu, ltu, undefined, + ISACallerHelper, ISAFPHelpers) from openpower.consts import PIb, MSRb # big-endian (PowerISA versions) -from openpower.consts import SVP64CROffs +from openpower.consts import (SVP64MODE, + SVP64CROffs, + ) from openpower.decoder.power_svp64 import SVP64RM, decode_extra from openpower.decoder.isa.radixmmu import RADIX from openpower.decoder.isa.mem import Mem, swap_order, MemException from openpower.decoder.isa.svshape import SVSHAPE +from openpower.decoder.isa.svstate import SVP64State + from openpower.util import log @@ -68,6 +79,7 @@ REG_SORT_ORDER = { "RB": 0, "RC": 0, "RS": 0, + "BI": 0, "CR": 0, "LR": 0, "CTR": 0, @@ -82,7 +94,7 @@ REG_SORT_ORDER = { "CA": 0, "CA32": 0, - "overflow": 7, # should definitely be last + "overflow": 7, # should definitely be last } fregs = ['FRA', 'FRB', 'FRC', 'FRS', 'FRT'] @@ -96,7 +108,6 @@ def create_args(reglist, extra=None): return retval - class GPR(dict): def __init__(self, decoder, isacaller, svstate, regfile): dict.__init__(self) @@ -107,11 +118,20 @@ class GPR(dict): self[i] = SelectableInt(regfile[i], 64) def __call__(self, ridx): + if isinstance(ridx, SelectableInt): + ridx = ridx.value return self[ridx] def set_form(self, form): self.form = form + def __setitem__(self, rnum, value): + # rnum = rnum.value # only SelectableInt allowed + log("GPR setitem", rnum, value) + if isinstance(rnum, SelectableInt): + rnum = rnum.value + dict.__setitem__(self, rnum, value) + def getz(self, rnum): # rnum = rnum.value # only SelectableInt allowed log("GPR getzero?", rnum) @@ -206,7 +226,7 @@ class SPR(dict): def dump(self, printout=True): res = [] keys = list(self.keys()) - #keys.sort() + # keys.sort() for k in keys: sprname = spr_dict.get(k, None) if sprname is None: @@ -223,7 +243,7 @@ class SPR(dict): class PC: def __init__(self, pc_init=0): self.CIA = SelectableInt(pc_init, 64) - self.NIA = self.CIA + SelectableInt(4, 64) # only true for v3.0B! + self.NIA = self.CIA + SelectableInt(4, 64) # only true for v3.0B! def update_nia(self, is_svp64): increment = 8 if is_svp64 else 4 @@ -238,70 +258,67 @@ class PC: namespace['NIA'] = self.NIA -# Simple-V: see https://libre-soc.org/openpower/sv -class SVP64State: - def __init__(self, init=0): - self.spr = SelectableInt(init, 32) - # fields of SVSTATE, see https://libre-soc.org/openpower/sv/sprs/ - self.maxvl = FieldSelectableInt(self.spr, tuple(range(0,7))) - self.vl = FieldSelectableInt(self.spr, tuple(range(7,14))) - self.srcstep = FieldSelectableInt(self.spr, tuple(range(14,21))) - self.dststep = FieldSelectableInt(self.spr, tuple(range(21,28))) - self.subvl = FieldSelectableInt(self.spr, tuple(range(28,30))) - self.svstep = FieldSelectableInt(self.spr, tuple(range(30,32))) - - # SVP64 ReMap field -class SVP64RMFields: - def __init__(self, init=0): - self.spr = SelectableInt(init, 24) - # SVP64 RM fields: see https://libre-soc.org/openpower/sv/svp64/ - self.mmode = FieldSelectableInt(self.spr, [0]) - self.mask = FieldSelectableInt(self.spr, tuple(range(1,4))) - self.elwidth = FieldSelectableInt(self.spr, tuple(range(4,6))) - self.ewsrc = FieldSelectableInt(self.spr, tuple(range(6,8))) - self.subvl = FieldSelectableInt(self.spr, tuple(range(8,10))) - self.extra = FieldSelectableInt(self.spr, tuple(range(10,19))) - self.mode = FieldSelectableInt(self.spr, tuple(range(19,24))) - # these cover the same extra field, split into parts as EXTRA2 - self.extra2 = list(range(4)) - self.extra2[0] = FieldSelectableInt(self.spr, tuple(range(10,12))) - self.extra2[1] = FieldSelectableInt(self.spr, tuple(range(12,14))) - self.extra2[2] = FieldSelectableInt(self.spr, tuple(range(14,16))) - self.extra2[3] = FieldSelectableInt(self.spr, tuple(range(16,18))) - self.smask = FieldSelectableInt(self.spr, tuple(range(16,19))) - # and here as well, but EXTRA3 - self.extra3 = list(range(3)) - self.extra3[0] = FieldSelectableInt(self.spr, tuple(range(10,13))) - self.extra3[1] = FieldSelectableInt(self.spr, tuple(range(13,16))) - self.extra3[2] = FieldSelectableInt(self.spr, tuple(range(16,19))) - - -SVP64RM_MMODE_SIZE = len(SVP64RMFields().mmode.br) -SVP64RM_MASK_SIZE = len(SVP64RMFields().mask.br) -SVP64RM_ELWIDTH_SIZE = len(SVP64RMFields().elwidth.br) -SVP64RM_EWSRC_SIZE = len(SVP64RMFields().ewsrc.br) -SVP64RM_SUBVL_SIZE = len(SVP64RMFields().subvl.br) -SVP64RM_EXTRA2_SPEC_SIZE = len(SVP64RMFields().extra2[0].br) -SVP64RM_EXTRA3_SPEC_SIZE = len(SVP64RMFields().extra3[0].br) -SVP64RM_SMASK_SIZE = len(SVP64RMFields().smask.br) -SVP64RM_MODE_SIZE = len(SVP64RMFields().mode.br) +class SVP64RMFields(SelectableIntMapping, bits=24, fields={ + "spr": range(24), + # SVP64 RM fields: see https://libre-soc.org/openpower/sv/svp64/ + "mmode": (0,), + "mask": range(1, 4), + "elwidth": range(4, 6), + "ewsrc": range(6, 8), + "subvl": range(8, 10), + "extra": range(10, 19), + "mode": range(19, 24), + # these cover the same extra field, split into parts as EXTRA2 + "extra2": dict(enumerate([ + range(10, 12), + range(12, 14), + range(14, 16), + range(16, 18), + ])), + "smask": range(16, 19), + # and here as well, but EXTRA3 + "extra3": dict(enumerate([ + range(10, 13), + range(13, 16), + range(16, 19), + ])), +}): + + def __init__(self, value=0): + super().__init__(value=value) + self.spr = self + + +SVP64RM_MMODE_SIZE = len(SVP64RMFields.mmode) +SVP64RM_MASK_SIZE = len(SVP64RMFields.mask) +SVP64RM_ELWIDTH_SIZE = len(SVP64RMFields.elwidth) +SVP64RM_EWSRC_SIZE = len(SVP64RMFields.ewsrc) +SVP64RM_SUBVL_SIZE = len(SVP64RMFields.subvl) +SVP64RM_EXTRA2_SPEC_SIZE = len(SVP64RMFields.extra2[0]) +SVP64RM_EXTRA3_SPEC_SIZE = len(SVP64RMFields.extra3[0]) +SVP64RM_SMASK_SIZE = len(SVP64RMFields.smask) +SVP64RM_MODE_SIZE = len(SVP64RMFields.mode) # SVP64 Prefix fields: see https://libre-soc.org/openpower/sv/svp64/ -class SVP64PrefixFields: - def __init__(self): - self.insn = SelectableInt(0, 32) - # 6 bit major opcode EXT001, 2 bits "identifying" (7, 9), 24 SV ReMap - self.major = FieldSelectableInt(self.insn, tuple(range(0,6))) - self.pid = FieldSelectableInt(self.insn, (7, 9)) # must be 0b11 - rmfields = [6, 8] + list(range(10,32)) # SVP64 24-bit RM (ReMap) - self.rm = FieldSelectableInt(self.insn, rmfields) +class SVP64PrefixFields(SelectableIntMapping, bits=32, fields={ + "insn": range(32), + # 6 bit major opcode EXT001, 2 bits "identifying" (7, 9), 24 SV ReMap + "major": range(0, 6), + "pid": (7, 9), + # SVP64 24-bit RM (ReMap) + "rm": ((6, 8) + tuple(range(10, 32))), +}): + + def __init__(self, value=0): + super().__init__(value=value) + self.insn = self -SV64P_MAJOR_SIZE = len(SVP64PrefixFields().major.br) -SV64P_PID_SIZE = len(SVP64PrefixFields().pid.br) -SV64P_RM_SIZE = len(SVP64PrefixFields().rm.br) +SV64P_MAJOR_SIZE = len(SVP64PrefixFields.major) +SV64P_PID_SIZE = len(SVP64PrefixFields.pid) +SV64P_RM_SIZE = len(SVP64PrefixFields.rm) # CR register fields @@ -325,12 +342,14 @@ class CRFields: self.crl.append(_cr) # decode SVP64 predicate integer to reg number and invert + + def get_predint(gpr, mask): r10 = gpr(10) r30 = gpr(30) - log ("get_predint", mask, SVP64PredInt.ALWAYS.value) + log("get_predint", mask, SVP64PredInt.ALWAYS.value) if mask == SVP64PredInt.ALWAYS.value: - return 0xffff_ffff_ffff_ffff + return 0xffff_ffff_ffff_ffff # 64 bits of 1 if mask == SVP64PredInt.R3_UNARY.value: return 1 << (gpr(3).value & 0b111111) if mask == SVP64PredInt.R3.value: @@ -347,6 +366,8 @@ def get_predint(gpr, mask): return ~gpr(30).value # decode SVP64 predicate CR to reg number and invert status + + def _get_predcr(mask): if mask == SVP64PredCR.LT.value: return 0, 1 @@ -367,13 +388,15 @@ def _get_predcr(mask): # read individual CR fields (0..VL-1), extract the required bit # and construct the mask + + def get_predcr(crl, mask, vl): idx, noninv = _get_predcr(mask) mask = 0 for i in range(vl): cr = crl[i+SVP64CROffs.CRPred] if cr[idx].value == noninv: - mask |= (1<> 2 + self.namespace['DS'] = imm + else: + self.namespace['D'] = imm # "special" registers for special in info.special_regs: @@ -1328,6 +1558,14 @@ class ISACaller: # clear trap (trap) NIA self.trap_nia = None + # check if this was an sv.bc* and create an indicator that + # this is the last check to be made as a loop. combined with + # the ALL/ANY mode we can early-exit + if self.is_svp64_mode and ins_name.startswith("sv.bc"): + no_in_vec = yield self.dec2.no_in_vec # BI is scalar + end_loop = no_in_vec or srcstep == vl-1 or dststep == vl-1 + self.namespace['end_loop'] = SelectableInt(end_loop, 1) + # execute actual instruction here (finally) log("inputs", inputs) results = info.func(self, *inputs) @@ -1343,13 +1581,13 @@ class ISACaller: # check if op was a LD/ST so that debugging can check the # address if int_op in [MicrOp.OP_STORE.value, - ]: + ]: self.last_st_addr = self.mem.last_st_addr if int_op in [MicrOp.OP_LOAD.value, - ]: + ]: self.last_ld_addr = self.mem.last_ld_addr - log ("op", int_op, MicrOp.OP_STORE.value, MicrOp.OP_LOAD.value, - self.last_st_addr, self.last_ld_addr) + log("op", int_op, MicrOp.OP_STORE.value, MicrOp.OP_LOAD.value, + self.last_st_addr, self.last_ld_addr) # detect if CA/CA32 already in outputs (sra*, basically) already_done = 0 @@ -1369,7 +1607,7 @@ class ISACaller: if carry_en: yield from self.handle_carry_(inputs, results, already_done) - if not self.is_svp64_mode: # yeah just no. not in parallel processing + if not self.is_svp64_mode: # yeah just no. not in parallel processing # detect if overflow was in return result overflow = None if info.write_regs: @@ -1392,7 +1630,7 @@ class ISACaller: if not self.is_svp64_mode or not pred_dst_zero: if hasattr(self.dec2.e.do, "rc"): rc_en = yield self.dec2.e.do.rc.rc - if rc_en: + if rc_en and ins_name not in ['svstep']: regnum, is_vec = yield from get_pdecode_cr_out(self.dec2, "CR0") self.handle_comparison(results, regnum) @@ -1419,25 +1657,25 @@ class ISACaller: log('msr written', hex(self.msr.value)) else: regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, - name) + name) if regnum is None: regnum, is_vec = yield from get_pdecode_idx_out2( - self.dec2, name) + self.dec2, name) if regnum is None: # temporary hack for not having 2nd output regnum = yield getattr(self.decoder, name) is_vec = False if self.is_svp64_mode and pred_dst_zero: log('zeroing reg %d %s' % (regnum, str(output)), - is_vec) + is_vec) output = SelectableInt(0, 256) else: if name in fregs: ftype = 'fpr' else: ftype = 'gpr' - log('writing %s %s %s' % (regnum, ftype, str(output)), - is_vec) + log('writing %s %s %s' % (ftype, regnum, str(output)), + is_vec) if output.bits > 64: output = SelectableInt(output.value, 64) if name in fregs: @@ -1447,70 +1685,275 @@ class ISACaller: # check if it is the SVSTATE.src/dest step that needs incrementing # this is our Sub-Program-Counter loop from 0 to VL-1 - if self.is_svp64_mode: - # XXX twin predication TODO - vl = self.svstate.vl.asint(msb0=True) - mvl = self.svstate.maxvl.asint(msb0=True) - srcstep = self.svstate.srcstep.asint(msb0=True) - dststep = self.svstate.dststep.asint(msb0=True) - rm_mode = yield self.dec2.rm_dec.mode - reverse_gear = yield self.dec2.rm_dec.reverse_gear - sv_ptype = yield self.dec2.dec.op.SV_Ptype - out_vec = not (yield self.dec2.no_out_vec) - in_vec = not (yield self.dec2.no_in_vec) - log (" svstate.vl", vl) - log (" svstate.mvl", mvl) - log (" svstate.srcstep", srcstep) - log (" svstate.dststep", dststep) - log (" mode", rm_mode) - log (" reverse", reverse_gear) - log (" out_vec", out_vec) - log (" in_vec", in_vec) - log (" sv_ptype", sv_ptype, sv_ptype == SVPtype.P2.value) - # check if srcstep needs incrementing by one, stop PC advancing - # svp64 loop can end early if the dest is scalar for single-pred - # but for 2-pred both src/dest have to be checked. - # XXX this might not be true! it may just be LD/ST - if sv_ptype == SVPtype.P2.value: - svp64_is_vector = (out_vec or in_vec) + pre = False + post = False + nia_update = True + if self.allow_next_step_inc: + log("SVSTATE_NEXT: inc requested, mode", + self.svstate_next_mode, self.allow_next_step_inc) + yield from self.svstate_pre_inc() + pre = yield from self.update_new_svstate_steps() + if pre: + # reset at end of loop including exit Vertical Mode + log("SVSTATE_NEXT: end of loop, reset") + self.svp64_reset_loop() + self.svstate.vfirst = 0 + self.update_nia() + if rc_en: + results = [SelectableInt(0, 64)] + self.handle_comparison(results) # CR0 else: - svp64_is_vector = out_vec - if svp64_is_vector and srcstep != vl-1 and dststep != vl-1: - self.svstate.srcstep += SelectableInt(1, 7) - self.svstate.dststep += SelectableInt(1, 7) - self.pc.NIA.value = self.pc.CIA.value - self.namespace['NIA'] = self.pc.NIA + if self.allow_next_step_inc == 2: + log("SVSTATE_NEXT: read") + nia_update = (yield from self.svstate_post_inc(ins_name)) + else: + log("SVSTATE_NEXT: post-inc") + # use actual src/dst-step here to check end, do NOT + # use bit-reversed version + srcstep, dststep = self.new_srcstep, self.new_dststep + remaps = self.get_remap_indices() + remap_idxs = self.remap_idxs + vl = self.svstate.vl + end_src = srcstep == vl-1 + end_dst = dststep == vl-1 + if self.allow_next_step_inc != 2: + if not end_src: + self.svstate.srcstep += SelectableInt(1, 7) + if not end_dst: + self.svstate.dststep += SelectableInt(1, 7) self.namespace['SVSTATE'] = self.svstate.spr - log("end of sub-pc call", self.namespace['CIA'], - self.namespace['NIA']) - return # DO NOT allow PC to update whilst Sub-PC loop running - # reset loop to zero - self.svp64_reset_loop() + # set CR0 (if Rc=1) based on end + if rc_en: + srcstep = self.svstate.srcstep + dststep = self.svstate.srcstep + endtest = 1 if (end_src or end_dst) else 0 + #results = [SelectableInt(endtest, 64)] + # self.handle_comparison(results) # CR0 + + # see if svstep was requested, if so, which SVSTATE + endings = 0b111 + if self.svstate_next_mode > 0: + shape_idx = self.svstate_next_mode.value-1 + endings = self.remap_loopends[shape_idx] + cr_field = SelectableInt((~endings) << 1 | endtest, 4) + print("svstep Rc=1, CR0", cr_field) + self.crl[0].eq(cr_field) # CR0 + if end_src or end_dst: + # reset at end of loop including exit Vertical Mode + log("SVSTATE_NEXT: after increments, reset") + self.svp64_reset_loop() + self.svstate.vfirst = 0 + + elif self.is_svp64_mode: + nia_update = (yield from self.svstate_post_inc(ins_name)) + else: + # XXX only in non-SVP64 mode! + # record state of whether the current operation was an svshape, + # to be able to know if it should apply in the next instruction. + # also (if going to use this instruction) should disable ability + # to interrupt in between. sigh. + self.last_op_svshape = asmop == 'svremap' + + if nia_update: + self.update_pc_next() - # XXX only in non-SVP64 mode! - # record state of whether the current operation was an svshape, - # to be able to know if it should apply in the next instruction. - # also (if going to use this instruction) should disable ability - # to interrupt in between. sigh. - self.last_op_svshape = asmop == 'svremap' + def SVSTATE_NEXT(self, mode, submode): + """explicitly moves srcstep/dststep on to next element, for + "Vertical-First" mode. this function is called from + setvl pseudo-code, as a pseudo-op "svstep" - self.update_pc_next() + WARNING: this function uses information that was created EARLIER + due to it being in the middle of a yield, but this function is + *NOT* called from yield (it's called from compiled pseudocode). + """ + self.allow_next_step_inc = submode.value + 1 + log("SVSTATE_NEXT mode", mode, submode, self.allow_next_step_inc) + self.svstate_next_mode = mode + if self.svstate_next_mode > 0 and self.svstate_next_mode < 5: + shape_idx = self.svstate_next_mode.value-1 + return SelectableInt(self.remap_idxs[shape_idx], 7) + if self.svstate_next_mode == 5: + self.svstate_next_mode = 0 + return SelectableInt(self.svstate.srcstep, 7) + if self.svstate_next_mode == 6: + self.svstate_next_mode = 0 + return SelectableInt(self.svstate.dststep, 7) + return SelectableInt(0, 7) + + def svstate_pre_inc(self): + """check if srcstep/dststep need to skip over masked-out predicate bits + """ + # get SVSTATE VL (oh and print out some debug stuff) + vl = self.svstate.vl + srcstep = self.svstate.srcstep + dststep = self.svstate.dststep + sv_a_nz = yield self.dec2.sv_a_nz + fft_mode = yield self.dec2.use_svp64_fft + in1 = yield self.dec2.e.read_reg1.data + log("SVP64: VL, srcstep, dststep, sv_a_nz, in1 fft, svp64", + vl, srcstep, dststep, sv_a_nz, in1, fft_mode, + self.is_svp64_mode) + + # get predicate mask (all 64 bits) + srcmask = dstmask = 0xffff_ffff_ffff_ffff + + pmode = yield self.dec2.rm_dec.predmode + reverse_gear = yield self.dec2.rm_dec.reverse_gear + sv_ptype = yield self.dec2.dec.op.SV_Ptype + srcpred = yield self.dec2.rm_dec.srcpred + dstpred = yield self.dec2.rm_dec.dstpred + pred_src_zero = yield self.dec2.rm_dec.pred_sz + pred_dst_zero = yield self.dec2.rm_dec.pred_dz + if pmode == SVP64PredMode.INT.value: + srcmask = dstmask = get_predint(self.gpr, dstpred) + if sv_ptype == SVPtype.P2.value: + srcmask = get_predint(self.gpr, srcpred) + elif pmode == SVP64PredMode.CR.value: + srcmask = dstmask = get_predcr(self.crl, dstpred, vl) + if sv_ptype == SVPtype.P2.value: + srcmask = get_predcr(self.crl, srcpred, vl) + log(" pmode", pmode) + log(" reverse", reverse_gear) + log(" ptype", sv_ptype) + log(" srcpred", bin(srcpred)) + log(" dstpred", bin(dstpred)) + log(" srcmask", bin(srcmask)) + log(" dstmask", bin(dstmask)) + log(" pred_sz", bin(pred_src_zero)) + log(" pred_dz", bin(pred_dst_zero)) + + # okaaay, so here we simply advance srcstep (TODO dststep) + # until the predicate mask has a "1" bit... or we run out of VL + # let srcstep==VL be the indicator to move to next instruction + if not pred_src_zero: + while (((1 << srcstep) & srcmask) == 0) and (srcstep != vl): + log(" skip", bin(1 << srcstep)) + srcstep += 1 + # same for dststep + if not pred_dst_zero: + while (((1 << dststep) & dstmask) == 0) and (dststep != vl): + log(" skip", bin(1 << dststep)) + dststep += 1 + + # now work out if the relevant mask bits require zeroing + if pred_dst_zero: + pred_dst_zero = ((1 << dststep) & dstmask) == 0 + if pred_src_zero: + pred_src_zero = ((1 << srcstep) & srcmask) == 0 + + # store new srcstep / dststep + self.new_srcstep, self.new_dststep = srcstep, dststep + self.pred_dst_zero, self.pred_src_zero = pred_dst_zero, pred_src_zero + log(" new srcstep", srcstep) + log(" new dststep", dststep) + + def get_src_dststeps(self): + """gets srcstep and dststep + """ + return self.new_srcstep, self.new_dststep + + def update_new_svstate_steps(self): + # note, do not get the bit-reversed srcstep here! + srcstep, dststep = self.new_srcstep, self.new_dststep + + # update SVSTATE with new srcstep + self.svstate.srcstep = srcstep + self.svstate.dststep = dststep + self.namespace['SVSTATE'] = self.svstate + yield self.dec2.state.svstate.eq(self.svstate.value) + yield Settle() # let decoder update + srcstep = self.svstate.srcstep + dststep = self.svstate.dststep + vl = self.svstate.vl + log(" srcstep", srcstep) + log(" dststep", dststep) + log(" vl", vl) + + # check if end reached (we let srcstep overrun, above) + # nothing needs doing (TODO zeroing): just do next instruction + return srcstep == vl or dststep == vl + + def svstate_post_inc(self, insn_name, vf=0): + # check if SV "Vertical First" mode is enabled + vfirst = self.svstate.vfirst + log(" SV Vertical First", vf, vfirst) + if not vf and vfirst == 1: + self.update_nia() + return True + + # check if it is the SVSTATE.src/dest step that needs incrementing + # this is our Sub-Program-Counter loop from 0 to VL-1 + # XXX twin predication TODO + vl = self.svstate.vl + mvl = self.svstate.maxvl + srcstep = self.svstate.srcstep + dststep = self.svstate.dststep + rm_mode = yield self.dec2.rm_dec.mode + reverse_gear = yield self.dec2.rm_dec.reverse_gear + sv_ptype = yield self.dec2.dec.op.SV_Ptype + out_vec = not (yield self.dec2.no_out_vec) + in_vec = not (yield self.dec2.no_in_vec) + log(" svstate.vl", vl) + log(" svstate.mvl", mvl) + log(" svstate.srcstep", srcstep) + log(" svstate.dststep", dststep) + log(" mode", rm_mode) + log(" reverse", reverse_gear) + log(" out_vec", out_vec) + log(" in_vec", in_vec) + log(" sv_ptype", sv_ptype, sv_ptype == SVPtype.P2.value) + # check if srcstep needs incrementing by one, stop PC advancing + # svp64 loop can end early if the dest is scalar for single-pred + # but for 2-pred both src/dest have to be checked. + # XXX this might not be true! it may just be LD/ST + if sv_ptype == SVPtype.P2.value: + svp64_is_vector = (out_vec or in_vec) + else: + svp64_is_vector = out_vec + # check if this was an sv.bc* and if so did it succeed + if self.is_svp64_mode and insn_name.startswith("sv.bc"): + end_loop = self.namespace['end_loop'] + log("branch %s end_loop" % insn_name, end_loop) + if end_loop.value: + self.svp64_reset_loop() + self.update_pc_next() + return False + if svp64_is_vector and srcstep != vl-1 and dststep != vl-1: + self.svstate.srcstep += SelectableInt(1, 7) + self.svstate.dststep += SelectableInt(1, 7) + self.namespace['SVSTATE'] = self.svstate + # not an SVP64 branch, so fix PC (NIA==CIA) for next loop + # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64) + # this way we keep repeating the same instruction (with new steps) + self.pc.NIA.value = self.pc.CIA.value + self.namespace['NIA'] = self.pc.NIA + log("end of sub-pc call", self.namespace['CIA'], + self.namespace['NIA']) + return False # DO NOT allow PC update whilst Sub-PC loop running + + # reset loop to zero and update NIA + self.svp64_reset_loop() + self.update_nia() + + return True def update_pc_next(self): # UPDATE program counter self.pc.update(self.namespace, self.is_svp64_mode) self.svstate.spr = self.namespace['SVSTATE'] log("end of call", self.namespace['CIA'], - self.namespace['NIA'], - self.namespace['SVSTATE']) + self.namespace['NIA'], + self.namespace['SVSTATE']) def svp64_reset_loop(self): - self.svstate.srcstep[0:7] = 0 - self.svstate.dststep[0:7] = 0 - log (" svstate.srcstep loop end (PC to update)") + self.svstate.srcstep = 0 + self.svstate.dststep = 0 + log(" svstate.srcstep loop end (PC to update)") + self.namespace['SVSTATE'] = self.svstate + + def update_nia(self): self.pc.update_nia(self.is_svp64_mode) self.namespace['NIA'] = self.pc.NIA - self.namespace['SVSTATE'] = self.svstate.spr def inject(): @@ -1535,12 +1978,15 @@ def inject(): context = args[0].namespace # variables to be injected saved_values = func_globals.copy() # Shallow copy of dict. + log("globals before", context.keys()) func_globals.update(context) result = func(*args, **kwargs) log("globals after", func_globals['CIA'], func_globals['NIA']) log("args[0]", args[0].namespace['CIA'], - args[0].namespace['NIA'], - args[0].namespace['SVSTATE']) + args[0].namespace['NIA'], + args[0].namespace['SVSTATE']) + if 'end_loop' in func_globals: + log("args[0] end_loop", func_globals['end_loop']) args[0].namespace = func_globals #exec (func.__code__, func_globals) @@ -1552,5 +1998,3 @@ def inject(): return decorator return variable_injector - -