X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fopenpower%2Fdecoder%2Fisa%2Fcaller.py;h=e51d24f62a731e006e0cab22f7ce1615c80964d4;hb=e0518a4fa19ea3bba436154de8b7d66313caec28;hp=1ee2b51aa52a2807067565162cfa67b91e3ae1b4;hpb=fba9451037447ad3f50619da6c39463c2051da8d;p=openpower-isa.git diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index 1ee2b51a..e51d24f6 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -13,29 +13,40 @@ related bugs: * https://bugs.libre-soc.org/show_bug.cgi?id=424 """ +import re from nmigen.back.pysim import Settle from functools import wraps -from copy import copy +from copy import copy, deepcopy from openpower.decoder.orderedset import OrderedSet -from openpower.decoder.selectable_int import (FieldSelectableInt, SelectableInt, - selectconcat) +from openpower.decoder.selectable_int import ( + SelectableIntMapping, + FieldSelectableInt, + SelectableInt, + selectconcat, +) from openpower.decoder.power_enums import (spr_dict, spr_byname, XER_bits, - insns, MicrOp, In1Sel, In2Sel, In3Sel, - OutSel, CROutSel, LDSTMode, - SVP64RMMode, SVP64PredMode, - SVP64PredInt, SVP64PredCR, - SVP64LDSTmode) + insns, MicrOp, + In1Sel, In2Sel, In3Sel, + OutSel, CRInSel, CROutSel, LDSTMode, + SVP64RMMode, SVP64PredMode, + SVP64PredInt, SVP64PredCR, + SVP64LDSTmode) from openpower.decoder.power_enums import SVPtype -from openpower.decoder.helpers import (exts, gtu, ltu, undefined, bitrev) +from openpower.decoder.helpers import (exts, gtu, ltu, undefined, + ISACallerHelper, ISAFPHelpers) from openpower.consts import PIb, MSRb # big-endian (PowerISA versions) -from openpower.consts import SVP64CROffs +from openpower.consts import (SVP64MODE, + SVP64CROffs, + ) from openpower.decoder.power_svp64 import SVP64RM, decode_extra from openpower.decoder.isa.radixmmu import RADIX from openpower.decoder.isa.mem import Mem, swap_order, MemException from openpower.decoder.isa.svshape import SVSHAPE +from openpower.decoder.isa.svstate import SVP64State + from openpower.util import log @@ -68,6 +79,7 @@ REG_SORT_ORDER = { "RB": 0, "RC": 0, "RS": 0, + "BI": 0, "CR": 0, "LR": 0, "CTR": 0, @@ -82,7 +94,7 @@ REG_SORT_ORDER = { "CA": 0, "CA32": 0, - "overflow": 7, # should definitely be last + "overflow": 7, # should definitely be last } fregs = ['FRA', 'FRB', 'FRC', 'FRS', 'FRT'] @@ -96,7 +108,6 @@ def create_args(reglist, extra=None): return retval - class GPR(dict): def __init__(self, decoder, isacaller, svstate, regfile): dict.__init__(self) @@ -107,11 +118,20 @@ class GPR(dict): self[i] = SelectableInt(regfile[i], 64) def __call__(self, ridx): + if isinstance(ridx, SelectableInt): + ridx = ridx.value return self[ridx] def set_form(self, form): self.form = form + def __setitem__(self, rnum, value): + # rnum = rnum.value # only SelectableInt allowed + log("GPR setitem", rnum, value) + if isinstance(rnum, SelectableInt): + rnum = rnum.value + dict.__setitem__(self, rnum, value) + def getz(self, rnum): # rnum = rnum.value # only SelectableInt allowed log("GPR getzero?", rnum) @@ -206,7 +226,7 @@ class SPR(dict): def dump(self, printout=True): res = [] keys = list(self.keys()) - #keys.sort() + # keys.sort() for k in keys: sprname = spr_dict.get(k, None) if sprname is None: @@ -223,7 +243,7 @@ class SPR(dict): class PC: def __init__(self, pc_init=0): self.CIA = SelectableInt(pc_init, 64) - self.NIA = self.CIA + SelectableInt(4, 64) # only true for v3.0B! + self.NIA = self.CIA + SelectableInt(4, 64) # only true for v3.0B! def update_nia(self, is_svp64): increment = 8 if is_svp64 else 4 @@ -238,70 +258,67 @@ class PC: namespace['NIA'] = self.NIA -# Simple-V: see https://libre-soc.org/openpower/sv -class SVP64State: - def __init__(self, init=0): - self.spr = SelectableInt(init, 32) - # fields of SVSTATE, see https://libre-soc.org/openpower/sv/sprs/ - self.maxvl = FieldSelectableInt(self.spr, tuple(range(0,7))) - self.vl = FieldSelectableInt(self.spr, tuple(range(7,14))) - self.srcstep = FieldSelectableInt(self.spr, tuple(range(14,21))) - self.dststep = FieldSelectableInt(self.spr, tuple(range(21,28))) - self.subvl = FieldSelectableInt(self.spr, tuple(range(28,30))) - self.svstep = FieldSelectableInt(self.spr, tuple(range(30,32))) - - # SVP64 ReMap field -class SVP64RMFields: - def __init__(self, init=0): - self.spr = SelectableInt(init, 24) - # SVP64 RM fields: see https://libre-soc.org/openpower/sv/svp64/ - self.mmode = FieldSelectableInt(self.spr, [0]) - self.mask = FieldSelectableInt(self.spr, tuple(range(1,4))) - self.elwidth = FieldSelectableInt(self.spr, tuple(range(4,6))) - self.ewsrc = FieldSelectableInt(self.spr, tuple(range(6,8))) - self.subvl = FieldSelectableInt(self.spr, tuple(range(8,10))) - self.extra = FieldSelectableInt(self.spr, tuple(range(10,19))) - self.mode = FieldSelectableInt(self.spr, tuple(range(19,24))) - # these cover the same extra field, split into parts as EXTRA2 - self.extra2 = list(range(4)) - self.extra2[0] = FieldSelectableInt(self.spr, tuple(range(10,12))) - self.extra2[1] = FieldSelectableInt(self.spr, tuple(range(12,14))) - self.extra2[2] = FieldSelectableInt(self.spr, tuple(range(14,16))) - self.extra2[3] = FieldSelectableInt(self.spr, tuple(range(16,18))) - self.smask = FieldSelectableInt(self.spr, tuple(range(16,19))) - # and here as well, but EXTRA3 - self.extra3 = list(range(3)) - self.extra3[0] = FieldSelectableInt(self.spr, tuple(range(10,13))) - self.extra3[1] = FieldSelectableInt(self.spr, tuple(range(13,16))) - self.extra3[2] = FieldSelectableInt(self.spr, tuple(range(16,19))) - - -SVP64RM_MMODE_SIZE = len(SVP64RMFields().mmode.br) -SVP64RM_MASK_SIZE = len(SVP64RMFields().mask.br) -SVP64RM_ELWIDTH_SIZE = len(SVP64RMFields().elwidth.br) -SVP64RM_EWSRC_SIZE = len(SVP64RMFields().ewsrc.br) -SVP64RM_SUBVL_SIZE = len(SVP64RMFields().subvl.br) -SVP64RM_EXTRA2_SPEC_SIZE = len(SVP64RMFields().extra2[0].br) -SVP64RM_EXTRA3_SPEC_SIZE = len(SVP64RMFields().extra3[0].br) -SVP64RM_SMASK_SIZE = len(SVP64RMFields().smask.br) -SVP64RM_MODE_SIZE = len(SVP64RMFields().mode.br) +class SVP64RMFields(SelectableIntMapping, bits=24, fields={ + "spr": range(24), + # SVP64 RM fields: see https://libre-soc.org/openpower/sv/svp64/ + "mmode": (0,), + "mask": range(1, 4), + "elwidth": range(4, 6), + "ewsrc": range(6, 8), + "subvl": range(8, 10), + "extra": range(10, 19), + "mode": range(19, 24), + # these cover the same extra field, split into parts as EXTRA2 + "extra2": dict(enumerate([ + range(10, 12), + range(12, 14), + range(14, 16), + range(16, 18), + ])), + "smask": range(16, 19), + # and here as well, but EXTRA3 + "extra3": dict(enumerate([ + range(10, 13), + range(13, 16), + range(16, 19), + ])), +}): + + def __init__(self, value=0): + super().__init__(value=value) + self.spr = self + + +SVP64RM_MMODE_SIZE = len(SVP64RMFields.mmode) +SVP64RM_MASK_SIZE = len(SVP64RMFields.mask) +SVP64RM_ELWIDTH_SIZE = len(SVP64RMFields.elwidth) +SVP64RM_EWSRC_SIZE = len(SVP64RMFields.ewsrc) +SVP64RM_SUBVL_SIZE = len(SVP64RMFields.subvl) +SVP64RM_EXTRA2_SPEC_SIZE = len(SVP64RMFields.extra2[0]) +SVP64RM_EXTRA3_SPEC_SIZE = len(SVP64RMFields.extra3[0]) +SVP64RM_SMASK_SIZE = len(SVP64RMFields.smask) +SVP64RM_MODE_SIZE = len(SVP64RMFields.mode) # SVP64 Prefix fields: see https://libre-soc.org/openpower/sv/svp64/ -class SVP64PrefixFields: - def __init__(self): - self.insn = SelectableInt(0, 32) - # 6 bit major opcode EXT001, 2 bits "identifying" (7, 9), 24 SV ReMap - self.major = FieldSelectableInt(self.insn, tuple(range(0,6))) - self.pid = FieldSelectableInt(self.insn, (7, 9)) # must be 0b11 - rmfields = [6, 8] + list(range(10,32)) # SVP64 24-bit RM (ReMap) - self.rm = FieldSelectableInt(self.insn, rmfields) +class SVP64PrefixFields(SelectableIntMapping, bits=32, fields={ + "insn": range(32), + # 6 bit major opcode EXT001, 2 bits "identifying" (7, 9), 24 SV ReMap + "major": range(0, 6), + "pid": (7, 9), + # SVP64 24-bit RM (ReMap) + "rm": ((6, 8) + tuple(range(10, 32))), +}): + + def __init__(self, value=0): + super().__init__(value=value) + self.insn = self -SV64P_MAJOR_SIZE = len(SVP64PrefixFields().major.br) -SV64P_PID_SIZE = len(SVP64PrefixFields().pid.br) -SV64P_RM_SIZE = len(SVP64PrefixFields().rm.br) +SV64P_MAJOR_SIZE = len(SVP64PrefixFields.major) +SV64P_PID_SIZE = len(SVP64PrefixFields.pid) +SV64P_RM_SIZE = len(SVP64PrefixFields.rm) # CR register fields @@ -325,12 +342,14 @@ class CRFields: self.crl.append(_cr) # decode SVP64 predicate integer to reg number and invert + + def get_predint(gpr, mask): r10 = gpr(10) r30 = gpr(30) - log ("get_predint", mask, SVP64PredInt.ALWAYS.value) + log("get_predint", mask, SVP64PredInt.ALWAYS.value) if mask == SVP64PredInt.ALWAYS.value: - return 0xffff_ffff_ffff_ffff + return 0xffff_ffff_ffff_ffff # 64 bits of 1 if mask == SVP64PredInt.R3_UNARY.value: return 1 << (gpr(3).value & 0b111111) if mask == SVP64PredInt.R3.value: @@ -347,6 +366,8 @@ def get_predint(gpr, mask): return ~gpr(30).value # decode SVP64 predicate CR to reg number and invert status + + def _get_predcr(mask): if mask == SVP64PredCR.LT.value: return 0, 1 @@ -367,13 +388,15 @@ def _get_predcr(mask): # read individual CR fields (0..VL-1), extract the required bit # and construct the mask + + def get_predcr(crl, mask, vl): idx, noninv = _get_predcr(mask) mask = 0 for i in range(vl): cr = crl[i+SVP64CROffs.CRPred] if cr[idx].value == noninv: - mask |= (1<> 2 + self.namespace['DS'] = imm + else: + self.namespace['D'] = imm + + def get_input(self, name): + # using PowerDecoder2, first, find the decoder index. + # (mapping name RA RB RC RS to in1, in2, in3) + regnum, is_vec = yield from get_pdecode_idx_in(self.dec2, name) + if regnum is None: + # doing this is not part of svp64, it's because output + # registers, to be modified, need to be in the namespace. + regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, name) + if regnum is None: + regnum, is_vec = yield from get_pdecode_idx_out2(self.dec2, name) + + # in case getting the register number is needed, _RA, _RB + regname = "_" + name + self.namespace[regname] = regnum + if not self.is_svp64_mode or not self.pred_src_zero: + log('reading reg %s %s' % (name, str(regnum)), is_vec) + if name in fregs: + reg_val = SelectableInt(self.fpr(regnum)) + elif name is not None: + reg_val = SelectableInt(self.gpr(regnum)) + else: + log('zero input reg %s %s' % (name, str(regnum)), is_vec) + reg_val = 0 + return reg_val + + def remap_debug(self, remaps): + # just some convenient debug info + for i in range(4): + sname = 'SVSHAPE%d' % i + shape = self.spr[sname] + log(sname, bin(shape.value)) + log(" lims", shape.lims) + log(" mode", shape.mode) + log(" skip", shape.skip) + + # set up the list of steps to remap + mi0 = self.svstate.mi0 + mi1 = self.svstate.mi1 + mi2 = self.svstate.mi2 + mo0 = self.svstate.mo0 + mo1 = self.svstate.mo1 + steps = [(self.dec2.in1_step, mi0), # RA + (self.dec2.in2_step, mi1), # RB + (self.dec2.in3_step, mi2), # RC + (self.dec2.o_step, mo0), # RT + (self.dec2.o2_step, mo1), # EA + ] + remap_idxs = self.remap_idxs + rremaps = [] + # now cross-index the required SHAPE for each of 3-in 2-out regs + rnames = ['RA', 'RB', 'RC', 'RT', 'EA'] + for i, (dstep, shape_idx) in enumerate(steps): + (shape, remap) = remaps[shape_idx] + remap_idx = remap_idxs[shape_idx] + # zero is "disabled" + if shape.value == 0x0: + continue + # now set the actual requested step to the current index + yield dstep.eq(remap_idx) + + # debug printout info + rremaps.append((shape.mode, i, rnames[i], shape_idx, remap_idx)) + for x in rremaps: + log("shape remap", x) + + def check_write(self, info, name, output, carry_en): + if name == 'overflow': # ignore, done already (above) + return + if isinstance(output, int): + output = SelectableInt(output, 256) + if name in ['CA', 'CA32']: + if carry_en: + log("writing %s to XER" % name, output) + self.spr['XER'][XER_bits[name]] = output.value + else: + log("NOT writing %s to XER" % name, output) + elif name in info.special_regs: + log('writing special %s' % name, output, special_sprs) + if name in special_sprs: + self.spr[name] = output + else: + self.namespace[name].eq(output) + if name == 'MSR': + log('msr written', hex(self.msr.value)) + else: + regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, name) + if regnum is None: + regnum, is_vec = yield from get_pdecode_idx_out2( + self.dec2, name) + if regnum is None: + # temporary hack for not having 2nd output + regnum = yield getattr(self.decoder, name) + is_vec = False + if self.is_svp64_mode and self.pred_dst_zero: + log('zeroing reg %d %s' % (regnum, str(output)), + is_vec) + output = SelectableInt(0, 256) + else: + if name in fregs: + ftype = 'fpr' else: - regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, - name) - if regnum is None: - regnum, is_vec = yield from get_pdecode_idx_out2( - self.dec2, name) - if regnum is None: - # temporary hack for not having 2nd output - regnum = yield getattr(self.decoder, name) - is_vec = False - if self.is_svp64_mode and pred_dst_zero: - log('zeroing reg %d %s' % (regnum, str(output)), - is_vec) - output = SelectableInt(0, 256) - else: - if name in fregs: - ftype = 'fpr' - else: - ftype = 'gpr' - log('writing %s %s %s' % (regnum, ftype, str(output)), - is_vec) - if output.bits > 64: - output = SelectableInt(output.value, 64) - if name in fregs: - self.fpr[regnum] = output - else: - self.gpr[regnum] = output + ftype = 'gpr' + log('writing %s %s %s' % (ftype, regnum, str(output)), + is_vec) + if output.bits > 64: + output = SelectableInt(output.value, 64) + if name in fregs: + self.fpr[regnum] = output + else: + self.gpr[regnum] = output + def check_step_increment(self, results, rc_en, asmop, ins_name): # check if it is the SVSTATE.src/dest step that needs incrementing # this is our Sub-Program-Counter loop from 0 to VL-1 - if self.is_svp64_mode: - # XXX twin predication TODO - vl = self.svstate.vl.asint(msb0=True) - mvl = self.svstate.maxvl.asint(msb0=True) - srcstep = self.svstate.srcstep.asint(msb0=True) - dststep = self.svstate.dststep.asint(msb0=True) - rm_mode = yield self.dec2.rm_dec.mode - reverse_gear = yield self.dec2.rm_dec.reverse_gear - sv_ptype = yield self.dec2.dec.op.SV_Ptype - out_vec = not (yield self.dec2.no_out_vec) - in_vec = not (yield self.dec2.no_in_vec) - log (" svstate.vl", vl) - log (" svstate.mvl", mvl) - log (" svstate.srcstep", srcstep) - log (" svstate.dststep", dststep) - log (" mode", rm_mode) - log (" reverse", reverse_gear) - log (" out_vec", out_vec) - log (" in_vec", in_vec) - log (" sv_ptype", sv_ptype, sv_ptype == SVPtype.P2.value) - # check if srcstep needs incrementing by one, stop PC advancing - # svp64 loop can end early if the dest is scalar for single-pred - # but for 2-pred both src/dest have to be checked. - # XXX this might not be true! it may just be LD/ST - if sv_ptype == SVPtype.P2.value: - svp64_is_vector = (out_vec or in_vec) + pre = False + post = False + nia_update = True + if self.allow_next_step_inc: + log("SVSTATE_NEXT: inc requested, mode", + self.svstate_next_mode, self.allow_next_step_inc) + yield from self.svstate_pre_inc() + pre = yield from self.update_new_svstate_steps() + if pre: + # reset at end of loop including exit Vertical Mode + log("SVSTATE_NEXT: end of loop, reset") + self.svp64_reset_loop() + self.svstate.vfirst = 0 + self.update_nia() + if not rc_en: + return True + results = [SelectableInt(0, 64)] + self.handle_comparison(results) # CR0 + return True + if self.allow_next_step_inc == 2: + log("SVSTATE_NEXT: read") + nia_update = (yield from self.svstate_post_inc(ins_name)) else: - svp64_is_vector = out_vec - if svp64_is_vector and srcstep != vl-1 and dststep != vl-1: - self.svstate.srcstep += SelectableInt(1, 7) - self.svstate.dststep += SelectableInt(1, 7) - self.pc.NIA.value = self.pc.CIA.value - self.namespace['NIA'] = self.pc.NIA - self.namespace['SVSTATE'] = self.svstate.spr - log("end of sub-pc call", self.namespace['CIA'], - self.namespace['NIA']) - return # DO NOT allow PC to update whilst Sub-PC loop running - # reset loop to zero - self.svp64_reset_loop() + log("SVSTATE_NEXT: post-inc") + # use actual src/dst-step here to check end, do NOT + # use bit-reversed version + srcstep, dststep = self.new_srcstep, self.new_dststep + ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep + remaps = self.get_remap_indices() + remap_idxs = self.remap_idxs + vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl + end_src = srcstep == vl-1 + end_dst = dststep == vl-1 + if self.allow_next_step_inc != 2: + yield from self.advance_svstate_steps(end_src, end_dst) + self.namespace['SVSTATE'] = self.svstate.spr + # set CR0 (if Rc=1) based on end + if rc_en: + endtest = 1 if (end_src or end_dst) else 0 + #results = [SelectableInt(endtest, 64)] + # self.handle_comparison(results) # CR0 + + # see if svstep was requested, if so, which SVSTATE + endings = 0b111 + if self.svstate_next_mode > 0: + shape_idx = self.svstate_next_mode.value-1 + endings = self.remap_loopends[shape_idx] + cr_field = SelectableInt((~endings) << 1 | endtest, 4) + print("svstep Rc=1, CR0", cr_field) + self.crl[0].eq(cr_field) # CR0 + if end_src or end_dst: + # reset at end of loop including exit Vertical Mode + log("SVSTATE_NEXT: after increments, reset") + self.svp64_reset_loop() + self.svstate.vfirst = 0 + return nia_update + + if self.is_svp64_mode: + return (yield from self.svstate_post_inc(ins_name)) # XXX only in non-SVP64 mode! # record state of whether the current operation was an svshape, + # OR svindex! # to be able to know if it should apply in the next instruction. # also (if going to use this instruction) should disable ability # to interrupt in between. sigh. - self.last_op_svshape = asmop == 'svremap' + self.last_op_svshape = asmop in ['svremap', 'svindex'] + + return True + + def SVSTATE_NEXT(self, mode, submode): + """explicitly moves srcstep/dststep on to next element, for + "Vertical-First" mode. this function is called from + setvl pseudo-code, as a pseudo-op "svstep" - self.update_pc_next() + WARNING: this function uses information that was created EARLIER + due to it being in the middle of a yield, but this function is + *NOT* called from yield (it's called from compiled pseudocode). + """ + self.allow_next_step_inc = submode.value + 1 + log("SVSTATE_NEXT mode", mode, submode, self.allow_next_step_inc) + self.svstate_next_mode = mode + if self.svstate_next_mode > 0 and self.svstate_next_mode < 5: + shape_idx = self.svstate_next_mode.value-1 + return SelectableInt(self.remap_idxs[shape_idx], 7) + if self.svstate_next_mode == 5: + self.svstate_next_mode = 0 + return SelectableInt(self.svstate.srcstep, 7) + if self.svstate_next_mode == 6: + self.svstate_next_mode = 0 + return SelectableInt(self.svstate.dststep, 7) + return SelectableInt(0, 7) + + def svstate_pre_inc(self): + """check if srcstep/dststep need to skip over masked-out predicate bits + note that this is not supposed to do anything to substep, + it is purely for skipping masked-out bits + """ + # get SVSTATE VL (oh and print out some debug stuff) + vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl + srcstep = self.svstate.srcstep + dststep = self.svstate.dststep + ssubstep = self.svstate.ssubstep + dsubstep = self.svstate.dsubstep + sv_a_nz = yield self.dec2.sv_a_nz + fft_mode = yield self.dec2.use_svp64_fft + in1 = yield self.dec2.e.read_reg1.data + log("SVP64: VL, subvl, srcstep, dststep, ssubstep, dsybstep, sv_a_nz, " + "in1 fft, svp64", + vl, subvl, srcstep, dststep, ssubstep, dsubstep, + sv_a_nz, in1, fft_mode, + self.is_svp64_mode) + + # get predicate mask (all 64 bits) + srcmask = dstmask = 0xffff_ffff_ffff_ffff + + pmode = yield self.dec2.rm_dec.predmode + pack = yield self.dec2.rm_dec.pack + unpack = yield self.dec2.rm_dec.unpack + reverse_gear = yield self.dec2.rm_dec.reverse_gear + sv_ptype = yield self.dec2.dec.op.SV_Ptype + srcpred = yield self.dec2.rm_dec.srcpred + dstpred = yield self.dec2.rm_dec.dstpred + pred_src_zero = yield self.dec2.rm_dec.pred_sz + pred_dst_zero = yield self.dec2.rm_dec.pred_dz + if pmode == SVP64PredMode.INT.value: + srcmask = dstmask = get_predint(self.gpr, dstpred) + if sv_ptype == SVPtype.P2.value: + srcmask = get_predint(self.gpr, srcpred) + elif pmode == SVP64PredMode.CR.value: + srcmask = dstmask = get_predcr(self.crl, dstpred, vl) + if sv_ptype == SVPtype.P2.value: + srcmask = get_predcr(self.crl, srcpred, vl) + # work out if the ssubsteps are completed + ssubstart = ssubstep == 0 + dsubstart = dsubstep == 0 + log(" pmode", pmode) + log(" pack/unpack", pack, unpack) + log(" reverse", reverse_gear) + log(" ptype", sv_ptype) + log(" srcpred", bin(srcpred)) + log(" dstpred", bin(dstpred)) + log(" srcmask", bin(srcmask)) + log(" dstmask", bin(dstmask)) + log(" pred_sz", bin(pred_src_zero)) + log(" pred_dz", bin(pred_dst_zero)) + log(" ssubstart", ssubstart) + log(" dsubstart", dsubstart) + + # okaaay, so here we simply advance srcstep (TODO dststep) + # this can ONLY be done at the beginning of the "for" loop + # (this is all actually a FSM so it's hell to keep track sigh) + if ssubstart: + # until the predicate mask has a "1" bit... or we run out of VL + # let srcstep==VL be the indicator to move to next instruction + if not pred_src_zero: + while (((1 << srcstep) & srcmask) == 0) and (srcstep != vl): + log(" sskip", bin(1 << srcstep)) + srcstep += 1 + if dsubstart: + # same for dststep + if not pred_dst_zero: + while (((1 << dststep) & dstmask) == 0) and (dststep != vl): + log(" dskip", bin(1 << dststep)) + dststep += 1 + + # now work out if the relevant mask bits require zeroing + if pred_dst_zero: + pred_dst_zero = ((1 << dststep) & dstmask) == 0 + if pred_src_zero: + pred_src_zero = ((1 << srcstep) & srcmask) == 0 + + # store new srcstep / dststep + self.new_srcstep, self.new_dststep = (srcstep, dststep) + self.new_ssubstep, self.new_dsubstep = (ssubstep, dsubstep) + self.pred_dst_zero, self.pred_src_zero = (pred_dst_zero, pred_src_zero) + log(" new srcstep", srcstep) + log(" new dststep", dststep) + log(" new ssubstep", ssubstep) + log(" new dsubstep", dsubstep) + + def get_src_dststeps(self): + """gets srcstep, dststep, and ssubstep, dsubstep + """ + return (self.new_srcstep, self.new_dststep, + self.new_ssubstep, self.new_dsubstep) + + def update_new_svstate_steps(self): + # note, do not get the bit-reversed srcstep here! + srcstep, dststep = self.new_srcstep, self.new_dststep + ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep + + # update SVSTATE with new srcstep + self.svstate.srcstep = srcstep + self.svstate.dststep = dststep + self.svstate.ssubstep = ssubstep + self.svstate.dsubstep = dsubstep + self.namespace['SVSTATE'] = self.svstate + yield self.dec2.state.svstate.eq(self.svstate.value) + yield Settle() # let decoder update + srcstep = self.svstate.srcstep + dststep = self.svstate.dststep + ssubstep = self.svstate.ssubstep + dsubstep = self.svstate.dsubstep + vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl + log(" srcstep", srcstep) + log(" dststep", dststep) + log(" ssubstep", ssubstep) + log(" dsubstep", dsubstep) + log(" vl", vl) + log(" subvl", subvl) + + # check if end reached (we let srcstep overrun, above) + # nothing needs doing (TODO zeroing): just do next instruction + return srcstep == vl or dststep == vl + + def svstate_post_inc(self, insn_name, vf=0): + # check if SV "Vertical First" mode is enabled + vfirst = self.svstate.vfirst + log(" SV Vertical First", vf, vfirst) + if not vf and vfirst == 1: + self.update_nia() + return True + + # check if it is the SVSTATE.src/dest step that needs incrementing + # this is our Sub-Program-Counter loop from 0 to VL-1 + # XXX twin predication TODO + vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl + mvl = self.svstate.maxvl + srcstep = self.svstate.srcstep + dststep = self.svstate.dststep + ssubstep = self.svstate.ssubstep + dsubstep = self.svstate.dsubstep + rm_mode = yield self.dec2.rm_dec.mode + reverse_gear = yield self.dec2.rm_dec.reverse_gear + sv_ptype = yield self.dec2.dec.op.SV_Ptype + out_vec = not (yield self.dec2.no_out_vec) + in_vec = not (yield self.dec2.no_in_vec) + log(" svstate.vl", vl) + log(" svstate.mvl", mvl) + log(" rm.subvl", subvl) + log(" svstate.srcstep", srcstep) + log(" svstate.dststep", dststep) + log(" svstate.ssubstep", ssubstep) + log(" svstate.dsubstep", dsubstep) + log(" mode", rm_mode) + log(" reverse", reverse_gear) + log(" out_vec", out_vec) + log(" in_vec", in_vec) + log(" sv_ptype", sv_ptype, sv_ptype == SVPtype.P2.value) + # check if srcstep needs incrementing by one, stop PC advancing + # svp64 loop can end early if the dest is scalar for single-pred + # but for 2-pred both src/dest have to be checked. + # XXX this might not be true! it may just be LD/ST + if sv_ptype == SVPtype.P2.value: + svp64_is_vector = (out_vec or in_vec) + else: + svp64_is_vector = out_vec + # check if this was an sv.bc* and if so did it succeed + if self.is_svp64_mode and insn_name.startswith("sv.bc"): + end_loop = self.namespace['end_loop'] + log("branch %s end_loop" % insn_name, end_loop) + if end_loop.value: + self.svp64_reset_loop() + self.update_pc_next() + return False + # loops end at the first "hit" (source or dest) + loopend = ((srcstep == vl-1 and ssubstep == subvl) or + (dststep == vl-1 and dsubstep == subvl)) + if not svp64_is_vector or loopend: + # reset loop to zero and update NIA + self.svp64_reset_loop() + self.update_nia() + + return True + + # still looping, advance and update NIA + yield from self.advance_svstate_steps() + self.namespace['SVSTATE'] = self.svstate + # not an SVP64 branch, so fix PC (NIA==CIA) for next loop + # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64) + # this way we keep repeating the same instruction (with new steps) + self.pc.NIA.value = self.pc.CIA.value + self.namespace['NIA'] = self.pc.NIA + log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA']) + return False # DO NOT allow PC update whilst Sub-PC loop running + + def advance_svstate_steps(self, end_src=False, end_dst=False): + """ advance sub/steps. note that Pack/Unpack *INVERTS* the order. + TODO when Pack/Unpack is set, substep becomes the *outer* loop + """ + subvl = yield self.dec2.rm_dec.rm_in.subvl + # first source step + ssubstep = self.svstate.ssubstep + end_sub = ssubstep == subvl + if end_sub: + if not end_src: + self.svstate.srcstep += SelectableInt(1, 7) + self.svstate.ssubstep = SelectableInt(0, 2) # reset + else: + self.svstate.ssubstep += SelectableInt(1, 2) # advance ssubstep + # now dest step + dsubstep = self.svstate.dsubstep + end_sub = dsubstep == subvl + if end_sub: + if not end_dst: + self.svstate.dststep += SelectableInt(1, 7) + self.svstate.dsubstep = SelectableInt(0, 2) # reset + else: + self.svstate.dsubstep += SelectableInt(1, 2) # advance ssubstep def update_pc_next(self): # UPDATE program counter self.pc.update(self.namespace, self.is_svp64_mode) self.svstate.spr = self.namespace['SVSTATE'] log("end of call", self.namespace['CIA'], - self.namespace['NIA'], - self.namespace['SVSTATE']) + self.namespace['NIA'], + self.namespace['SVSTATE']) def svp64_reset_loop(self): - self.svstate.srcstep[0:7] = 0 - self.svstate.dststep[0:7] = 0 - log (" svstate.srcstep loop end (PC to update)") + self.svstate.srcstep = 0 + self.svstate.dststep = 0 + self.svstate.ssubstep = 0 + self.svstate.dsubstep = 0 + log(" svstate.srcstep loop end (PC to update)") + self.namespace['SVSTATE'] = self.svstate + + def update_nia(self): self.pc.update_nia(self.is_svp64_mode) self.namespace['NIA'] = self.pc.NIA - self.namespace['SVSTATE'] = self.svstate.spr def inject(): @@ -1539,12 +1972,15 @@ def inject(): context = args[0].namespace # variables to be injected saved_values = func_globals.copy() # Shallow copy of dict. + log("globals before", context.keys()) func_globals.update(context) result = func(*args, **kwargs) log("globals after", func_globals['CIA'], func_globals['NIA']) log("args[0]", args[0].namespace['CIA'], - args[0].namespace['NIA'], - args[0].namespace['SVSTATE']) + args[0].namespace['NIA'], + args[0].namespace['SVSTATE']) + if 'end_loop' in func_globals: + log("args[0] end_loop", func_globals['end_loop']) args[0].namespace = func_globals #exec (func.__code__, func_globals) @@ -1556,5 +1992,3 @@ def inject(): return decorator return variable_injector - -