X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fopenpower%2Fdecoder%2Fisa%2Fcaller.py;h=030ad3d5a49f69c01b7b195f7083d951a9f45de3;hb=5025354342d8e92f200b9c0c0a04f96a53d7f0e3;hp=757527c4c91cc6312cd45450b0927519e8c91255;hpb=e22f432e89b9a227884b8a9c1ab8f8343e77b893;p=openpower-isa.git diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index 757527c4..9a8c2481 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -13,46 +13,46 @@ related bugs: * https://bugs.libre-soc.org/show_bug.cgi?id=424 """ -import re -from nmigen.sim import Settle, Delay +from collections import namedtuple +from copy import deepcopy from functools import wraps -from copy import copy, deepcopy -from openpower.decoder.orderedset import OrderedSet -from openpower.decoder.selectable_int import ( - FieldSelectableInt, - SelectableInt, - selectconcat, -) -from openpower.decoder.power_insn import SVP64Instruction -from openpower.decoder.power_enums import (spr_dict, spr_byname, XER_bits, - insns, MicrOp, - In1Sel, In2Sel, In3Sel, - OutSel, CRInSel, CROutSel, LDSTMode, - SVP64RMMode, SVP64PredMode, - SVP64PredInt, SVP64PredCR, - SVP64LDSTmode) - -from openpower.decoder.power_enums import SVPtype - -from openpower.decoder.helpers import (exts, gtu, ltu, undefined, - ISACallerHelper, ISAFPHelpers) -from openpower.consts import PIb, MSRb # big-endian (PowerISA versions) -from openpower.consts import (SVP64MODE, - SVP64CROffs, - ) -from openpower.decoder.power_svp64 import SVP64RM, decode_extra - +import os +import sys +from elftools.elf.elffile import ELFFile # for isinstance + +from nmigen.sim import Settle +import openpower.syscalls +from openpower.consts import (MSRb, PIb, # big-endian (PowerISA versions) + SVP64CROffs, SVP64MODEb) +from openpower.decoder.helpers import (ISACallerHelper, ISAFPHelpers, exts, + gtu, undefined, copy_assign_rhs) +from openpower.decoder.isa.mem import Mem, MemMMap, MemException, LoadedELF from openpower.decoder.isa.radixmmu import RADIX -from openpower.decoder.isa.mem import Mem, swap_order, MemException from openpower.decoder.isa.svshape import SVSHAPE from openpower.decoder.isa.svstate import SVP64State +from openpower.decoder.orderedset import OrderedSet +from openpower.decoder.power_enums import (FPTRANS_INSNS, CRInSel, CROutSel, + In1Sel, In2Sel, In3Sel, LDSTMode, + MicrOp, OutSel, SVMode, + SVP64LDSTmode, SVP64PredCR, + SVP64PredInt, SVP64PredMode, + SVP64RMMode, SVPType, XER_bits, + insns, spr_byname, spr_dict, + BFP_FLAG_NAMES) +from openpower.insndb.core import SVP64Instruction +from openpower.decoder.power_svp64 import SVP64RM, decode_extra +from openpower.decoder.selectable_int import (FieldSelectableInt, + SelectableInt, selectconcat, + EFFECTIVELY_UNLIMITED) +from openpower.consts import DEFAULT_MSR +from openpower.fpscr import FPSCRState +from openpower.xer import XERState +from openpower.util import LogType, log +LDST_UPDATE_INSNS = ['ldu', 'lwzu', 'lbzu', 'lhzu', 'lhau', 'lfsu', 'lfdu', + 'stwu', 'stbu', 'sthu', 'stfsu', 'stfdu', 'stdu', + ] -from openpower.util import LogKind, log - -from collections import namedtuple -import math -import sys instruction_info = namedtuple('instruction_info', 'func read_regs uninit_regs write_regs ' + @@ -66,6 +66,9 @@ special_sprs = { 'VRSAVE': 256} +# rrright. this is here basically because the compiler pywriter returns +# results in a specific priority order. to make sure regs match up they +# need partial sorting. sigh. 
REG_SORT_ORDER = { # TODO (lkcl): adjust other registers that should be in a particular order # probably CA, CA32, and CR @@ -94,12 +97,46 @@ REG_SORT_ORDER = { "CA": 0, "CA32": 0, + "FPSCR": 1, + "overflow": 7, # should definitely be last + "CR0": 8, # likewise } fregs = ['FRA', 'FRB', 'FRC', 'FRS', 'FRT'] +def get_masked_reg(regs, base, offs, ew_bits): + # rrrright. start by breaking down into row/col, based on elwidth + gpr_offs = offs // (64 // ew_bits) + gpr_col = offs % (64 // ew_bits) + # compute the mask based on ew_bits + mask = (1 << ew_bits) - 1 + # now select the 64-bit register, but get its value (easier) + val = regs[base + gpr_offs] + # shift down so element we want is at LSB + val >>= gpr_col * ew_bits + # mask so we only return the LSB element + return val & mask + + +def set_masked_reg(regs, base, offs, ew_bits, value): + # rrrright. start by breaking down into row/col, based on elwidth + gpr_offs = offs // (64//ew_bits) + gpr_col = offs % (64//ew_bits) + # compute the mask based on ew_bits + mask = (1 << ew_bits)-1 + # now select the 64-bit register, but get its value (easier) + val = regs[base+gpr_offs] + # now mask out the bit we don't want + val = val & ~(mask << (gpr_col*ew_bits)) + # then wipe the bit we don't want from the value + value = value & mask + # OR the new value in, shifted up + val |= value << (gpr_col*ew_bits) + regs[base+gpr_offs] = val + + def create_args(reglist, extra=None): retval = list(OrderedSet(reglist)) retval.sort(key=lambda reg: REG_SORT_ORDER.get(reg, 0)) @@ -108,6 +145,12 @@ def create_args(reglist, extra=None): return retval +def create_full_args(*, read_regs, special_regs, uninit_regs, write_regs, + extra=None): + return create_args([ + *read_regs, *uninit_regs, *write_regs, *special_regs], extra=extra) + + class GPR(dict): def __init__(self, decoder, isacaller, svstate, regfile): dict.__init__(self) @@ -117,14 +160,56 @@ class GPR(dict): for i in range(len(regfile)): self[i] = SelectableInt(regfile[i], 64) - def __call__(self, ridx): + def __call__(self, ridx, is_vec=False, offs=0, elwidth=64): if isinstance(ridx, SelectableInt): ridx = ridx.value - return self[ridx] + if elwidth == 64: + return self[ridx+offs] + # rrrright. start by breaking down into row/col, based on elwidth + gpr_offs = offs // (64//elwidth) + gpr_col = offs % (64//elwidth) + # now select the 64-bit register, but get its value (easier) + val = self[ridx+gpr_offs].value + # now shift down and mask out + val = val >> (gpr_col*elwidth) & ((1 << elwidth)-1) + # finally, return a SelectableInt at the required elwidth + log("GPR call", ridx, "isvec", is_vec, "offs", offs, + "elwid", elwidth, "offs/col", gpr_offs, gpr_col, "val", hex(val)) + return SelectableInt(val, elwidth) def set_form(self, form): self.form = form + def write(self, rnum, value, is_vec=False, elwidth=64): + # get internal value + if isinstance(rnum, SelectableInt): + rnum = rnum.value + if isinstance(value, SelectableInt): + value = value.value + # compatibility... + if isinstance(rnum, tuple): + rnum, base, offs = rnum + else: + base, offs = rnum, 0 + # rrrright. 
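# --- illustrative sketch, not part of the patch ---
# How get_masked_reg()/set_masked_reg() above pack ew_bits-wide elements into
# a flat bank of 64-bit registers: offs is split into a register index (row)
# and an element column inside that register.  A plain Python list stands in
# for the regfile here.
def _demo_masked_regs():
    regs = [0] * 8
    # element 5 at 16-bit width: 4 elements per register, so it lands in
    # regs[1], bits 16..31
    set_masked_reg(regs, base=0, offs=5, ew_bits=16, value=0xBEEF)
    assert regs[1] == 0xBEEF << 16
    assert get_masked_reg(regs, base=0, offs=5, ew_bits=16) == 0xBEEF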
start by breaking down into row/col, based on elwidth + gpr_offs = offs // (64//elwidth) + gpr_col = offs % (64//elwidth) + # compute the mask based on elwidth + mask = (1 << elwidth)-1 + # now select the 64-bit register, but get its value (easier) + val = self[base+gpr_offs].value + # now mask out the bit we don't want + val = val & ~(mask << (gpr_col*elwidth)) + # then wipe the bit we don't want from the value + value = value & mask + # OR the new value in, shifted up + val |= value << (gpr_col*elwidth) + # finally put the damn value into the regfile + log("GPR write", base, "isvec", is_vec, "offs", offs, + "elwid", elwidth, "offs/col", gpr_offs, gpr_col, "val", hex(val), + "@", base+gpr_offs) + dict.__setitem__(self, base+gpr_offs, SelectableInt(val, 64)) + def __setitem__(self, rnum, value): # rnum = rnum.value # only SelectableInt allowed log("GPR setitem", rnum, value) @@ -161,13 +246,14 @@ class GPR(dict): for j in range(8): s.append("%08x" % res[i+j]) s = ' '.join(s) - print("reg", "%2d" % i, s) + log("reg", "%2d" % i, s, kind=LogType.InstrInOuts) return res class SPR(dict): - def __init__(self, dec2, initial_sprs={}): + def __init__(self, dec2, initial_sprs={}, gpr=None): self.sd = dec2 + self.gpr = gpr # for SVSHAPE[0-3] dict.__init__(self) for key, v in initial_sprs.items(): if isinstance(key, SelectableInt): @@ -182,8 +268,8 @@ class SPR(dict): self[key] = v def __getitem__(self, key): - log("get spr", key) - log("dict", self.items()) + #log("get spr", key) + #log("dict", self.items()) # if key in special_sprs get the special spr, otherwise return key if isinstance(key, SelectableInt): key = key.value @@ -201,9 +287,9 @@ class SPR(dict): info = spr_dict[key] else: info = spr_byname[key] - dict.__setitem__(self, key, SelectableInt(0, info.length)) + self[key] = SelectableInt(0, info.length) res = dict.__getitem__(self, key) - log("spr returning", key, res) + #log("spr returning", key, res) return res def __setitem__(self, key, value): @@ -217,6 +303,10 @@ class SPR(dict): self.__setitem__('SRR0', value) if key == 'HSRR1': # HACK! 
self.__setitem__('SRR1', value) + if key == 1: + value = XERState(value) + if key in ('SVSHAPE0', 'SVSHAPE1', 'SVSHAPE2', 'SVSHAPE3'): + value = SVSHAPE(value, self.gpr) log("setting spr", key, value) dict.__setitem__(self, key, value) @@ -278,33 +368,32 @@ class CRFields: _cr = FieldSelectableInt(self.cr, bits) self.crl.append(_cr) -# decode SVP64 predicate integer to reg number and invert - +# decode SVP64 predicate integer to reg number and invert def get_predint(gpr, mask): + r3 = gpr(3) r10 = gpr(10) r30 = gpr(30) log("get_predint", mask, SVP64PredInt.ALWAYS.value) if mask == SVP64PredInt.ALWAYS.value: return 0xffff_ffff_ffff_ffff # 64 bits of 1 if mask == SVP64PredInt.R3_UNARY.value: - return 1 << (gpr(3).value & 0b111111) + return 1 << (r3.value & 0b111111) if mask == SVP64PredInt.R3.value: - return gpr(3).value + return r3.value if mask == SVP64PredInt.R3_N.value: - return ~gpr(3).value + return ~r3.value if mask == SVP64PredInt.R10.value: - return gpr(10).value + return r10.value if mask == SVP64PredInt.R10_N.value: - return ~gpr(10).value + return ~r10.value if mask == SVP64PredInt.R30.value: - return gpr(30).value + return r30.value if mask == SVP64PredInt.R30_N.value: - return ~gpr(30).value - -# decode SVP64 predicate CR to reg number and invert status + return ~r30.value +# decode SVP64 predicate CR to reg number and invert status def _get_predcr(mask): if mask == SVP64PredCR.LT.value: return 0, 1 @@ -323,10 +412,9 @@ def _get_predcr(mask): if mask == SVP64PredCR.NS.value: return 3, 0 + # read individual CR fields (0..VL-1), extract the required bit # and construct the mask - - def get_predcr(crl, mask, vl): idx, noninv = _get_predcr(mask) mask = 0 @@ -338,71 +426,110 @@ def get_predcr(crl, mask, vl): # TODO, really should just be using PowerDecoder2 -def get_pdecode_idx_in(dec2, name): +def get_idx_map(dec2, name): op = dec2.dec.op in1_sel = yield op.in1_sel in2_sel = yield op.in2_sel in3_sel = yield op.in3_sel - # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec) in1 = yield dec2.e.read_reg1.data - in2 = yield dec2.e.read_reg2.data - in3 = yield dec2.e.read_reg3.data - in1_isvec = yield dec2.in1_isvec - in2_isvec = yield dec2.in2_isvec - in3_isvec = yield dec2.in3_isvec - log("get_pdecode_idx_in in1", name, in1_sel, In1Sel.RA.value, - in1, in1_isvec) - log("get_pdecode_idx_in in2", name, in2_sel, In2Sel.RB.value, - in2, in2_isvec) - log("get_pdecode_idx_in in3", name, in3_sel, In3Sel.RS.value, - in3, in3_isvec) - log("get_pdecode_idx_in FRS in3", name, in3_sel, In3Sel.FRS.value, - in3, in3_isvec) - log("get_pdecode_idx_in FRB in2", name, in2_sel, In2Sel.FRB.value, - in2, in2_isvec) - log("get_pdecode_idx_in FRC in3", name, in3_sel, In3Sel.FRC.value, - in3, in3_isvec) # identify which regnames map to in1/2/3 - if name == 'RA': + if name == 'RA' or name == 'RA_OR_ZERO': if (in1_sel == In1Sel.RA.value or (in1_sel == In1Sel.RA_OR_ZERO.value and in1 != 0)): - return in1, in1_isvec + return 1 if in1_sel == In1Sel.RA_OR_ZERO.value: - return in1, in1_isvec + return 1 elif name == 'RB': if in2_sel == In2Sel.RB.value: - return in2, in2_isvec + return 2 if in3_sel == In3Sel.RB.value: - return in3, in3_isvec + return 3 # XXX TODO, RC doesn't exist yet! 
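# --- illustrative sketch, not part of the patch ---
# get_predint()/get_predcr() above both reduce the predicate to a plain bit
# mask; bit i of that mask decides whether element i of the vector loop runs.
# _active_elements is a hypothetical helper showing the effect for a given VL.
def _active_elements(mask, vl):
    return [i for i in range(vl) if (mask >> i) & 1]

# e.g. r30 = 0b0101 with the "R30" predicate mode gives mask 0b0101, so only
# elements 0 and 2 are active: _active_elements(0b0101, 4) == [0, 2]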
elif name == 'RC': - assert False, "RC does not exist yet" - elif name == 'RS': + if in3_sel == In3Sel.RC.value: + return 3 + elif name in ['EA', 'RS']: if in1_sel == In1Sel.RS.value: - return in1, in1_isvec + return 1 if in2_sel == In2Sel.RS.value: - return in2, in2_isvec + return 2 if in3_sel == In3Sel.RS.value: - return in3, in3_isvec + return 3 elif name == 'FRA': if in1_sel == In1Sel.FRA.value: - return in1, in1_isvec + return 1 + if in3_sel == In3Sel.FRA.value: + return 3 elif name == 'FRB': if in2_sel == In2Sel.FRB.value: - return in2, in2_isvec + return 2 elif name == 'FRC': if in3_sel == In3Sel.FRC.value: - return in3, in3_isvec + return 3 elif name == 'FRS': if in1_sel == In1Sel.FRS.value: - return in1, in1_isvec + return 1 if in3_sel == In3Sel.FRS.value: - return in3, in3_isvec + return 3 + elif name == 'FRT': + if in1_sel == In1Sel.FRT.value: + return 1 + elif name == 'RT': + if in1_sel == In1Sel.RT.value: + return 1 + return None + + +# TODO, really should just be using PowerDecoder2 +def get_idx_in(dec2, name, ewmode=False): + idx = yield from get_idx_map(dec2, name) + if idx is None: + return None, False + op = dec2.dec.op + in1_sel = yield op.in1_sel + in2_sel = yield op.in2_sel + in3_sel = yield op.in3_sel + # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec) + in1 = yield dec2.e.read_reg1.data + in2 = yield dec2.e.read_reg2.data + in3 = yield dec2.e.read_reg3.data + if ewmode: + in1_base = yield dec2.e.read_reg1.base + in2_base = yield dec2.e.read_reg2.base + in3_base = yield dec2.e.read_reg3.base + in1_offs = yield dec2.e.read_reg1.offs + in2_offs = yield dec2.e.read_reg2.offs + in3_offs = yield dec2.e.read_reg3.offs + in1 = (in1, in1_base, in1_offs) + in2 = (in2, in2_base, in2_offs) + in3 = (in3, in3_base, in3_offs) + + in1_isvec = yield dec2.in1_isvec + in2_isvec = yield dec2.in2_isvec + in3_isvec = yield dec2.in3_isvec + log("get_idx_in in1", name, in1_sel, In1Sel.RA.value, + in1, in1_isvec) + log("get_idx_in in2", name, in2_sel, In2Sel.RB.value, + in2, in2_isvec) + log("get_idx_in in3", name, in3_sel, In3Sel.RS.value, + in3, in3_isvec) + log("get_idx_in FRS in3", name, in3_sel, In3Sel.FRS.value, + in3, in3_isvec) + log("get_idx_in FRB in2", name, in2_sel, In2Sel.FRB.value, + in2, in2_isvec) + log("get_idx_in FRC in3", name, in3_sel, In3Sel.FRC.value, + in3, in3_isvec) + if idx == 1: + return in1, in1_isvec + if idx == 2: + return in2, in2_isvec + if idx == 3: + return in3, in3_isvec return None, False # TODO, really should just be using PowerDecoder2 -def get_pdecode_cr_in(dec2, name): +def get_cr_in(dec2, name): op = dec2.dec.op in_sel = yield op.cr_in in_bitfield = yield dec2.dec_cr_in.cr_bitfield.data @@ -412,7 +539,7 @@ def get_pdecode_cr_in(dec2, name): # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec) in1 = yield dec2.e.read_cr1.data cr_isvec = yield dec2.cr_in_isvec - log("get_pdecode_cr_in", in_sel, CROutSel.CR0.value, in1, cr_isvec) + log("get_cr_in", in_sel, CROutSel.CR0.value, in1, cr_isvec) log(" sv_cr_in", sv_cr_in) log(" cr_bf", in_bitfield) log(" spec", spec) @@ -421,12 +548,12 @@ def get_pdecode_cr_in(dec2, name): if name == 'BI': if in_sel == CRInSel.BI.value: return in1, cr_isvec - log("get_pdecode_cr_in not found", name) + log("get_cr_in not found", name) return None, False # TODO, really should just be using PowerDecoder2 -def get_pdecode_cr_out(dec2, name): +def get_cr_out(dec2, name): op = dec2.dec.op out_sel = yield op.cr_out out_bitfield = yield dec2.dec_cr_out.cr_bitfield.data @@ -435,93 +562,620 @@ def 
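# --- illustrative sketch, not part of the patch ---
# Summary of the operand-name -> decoder read-port mapping that get_idx_map()
# above derives from the in1/in2/in3 selects.  Tuples list the possible ports
# (1 = read_reg1, 2 = read_reg2, 3 = read_reg3); which one applies depends on
# the instruction form.
READ_PORT_SUMMARY = {
    'RA': (1,), 'RB': (2, 3), 'RC': (3,), 'RS': (1, 2, 3), 'EA': (1, 2, 3),
    'FRA': (1, 3), 'FRB': (2,), 'FRC': (3,), 'FRS': (1, 3), 'FRT': (1,),
    'RT': (1,),
}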
get_pdecode_cr_out(dec2, name): sv_override = yield dec2.dec_cr_out.sv_override # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec) out = yield dec2.e.write_cr.data - o_isvec = yield dec2.o_isvec - log("get_pdecode_cr_out", out_sel, CROutSel.CR0.value, out, o_isvec) + o_isvec = yield dec2.cr_out_isvec + log("get_cr_out", out_sel, CROutSel.CR0.value, out, o_isvec) log(" sv_cr_out", sv_cr_out) log(" cr_bf", out_bitfield) log(" spec", spec) log(" override", sv_override) # identify which regnames map to out / o2 + if name == 'BF': + if out_sel == CROutSel.BF.value: + return out, o_isvec if name == 'CR0': if out_sel == CROutSel.CR0.value: return out, o_isvec - if name == 'CR1': # these are not actually calculated correctly + if name == 'CR1': # these are not actually calculated correctly if out_sel == CROutSel.CR1.value: return out, o_isvec - log("get_pdecode_cr_out not found", name) + # check RC1 set? if so return implicit vector, this is a REAL bad hack + RC1 = yield dec2.rm_dec.RC1 + if RC1: + log("get_cr_out RC1 mode") + if name == 'CR0': + return 0, True # XXX TODO: offset CR0 from SVSTATE SPR + if name == 'CR1': + return 1, True # XXX TODO: offset CR1 from SVSTATE SPR + # nope - not found. + log("get_cr_out not found", name) return None, False # TODO, really should just be using PowerDecoder2 -def get_pdecode_idx_out(dec2, name): +def get_out_map(dec2, name): op = dec2.dec.op out_sel = yield op.out_sel # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec) out = yield dec2.e.write_reg.data - o_isvec = yield dec2.o_isvec # identify which regnames map to out / o2 if name == 'RA': - log("get_pdecode_idx_out", out_sel, OutSel.RA.value, out, o_isvec) if out_sel == OutSel.RA.value: - return out, o_isvec + return True elif name == 'RT': - log("get_pdecode_idx_out", out_sel, OutSel.RT.value, - OutSel.RT_OR_ZERO.value, out, o_isvec, - dec2.dec.RT) if out_sel == OutSel.RT.value: - return out, o_isvec + return True + if out_sel == OutSel.RT_OR_ZERO.value and out != 0: + return True elif name == 'RT_OR_ZERO': - log("get_pdecode_idx_out", out_sel, OutSel.RT.value, - OutSel.RT_OR_ZERO.value, out, o_isvec, - dec2.dec.RT) if out_sel == OutSel.RT_OR_ZERO.value: - return out, o_isvec + return True elif name == 'FRA': - log("get_pdecode_idx_out", out_sel, OutSel.FRA.value, out, o_isvec) if out_sel == OutSel.FRA.value: - return out, o_isvec + return True + elif name == 'FRS': + if out_sel == OutSel.FRS.value: + return True elif name == 'FRT': - log("get_pdecode_idx_out", out_sel, OutSel.FRT.value, - OutSel.FRT.value, out, o_isvec) if out_sel == OutSel.FRT.value: - return out, o_isvec - log("get_pdecode_idx_out not found", name, out_sel, out, o_isvec) + return True + return False + + +# TODO, really should just be using PowerDecoder2 +def get_idx_out(dec2, name, ewmode=False): + op = dec2.dec.op + out_sel = yield op.out_sel + # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec) + out = yield dec2.e.write_reg.data + o_isvec = yield dec2.o_isvec + if ewmode: + offs = yield dec2.e.write_reg.offs + base = yield dec2.e.write_reg.base + out = (out, base, offs) + # identify which regnames map to out / o2 + ismap = yield from get_out_map(dec2, name) + if ismap: + log("get_idx_out", name, out_sel, out, o_isvec) + return out, o_isvec + log("get_idx_out not found", name, out_sel, out, o_isvec) return None, False # TODO, really should just be using PowerDecoder2 -def get_pdecode_idx_out2(dec2, name): +def get_out2_map(dec2, name): # check first if register is activated for 
write op = dec2.dec.op out_sel = yield op.out_sel out = yield dec2.e.write_ea.data - o_isvec = yield dec2.o2_isvec out_ok = yield dec2.e.write_ea.ok - log("get_pdecode_idx_out2", name, out_sel, out, out_ok, o_isvec) if not out_ok: - return None, False + return False - if name == 'RA': + if name in ['EA', 'RA']: if hasattr(op, "upd"): # update mode LD/ST uses read-reg A also as an output upd = yield op.upd - log("get_pdecode_idx_out2", upd, LDSTMode.update.value, + log("get_idx_out2", upd, LDSTMode.update.value, out_sel, OutSel.RA.value, - out, o_isvec) + out) if upd == LDSTMode.update.value: - return out, o_isvec + return True + if name == 'RS': + fft_en = yield dec2.implicit_rs + if fft_en: + log("get_idx_out2", out_sel, OutSel.RS.value, + out) + return True if name == 'FRS': - int_op = yield dec2.dec.op.internal_op - fft_en = yield dec2.use_svp64_fft - # if int_op == MicrOp.OP_FP_MADD.value and fft_en: + fft_en = yield dec2.implicit_rs if fft_en: - log("get_pdecode_idx_out2", out_sel, OutSel.FRS.value, - out, o_isvec) - return out, o_isvec + log("get_idx_out2", out_sel, OutSel.FRS.value, + out) + return True + return False + + +# TODO, really should just be using PowerDecoder2 +def get_idx_out2(dec2, name, ewmode=False): + # check first if register is activated for write + op = dec2.dec.op + out_sel = yield op.out_sel + out = yield dec2.e.write_ea.data + if ewmode: + offs = yield dec2.e.write_ea.offs + base = yield dec2.e.write_ea.base + out = (out, base, offs) + o_isvec = yield dec2.o2_isvec + ismap = yield from get_out2_map(dec2, name) + if ismap: + log("get_idx_out2", name, out_sel, out, o_isvec) + return out, o_isvec return None, False -class ISACaller(ISACallerHelper, ISAFPHelpers): +class StepLoop: + """deals with svstate looping. + """ + + def __init__(self, svstate): + self.svstate = svstate + self.new_iterators() + + def new_iterators(self): + self.src_it = self.src_iterator() + self.dst_it = self.dst_iterator() + self.loopend = False + self.new_srcstep = 0 + self.new_dststep = 0 + self.new_ssubstep = 0 + self.new_dsubstep = 0 + self.pred_dst_zero = 0 + self.pred_src_zero = 0 + + def src_iterator(self): + """source-stepping iterator + """ + pack = self.svstate.pack + + # source step + if pack: + # pack advances subvl in *outer* loop + while True: # outer subvl loop + while True: # inner vl loop + vl = self.svstate.vl + subvl = self.subvl + srcmask = self.srcmask + srcstep = self.svstate.srcstep + pred_src_zero = ((1 << srcstep) & srcmask) != 0 + if self.pred_sz or pred_src_zero: + self.pred_src_zero = not pred_src_zero + log(" advance src", srcstep, vl, + self.svstate.ssubstep, subvl) + # yield actual substep/srcstep + yield (self.svstate.ssubstep, srcstep) + # the way yield works these could have been modified. 
+ vl = self.svstate.vl + subvl = self.subvl + srcstep = self.svstate.srcstep + log(" advance src check", srcstep, vl, + self.svstate.ssubstep, subvl, srcstep == vl-1, + self.svstate.ssubstep == subvl) + if srcstep == vl-1: # end-point + self.svstate.srcstep = SelectableInt(0, 7) # reset + if self.svstate.ssubstep == subvl: # end-point + log(" advance pack stop") + return + break # exit inner loop + self.svstate.srcstep += SelectableInt(1, 7) # advance ss + subvl = self.subvl + if self.svstate.ssubstep == subvl: # end-point + self.svstate.ssubstep = SelectableInt(0, 2) # reset + log(" advance pack stop") + return + self.svstate.ssubstep += SelectableInt(1, 2) + + else: + # these cannot be done as for-loops because SVSTATE may change + # (srcstep/substep may be modified, interrupted, subvl/vl change) + # but they *can* be done as while-loops as long as every SVSTATE + # "thing" is re-read every single time a yield gives indices + while True: # outer vl loop + while True: # inner subvl loop + vl = self.svstate.vl + subvl = self.subvl + srcmask = self.srcmask + srcstep = self.svstate.srcstep + pred_src_zero = ((1 << srcstep) & srcmask) != 0 + if self.pred_sz or pred_src_zero: + self.pred_src_zero = not pred_src_zero + log(" advance src", srcstep, vl, + self.svstate.ssubstep, subvl) + # yield actual substep/srcstep + yield (self.svstate.ssubstep, srcstep) + if self.svstate.ssubstep == subvl: # end-point + self.svstate.ssubstep = SelectableInt(0, 2) # reset + break # exit inner loop + self.svstate.ssubstep += SelectableInt(1, 2) + vl = self.svstate.vl + if srcstep == vl-1: # end-point + self.svstate.srcstep = SelectableInt(0, 7) # reset + self.loopend = True + return + self.svstate.srcstep += SelectableInt(1, 7) # advance srcstep + + def dst_iterator(self): + """dest-stepping iterator + """ + unpack = self.svstate.unpack + + # dest step + if unpack: + # pack advances subvl in *outer* loop + while True: # outer subvl loop + while True: # inner vl loop + vl = self.svstate.vl + subvl = self.subvl + dstmask = self.dstmask + dststep = self.svstate.dststep + pred_dst_zero = ((1 << dststep) & dstmask) != 0 + if self.pred_dz or pred_dst_zero: + self.pred_dst_zero = not pred_dst_zero + log(" advance dst", dststep, vl, + self.svstate.dsubstep, subvl) + # yield actual substep/dststep + yield (self.svstate.dsubstep, dststep) + # the way yield works these could have been modified. 
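# --- illustrative sketch, not part of the patch ---
# The orderings produced by src_iterator()/dst_iterator() above, reduced to
# plain nested loops.  Normally the sub-element step is the *inner* loop;
# pack/unpack invert the nesting so it becomes the *outer* loop.  Substeps
# run 0..subvl inclusive, matching the end-point tests above.
def _plain_order(vl, subvl):
    for step in range(vl):
        for sub in range(subvl + 1):
            yield (sub, step)

def _packed_order(vl, subvl):
    for sub in range(subvl + 1):
        for step in range(vl):
            yield (sub, step)

# list(_plain_order(2, 1))  == [(0, 0), (1, 0), (0, 1), (1, 1)]
# list(_packed_order(2, 1)) == [(0, 0), (0, 1), (1, 0), (1, 1)]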
+ vl = self.svstate.vl + dststep = self.svstate.dststep + log(" advance dst check", dststep, vl, + self.svstate.ssubstep, subvl) + if dststep == vl-1: # end-point + self.svstate.dststep = SelectableInt(0, 7) # reset + if self.svstate.dsubstep == subvl: # end-point + log(" advance unpack stop") + return + break + self.svstate.dststep += SelectableInt(1, 7) # advance ds + subvl = self.subvl + if self.svstate.dsubstep == subvl: # end-point + self.svstate.dsubstep = SelectableInt(0, 2) # reset + log(" advance unpack stop") + return + self.svstate.dsubstep += SelectableInt(1, 2) + else: + # these cannot be done as for-loops because SVSTATE may change + # (dststep/substep may be modified, interrupted, subvl/vl change) + # but they *can* be done as while-loops as long as every SVSTATE + # "thing" is re-read every single time a yield gives indices + while True: # outer vl loop + while True: # inner subvl loop + subvl = self.subvl + dstmask = self.dstmask + dststep = self.svstate.dststep + pred_dst_zero = ((1 << dststep) & dstmask) != 0 + if self.pred_dz or pred_dst_zero: + self.pred_dst_zero = not pred_dst_zero + log(" advance dst", dststep, self.svstate.vl, + self.svstate.dsubstep, subvl) + # yield actual substep/dststep + yield (self.svstate.dsubstep, dststep) + if self.svstate.dsubstep == subvl: # end-point + self.svstate.dsubstep = SelectableInt(0, 2) # reset + break + self.svstate.dsubstep += SelectableInt(1, 2) + subvl = self.subvl + vl = self.svstate.vl + if dststep == vl-1: # end-point + self.svstate.dststep = SelectableInt(0, 7) # reset + return + self.svstate.dststep += SelectableInt(1, 7) # advance dststep + + def src_iterate(self): + """source-stepping iterator + """ + subvl = self.subvl + vl = self.svstate.vl + pack = self.svstate.pack + unpack = self.svstate.unpack + ssubstep = self.svstate.ssubstep + end_ssub = ssubstep == subvl + end_src = self.svstate.srcstep == vl-1 + log(" pack/unpack/subvl", pack, unpack, subvl, + "end", end_src, + "sub", end_ssub) + # first source step + srcstep = self.svstate.srcstep + srcmask = self.srcmask + if pack: + # pack advances subvl in *outer* loop + while True: + assert srcstep <= vl-1 + end_src = srcstep == vl-1 + if end_src: + if end_ssub: + self.loopend = True + else: + self.svstate.ssubstep += SelectableInt(1, 2) + srcstep = 0 # reset + break + else: + srcstep += 1 # advance srcstep + if not self.srcstep_skip: + break + if ((1 << srcstep) & srcmask) != 0: + break + else: + log(" sskip", bin(srcmask), bin(1 << srcstep)) + else: + # advance subvl in *inner* loop + if end_ssub: + while True: + assert srcstep <= vl-1 + end_src = srcstep == vl-1 + if end_src: # end-point + self.loopend = True + srcstep = 0 + break + else: + srcstep += 1 + if not self.srcstep_skip: + break + if ((1 << srcstep) & srcmask) != 0: + break + else: + log(" sskip", bin(srcmask), bin(1 << srcstep)) + self.svstate.ssubstep = SelectableInt(0, 2) # reset + else: + # advance ssubstep + self.svstate.ssubstep += SelectableInt(1, 2) + + self.svstate.srcstep = SelectableInt(srcstep, 7) + log(" advance src", self.svstate.srcstep, self.svstate.ssubstep, + self.loopend) + + def dst_iterate(self): + """dest step iterator + """ + vl = self.svstate.vl + subvl = self.subvl + pack = self.svstate.pack + unpack = self.svstate.unpack + dsubstep = self.svstate.dsubstep + end_dsub = dsubstep == subvl + dststep = self.svstate.dststep + end_dst = dststep == vl-1 + dstmask = self.dstmask + log(" pack/unpack/subvl", pack, unpack, subvl, + "end", end_dst, + "sub", end_dsub) + # now dest step + if 
unpack: + # unpack advances subvl in *outer* loop + while True: + assert dststep <= vl-1 + end_dst = dststep == vl-1 + if end_dst: + if end_dsub: + self.loopend = True + else: + self.svstate.dsubstep += SelectableInt(1, 2) + dststep = 0 # reset + break + else: + dststep += 1 # advance dststep + if not self.dststep_skip: + break + if ((1 << dststep) & dstmask) != 0: + break + else: + log(" dskip", bin(dstmask), bin(1 << dststep)) + else: + # advance subvl in *inner* loop + if end_dsub: + while True: + assert dststep <= vl-1 + end_dst = dststep == vl-1 + if end_dst: # end-point + self.loopend = True + dststep = 0 + break + else: + dststep += 1 + if not self.dststep_skip: + break + if ((1 << dststep) & dstmask) != 0: + break + else: + log(" dskip", bin(dstmask), bin(1 << dststep)) + self.svstate.dsubstep = SelectableInt(0, 2) # reset + else: + # advance ssubstep + self.svstate.dsubstep += SelectableInt(1, 2) + + self.svstate.dststep = SelectableInt(dststep, 7) + log(" advance dst", self.svstate.dststep, self.svstate.dsubstep, + self.loopend) + + def at_loopend(self): + """tells if this is the last possible element. uses the cached values + for src/dst-step and sub-steps + """ + subvl = self.subvl + vl = self.svstate.vl + srcstep, dststep = self.new_srcstep, self.new_dststep + ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep + end_ssub = ssubstep == subvl + end_dsub = dsubstep == subvl + if srcstep == vl-1 and end_ssub: + return True + if dststep == vl-1 and end_dsub: + return True + return False + + def advance_svstate_steps(self): + """ advance sub/steps. note that Pack/Unpack *INVERTS* the order. + TODO when Pack/Unpack is set, substep becomes the *outer* loop + """ + self.subvl = yield self.dec2.rm_dec.rm_in.subvl + if self.loopend: # huhn?? + return + self.src_iterate() + self.dst_iterate() + + def read_src_mask(self): + """read/update pred_sz and src mask + """ + # get SVSTATE VL (oh and print out some debug stuff) + vl = self.svstate.vl + srcstep = self.svstate.srcstep + ssubstep = self.svstate.ssubstep + + # get predicate mask (all 64 bits) + srcmask = 0xffff_ffff_ffff_ffff + + pmode = yield self.dec2.rm_dec.predmode + sv_ptype = yield self.dec2.dec.op.SV_Ptype + srcpred = yield self.dec2.rm_dec.srcpred + dstpred = yield self.dec2.rm_dec.dstpred + pred_sz = yield self.dec2.rm_dec.pred_sz + if pmode == SVP64PredMode.INT.value: + srcmask = dstmask = get_predint(self.gpr, dstpred) + if sv_ptype == SVPType.P2.value: + srcmask = get_predint(self.gpr, srcpred) + elif pmode == SVP64PredMode.CR.value: + srcmask = dstmask = get_predcr(self.crl, dstpred, vl) + if sv_ptype == SVPType.P2.value: + srcmask = get_predcr(self.crl, srcpred, vl) + # work out if the ssubsteps are completed + ssubstart = ssubstep == 0 + log(" pmode", pmode) + log(" ptype", sv_ptype) + log(" srcpred", bin(srcpred)) + log(" srcmask", bin(srcmask)) + log(" pred_sz", bin(pred_sz)) + log(" ssubstart", ssubstart) + + # store all that above + self.srcstep_skip = False + self.srcmask = srcmask + self.pred_sz = pred_sz + self.new_ssubstep = ssubstep + log(" new ssubstep", ssubstep) + # until the predicate mask has a "1" bit... 
or we run out of VL + # let srcstep==VL be the indicator to move to next instruction + if not pred_sz: + self.srcstep_skip = True + + def read_dst_mask(self): + """same as read_src_mask - check and record everything needed + """ + # get SVSTATE VL (oh and print out some debug stuff) + # yield Delay(1e-10) # make changes visible + vl = self.svstate.vl + dststep = self.svstate.dststep + dsubstep = self.svstate.dsubstep + + # get predicate mask (all 64 bits) + dstmask = 0xffff_ffff_ffff_ffff + + pmode = yield self.dec2.rm_dec.predmode + reverse_gear = yield self.dec2.rm_dec.reverse_gear + sv_ptype = yield self.dec2.dec.op.SV_Ptype + dstpred = yield self.dec2.rm_dec.dstpred + pred_dz = yield self.dec2.rm_dec.pred_dz + if pmode == SVP64PredMode.INT.value: + dstmask = get_predint(self.gpr, dstpred) + elif pmode == SVP64PredMode.CR.value: + dstmask = get_predcr(self.crl, dstpred, vl) + # work out if the ssubsteps are completed + dsubstart = dsubstep == 0 + log(" pmode", pmode) + log(" ptype", sv_ptype) + log(" dstpred", bin(dstpred)) + log(" dstmask", bin(dstmask)) + log(" pred_dz", bin(pred_dz)) + log(" dsubstart", dsubstart) + + self.dststep_skip = False + self.dstmask = dstmask + self.pred_dz = pred_dz + self.new_dsubstep = dsubstep + log(" new dsubstep", dsubstep) + if not pred_dz: + self.dststep_skip = True + + def svstate_pre_inc(self): + """check if srcstep/dststep need to skip over masked-out predicate bits + note that this is not supposed to do anything to substep, + it is purely for skipping masked-out bits + """ + + self.subvl = yield self.dec2.rm_dec.rm_in.subvl + yield from self.read_src_mask() + yield from self.read_dst_mask() + + self.skip_src() + self.skip_dst() + + def skip_src(self): + + srcstep = self.svstate.srcstep + srcmask = self.srcmask + pred_src_zero = self.pred_sz + vl = self.svstate.vl + # srcstep-skipping opportunity identified + if self.srcstep_skip: + # cannot do this with sv.bc - XXX TODO + if srcmask == 0: + self.loopend = True + while (((1 << srcstep) & srcmask) == 0) and (srcstep != vl): + log(" sskip", bin(1 << srcstep)) + srcstep += 1 + + # now work out if the relevant mask bits require zeroing + if pred_src_zero: + pred_src_zero = ((1 << srcstep) & srcmask) == 0 + + # store new srcstep / dststep + self.new_srcstep = srcstep + self.pred_src_zero = pred_src_zero + log(" new srcstep", srcstep) + + def skip_dst(self): + # dststep-skipping opportunity identified + dststep = self.svstate.dststep + dstmask = self.dstmask + pred_dst_zero = self.pred_dz + vl = self.svstate.vl + if self.dststep_skip: + # cannot do this with sv.bc - XXX TODO + if dstmask == 0: + self.loopend = True + while (((1 << dststep) & dstmask) == 0) and (dststep != vl): + log(" dskip", bin(1 << dststep)) + dststep += 1 + + # now work out if the relevant mask bits require zeroing + if pred_dst_zero: + pred_dst_zero = ((1 << dststep) & dstmask) == 0 + + # store new srcstep / dststep + self.new_dststep = dststep + self.pred_dst_zero = pred_dst_zero + log(" new dststep", dststep) + + +class ExitSyscallCalled(Exception): + pass + + +class SyscallEmulator(openpower.syscalls.Dispatcher): + def __init__(self, isacaller): + self.__isacaller = isacaller + + host = os.uname().machine + bits = (64 if (sys.maxsize > (2**32)) else 32) + host = openpower.syscalls.architecture(arch=host, bits=bits) + + return super().__init__(guest="ppc64", host=host) + + def __call__(self, identifier, *arguments): + (identifier, *arguments) = map(int, (identifier, *arguments)) + return super().__call__(identifier, *arguments) + 
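# --- illustrative sketch, not part of the patch ---
# The rule applied by skip_src()/skip_dst() above: with zeroing (sz/dz) off,
# masked-out elements are skipped entirely; with zeroing on, the element is
# not skipped but its result is forced to zero (pred_*_zero).  _next_step is
# a hypothetical stand-alone version of that rule.
def _next_step(step, mask, vl, zeroing):
    if zeroing:
        return step, ((mask >> step) & 1) == 0   # (step, must_zero)
    while step != vl and ((mask >> step) & 1) == 0:
        step += 1                                # skip masked-out elements
    return step, False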
+ def sys_exit_group(self, status, *rest): + self.__isacaller.halted = True + raise ExitSyscallCalled(status) + + def sys_write(self, fd, buf, count, *rest): + buf = self.__isacaller.mem.get_ctypes(buf, count, is_write=False) + try: + return os.write(fd, buf) + except OSError as e: + return -e.errno + + +class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # decoder2 - an instance of power_decoder2 # regfile - a list of initial values for the registers # initial_{etc} - initial values for SPRs, Condition Register, Mem, MSR @@ -536,7 +1190,35 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): initial_pc=0, bigendian=False, mmu=False, - icachemmu=False): + icachemmu=False, + initial_fpscr=0, + insnlog=None, + use_mmap_mem=False, + use_syscall_emu=False, + emulating_mmap=False): + if use_syscall_emu: + self.syscall = SyscallEmulator(isacaller=self) + if not use_mmap_mem: + log("forcing use_mmap_mem due to use_syscall_emu active") + use_mmap_mem = True + else: + self.syscall = None + + # we will eventually be able to load ELF files without use_syscall_emu + # (e.g. the linux kernel), so do it in a separate if block + if isinstance(initial_insns, ELFFile): + if not use_mmap_mem: + log("forcing use_mmap_mem due to loading an ELF file") + use_mmap_mem = True + if not emulating_mmap: + log("forcing emulating_mmap due to loading an ELF file") + emulating_mmap = True + + # trace log file for model output. if None do nothing + self.insnlog = insnlog + self.insnlog_is_file = hasattr(insnlog, "write") + if not self.insnlog_is_file and self.insnlog: + self.insnlog = open(self.insnlog, "w") self.bigendian = bigendian self.halted = False @@ -551,6 +1233,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): if initial_insns is None: initial_insns = {} assert self.respect_pc == False, "instructions required to honor pc" + if initial_msr is None: + initial_msr = DEFAULT_MSR log("ISACaller insns", respect_pc, initial_insns, disassembly) log("ISACaller initial_msr", initial_msr) @@ -578,30 +1262,43 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): if isinstance(initial_svstate, int): initial_svstate = SVP64State(initial_svstate) # SVSTATE, MSR and PC - self.svstate = initial_svstate + StepLoop.__init__(self, initial_svstate) self.msr = SelectableInt(initial_msr, 64) # underlying reg self.pc = PC() # GPR FPR SPR registers initial_sprs = deepcopy(initial_sprs) # so as not to get modified self.gpr = GPR(decoder2, self, self.svstate, regfile) self.fpr = GPR(decoder2, self, self.svstate, fpregfile) - self.spr = SPR(decoder2, initial_sprs) # initialise SPRs before MMU + # initialise SPRs before MMU + self.spr = SPR(decoder2, initial_sprs, gpr=self.gpr) # set up 4 dummy SVSHAPEs if they aren't already set up for i in range(4): sname = 'SVSHAPE%d' % i - if sname not in self.spr: - val = 0 - else: - val = self.spr[sname].value - # make sure it's an SVSHAPE - self.spr[sname] = SVSHAPE(val, self.gpr) + val = self.spr.get(sname, 0) + # make sure it's an SVSHAPE -- conversion done by SPR.__setitem__ + self.spr[sname] = val self.last_op_svshape = False # "raw" memory - self.mem = Mem(row_bytes=8, initial_mem=initial_mem) - self.mem.log_fancy(kind=LogKind.InstrInOuts) - self.imem = Mem(row_bytes=4, initial_mem=initial_insns) + if use_mmap_mem: + self.mem = MemMMap(row_bytes=8, + initial_mem=initial_mem, + misaligned_ok=True, + emulating_mmap=emulating_mmap) + self.imem = self.mem + lelf = self.mem.initialize(row_bytes=4, initial_mem=initial_insns) + if isinstance(lelf, LoadedELF): # stuff parsed from ELF + initial_pc = 
lelf.pc + for k, v in lelf.gprs.items(): + self.gpr[k] = SelectableInt(v, 64) + initial_fpscr = lelf.fpscr + self.mem.log_fancy(kind=LogType.InstrInOuts) + else: + self.mem = Mem(row_bytes=8, initial_mem=initial_mem, + misaligned_ok=True) + self.mem.log_fancy(kind=LogType.InstrInOuts) + self.imem = Mem(row_bytes=4, initial_mem=initial_insns) # MMU mode, redirect underlying Mem through RADIX if mmu: self.mem = RADIX(self.mem, self) @@ -612,6 +1309,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # FPR (same as GPR except for FP nums) # 4.2.2 p124 FPSCR (definitely "separate" - not in SPR) # note that mffs, mcrfs, mtfsf "manage" this FPSCR + self.fpscr = FPSCRState(initial_fpscr) + # 2.3.1 CR (and sub-fields CR0..CR6 - CR0 SO comes from XER.SO) # note that mfocrf, mfcr, mtcr, mtocrf, mcrxrx "manage" CRs # -- Done @@ -624,9 +1323,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # create CR then allow portions of it to be "selectable" (below) self.cr_fields = CRFields(initial_cr) self.cr = self.cr_fields.cr + self.cr_backup = 0 # sigh, dreadful hack: for fail-first (VLi) # "undefined", just set to variable-bit-width int (use exts "max") - # self.undefined = SelectableInt(0, 256) # TODO, not hard-code 256! + # self.undefined = SelectableInt(0, EFFECTIVELY_UNLIMITED) self.namespace = {} self.namespace.update(self.spr) @@ -644,12 +1344,16 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): 'SVSHAPE3': self.spr['SVSHAPE3'], 'CR': self.cr, 'MSR': self.msr, + 'FPSCR': self.fpscr, 'undefined': undefined, 'mode_is_64bit': True, 'SO': XER_bits['SO'], - 'XLEN': 64 # elwidth overrides, later + 'XLEN': 64 # elwidth overrides }) + for name in BFP_FLAG_NAMES: + setattr(self, name, 0) + # update pc to requested start point self.set_pc(initial_pc) @@ -661,12 +1365,21 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.decoder = decoder2.dec self.dec2 = decoder2 - super().__init__(XLEN=self.namespace["XLEN"]) + super().__init__(XLEN=self.namespace["XLEN"], FPSCR=self.fpscr) + + def trace(self, out): + if self.insnlog is None: + return + self.insnlog.write(out) @property def XLEN(self): return self.namespace["XLEN"] + @property + def FPSCR(self): + return self.fpscr + def call_trap(self, trap_addr, trap_bit): """calls TRAP and sets up NIA to the new execution location. next instruction will begin at trap_addr. @@ -681,7 +1394,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): TRAP function is callable from inside the pseudocode itself, hence the default arguments. when calling from inside ISACaller it is best to use call_trap() + + trap_addr: int | SelectableInt + the address to go to (before any modifications from `KAIVB`) + trap_bit: int | None + the bit in `SRR1` to set, `None` means don't set any bits. """ + if isinstance(trap_addr, SelectableInt): + trap_addr = trap_addr.value # https://bugs.libre-soc.org/show_bug.cgi?id=859 kaivb = self.spr['KAIVB'].value msr = self.namespace['MSR'].value @@ -693,8 +1413,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.spr['SRR1'].value = msr if self.is_svp64_mode: self.spr['SVSRR0'] = self.namespace['SVSTATE'].value - self.trap_nia = SelectableInt(trap_addr | (kaivb&~0x1fff), 64) - self.spr['SRR1'][trap_bit] = 1 # change *copy* of MSR in SRR1 + self.trap_nia = SelectableInt(trap_addr | (kaivb & ~0x1fff), 64) + if trap_bit is not None: + self.spr['SRR1'][trap_bit] = 1 # change *copy* of MSR in SRR1 # set exception bits. 
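# --- illustrative sketch, not part of the patch ---
# The trap redirection computed by TRAP() above: the vector offset (e.g.
# 0x700 for program check) is OR-ed with KAIVB with its low 13 bits cleared,
# a copy of the MSR goes to SRR1, and optionally one SRR1 bit is set to
# record the cause.
def _trap_target(trap_addr, kaivb):
    return trap_addr | (kaivb & ~0x1fff)

# _trap_target(0x700, kaivb=0x1_0000) == 0x1_0700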
TODO: this should, based on the address # in figure 66 p1065 V3.0B and the table figure 65 p1063 set these @@ -721,30 +1442,31 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): def memassign(self, ea, sz, val): self.mem.memassign(ea, sz, val) - def prep_namespace(self, insn_name, formname, op_fields): + def prep_namespace(self, insn_name, formname, op_fields, xlen): # TODO: get field names from form in decoder*1* (not decoder2) # decoder2 is hand-created, and decoder1.sigform is auto-generated # from spec # then "yield" fields only from op_fields rather than hard-coded # list, here. fields = self.decoder.sigforms[formname] - log("prep_namespace", formname, op_fields) + log("prep_namespace", formname, op_fields, insn_name) for name in op_fields: # CR immediates. deal with separately. needs modifying # pseudocode if self.is_svp64_mode and name in ['BI']: # TODO, more CRs # BI is a 5-bit, must reconstruct the value - regnum, is_vec = yield from get_pdecode_cr_in(self.dec2, name) + regnum, is_vec = yield from get_cr_in(self.dec2, name) sig = getattr(fields, name) val = yield sig # low 2 LSBs (CR field selector) remain same, CR num extended assert regnum <= 7, "sigh, TODO, 128 CR fields" val = (val & 0b11) | (regnum << 2) + elif self.is_svp64_mode and name in ['BF']: # TODO, more CRs + regnum, is_vec = yield from get_cr_out(self.dec2, "BF") + log('hack %s' % name, regnum, is_vec) + val = regnum else: - if name == 'spr': - sig = getattr(fields, name.upper()) - else: - sig = getattr(fields, name) + sig = getattr(fields, name) val = yield sig # these are all opcode fields involved in index-selection of CR, # and need to do "standard" arithmetic. CR[BA+32] for example @@ -757,6 +1479,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.namespace['XER'] = self.spr['XER'] self.namespace['CA'] = self.spr['XER'][XER_bits['CA']].value self.namespace['CA32'] = self.spr['XER'][XER_bits['CA32']].value + self.namespace['OV'] = self.spr['XER'][XER_bits['OV']].value + self.namespace['OV32'] = self.spr['XER'][XER_bits['OV32']].value + self.namespace['XLEN'] = xlen # add some SVSTATE convenience variables vl = self.svstate.vl @@ -764,26 +1489,148 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.namespace['VL'] = vl self.namespace['srcstep'] = srcstep + # take a copy of the CR field value: if non-VLi fail-first fails + # this is because the pseudocode writes *directly* to CR. 
sigh + self.cr_backup = self.cr.value + # sv.bc* need some extra fields - if self.is_svp64_mode and insn_name.startswith("sv.bc"): - # blegh grab bits manually - mode = yield self.dec2.rm_dec.rm_in.mode - bc_vlset = (mode & SVP64MODE.BC_VLSET) != 0 - bc_vli = (mode & SVP64MODE.BC_VLI) != 0 - bc_snz = (mode & SVP64MODE.BC_SNZ) != 0 - bc_vsb = yield self.dec2.rm_dec.bc_vsb - bc_lru = yield self.dec2.rm_dec.bc_lru - bc_gate = yield self.dec2.rm_dec.bc_gate - sz = yield self.dec2.rm_dec.pred_sz - self.namespace['ALL'] = SelectableInt(bc_gate, 1) - self.namespace['VSb'] = SelectableInt(bc_vsb, 1) - self.namespace['LRu'] = SelectableInt(bc_lru, 1) - self.namespace['VLSET'] = SelectableInt(bc_vlset, 1) - self.namespace['VLI'] = SelectableInt(bc_vli, 1) - self.namespace['sz'] = SelectableInt(sz, 1) - self.namespace['SNZ'] = SelectableInt(bc_snz, 1) - - def handle_carry_(self, inputs, outputs, already_done): + if not self.is_svp64_mode or not insn_name.startswith("sv.bc"): + return + + # blegh grab bits manually + mode = yield self.dec2.rm_dec.rm_in.mode + # convert to SelectableInt before test + mode = SelectableInt(mode, 5) + bc_vlset = mode[SVP64MODEb.BC_VLSET] != 0 + bc_vli = mode[SVP64MODEb.BC_VLI] != 0 + bc_snz = mode[SVP64MODEb.BC_SNZ] != 0 + bc_vsb = yield self.dec2.rm_dec.bc_vsb + bc_ctrtest = yield self.dec2.rm_dec.bc_ctrtest + bc_lru = yield self.dec2.rm_dec.bc_lru + bc_gate = yield self.dec2.rm_dec.bc_gate + sz = yield self.dec2.rm_dec.pred_sz + self.namespace['mode'] = SelectableInt(mode, 5) + self.namespace['ALL'] = SelectableInt(bc_gate, 1) + self.namespace['VSb'] = SelectableInt(bc_vsb, 1) + self.namespace['LRu'] = SelectableInt(bc_lru, 1) + self.namespace['CTRtest'] = SelectableInt(bc_ctrtest, 1) + self.namespace['VLSET'] = SelectableInt(bc_vlset, 1) + self.namespace['VLI'] = SelectableInt(bc_vli, 1) + self.namespace['sz'] = SelectableInt(sz, 1) + self.namespace['SNZ'] = SelectableInt(bc_snz, 1) + + def get_kludged_op_add_ca_ov(self, inputs, inp_ca_ov): + """ this was not at all necessary to do. this function massively + duplicates - in a laborious and complex fashion - the contents of + the CSV files that were extracted two years ago from microwatt's + source code. 
A-inversion is the "inv A" column, output inversion + is the "inv out" column, carry-in equal to 0 or 1 or CA is the + "cry in" column + + all of that information is available in + self.instrs[ins_name].op_fields + where info is usually assigned to self.instrs[ins_name] + + https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=openpower/isatables/minor_31.csv;hb=HEAD + + the immediate constants are *also* decoded correctly and placed + usually by DecodeIn2Imm into operand2, as part of power_decoder2.py + """ + def ca(a, b, ca_in, width): + mask = (1 << width) - 1 + y = (a & mask) + (b & mask) + ca_in + return y >> width + + asmcode = yield self.dec2.dec.op.asmcode + insn = insns.get(asmcode) + SI = yield self.dec2.dec.SI + SI &= 0xFFFF + CA, OV = inp_ca_ov + inputs = [i.value for i in inputs] + if SI & 0x8000: + SI -= 0x10000 + if insn in ("add", "addo", "addc", "addco"): + a = inputs[0] + b = inputs[1] + ca_in = 0 + elif insn == "addic" or insn == "addic.": + a = inputs[0] + b = SI + ca_in = 0 + elif insn in ("subf", "subfo", "subfc", "subfco"): + a = ~inputs[0] + b = inputs[1] + ca_in = 1 + elif insn == "subfic": + a = ~inputs[0] + b = SI + ca_in = 1 + elif insn == "adde" or insn == "addeo": + a = inputs[0] + b = inputs[1] + ca_in = CA + elif insn == "subfe" or insn == "subfeo": + a = ~inputs[0] + b = inputs[1] + ca_in = CA + elif insn == "addme" or insn == "addmeo": + a = inputs[0] + b = ~0 + ca_in = CA + elif insn == "addze" or insn == "addzeo": + a = inputs[0] + b = 0 + ca_in = CA + elif insn == "subfme" or insn == "subfmeo": + a = ~inputs[0] + b = ~0 + ca_in = CA + elif insn == "subfze" or insn == "subfzeo": + a = ~inputs[0] + b = 0 + ca_in = CA + elif insn == "addex": + # CA[32] aren't actually written, just generate so we have + # something to return + ca64 = ov64 = ca(inputs[0], inputs[1], OV, 64) + ca32 = ov32 = ca(inputs[0], inputs[1], OV, 32) + return ca64, ca32, ov64, ov32 + elif insn == "neg" or insn == "nego": + a = ~inputs[0] + b = 0 + ca_in = 1 + else: + raise NotImplementedError( + "op_add kludge unimplemented instruction: ", asmcode, insn) + + ca64 = ca(a, b, ca_in, 64) + ca32 = ca(a, b, ca_in, 32) + ov64 = ca64 != ca(a, b, ca_in, 63) + ov32 = ca32 != ca(a, b, ca_in, 31) + return ca64, ca32, ov64, ov32 + + def handle_carry_(self, inputs, output, ca, ca32, inp_ca_ov): + if ca is not None and ca32 is not None: + return + op = yield self.dec2.e.do.insn_type + if op == MicrOp.OP_ADD.value and ca is None and ca32 is None: + retval = yield from self.get_kludged_op_add_ca_ov( + inputs, inp_ca_ov) + ca, ca32, ov, ov32 = retval + asmcode = yield self.dec2.dec.op.asmcode + if insns.get(asmcode) == 'addex': + # TODO: if 32-bit mode, set ov to ov32 + self.spr['XER'][XER_bits['OV']] = ov + self.spr['XER'][XER_bits['OV32']] = ov32 + log(f"write OV/OV32 OV={ov} OV32={ov32}", + kind=LogType.InstrInOuts) + else: + # TODO: if 32-bit mode, set ca to ca32 + self.spr['XER'][XER_bits['CA']] = ca + self.spr['XER'][XER_bits['CA32']] = ca32 + log(f"write CA/CA32 CA={ca} CA32={ca32}", + kind=LogType.InstrInOuts) + return inv_a = yield self.dec2.e.do.invert_in if inv_a: inputs[0] = ~inputs[0] @@ -792,12 +1639,6 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): if imm_ok: imm = yield self.dec2.e.do.imm_data.data inputs.append(SelectableInt(imm, 64)) - assert len(outputs) >= 1 - log("outputs", repr(outputs)) - if isinstance(outputs, list) or isinstance(outputs, tuple): - output = outputs[0] - else: - output = outputs gts = [] for x in inputs: log("gt input", x, output) @@ -806,10 +1647,9 @@ class 
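# --- illustrative sketch, not part of the patch ---
# The carry/overflow recovery used by get_kludged_op_add_ca_ov() above: CA is
# the carry out of the full width, CA32 the carry out of the low 32 bits, and
# OV/OV32 flag a mismatch between the carry *into* and *out of* the sign bit.
def _add_flags(a, b, ca_in, width=64):
    def carry(w):
        mask = (1 << w) - 1
        return ((a & mask) + (b & mask) + ca_in) >> w
    ca, ca32 = carry(width), carry(32)
    ov = ca != carry(width - 1)
    ov32 = ca32 != carry(31)
    return ca, ca32, ov, ov32

# the subf family feeds in a = ~RA, ca_in = 1, as in the table above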
ISACaller(ISACallerHelper, ISAFPHelpers): log(gts) cy = 1 if any(gts) else 0 log("CA", cy, gts) - if not (1 & already_done): + if ca is None: # already written self.spr['XER'][XER_bits['CA']] = cy - log("inputs", already_done, inputs) # 32 bit carry # ARGH... different for OP_ADD... *sigh*... op = yield self.dec2.e.do.insn_type @@ -832,10 +1672,20 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): gts.append(gt) cy32 = 1 if any(gts) else 0 log("CA32", cy32, gts) - if not (2 & already_done): + if ca32 is None: # already written self.spr['XER'][XER_bits['CA32']] = cy32 - def handle_overflow(self, inputs, outputs, div_overflow): + def handle_overflow(self, inputs, output, div_overflow, inp_ca_ov): + op = yield self.dec2.e.do.insn_type + if op == MicrOp.OP_ADD.value: + retval = yield from self.get_kludged_op_add_ca_ov( + inputs, inp_ca_ov) + ca, ca32, ov, ov32 = retval + # TODO: if 32-bit mode, set ov to ov32 + self.spr['XER'][XER_bits['OV']] = ov + self.spr['XER'][XER_bits['OV32']] = ov32 + self.spr['XER'][XER_bits['SO']] |= ov + return if hasattr(self.dec2.e.do, "invert_in"): inv_a = yield self.dec2.e.do.invert_in if inv_a: @@ -845,8 +1695,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): if imm_ok: imm = yield self.dec2.e.do.imm_data.data inputs.append(SelectableInt(imm, 64)) - assert len(outputs) >= 1 - log("handle_overflow", inputs, outputs, div_overflow) + log("handle_overflow", inputs, output, div_overflow) if len(inputs) < 2 and div_overflow is None: return @@ -856,8 +1705,6 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): ov, ov32 = div_overflow, div_overflow # arithmetic overflow can be done by analysing the input and output elif len(inputs) >= 2: - output = outputs[0] - # OV (64-bit) input_sgn = [exts(x.value, x.bits) < 0 for x in inputs] output_sgn = exts(output.value, output.bits) < 0 @@ -872,14 +1719,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # now update XER OV/OV32/SO so = self.spr['XER'][XER_bits['SO']] - new_so = so | ov # sticky overflow ORs in old with new + new_so = so | ov # sticky overflow ORs in old with new self.spr['XER'][XER_bits['OV']] = ov self.spr['XER'][XER_bits['OV32']] = ov32 self.spr['XER'][XER_bits['SO']] = new_so log(" set overflow", ov, ov32, so, new_so) - def handle_comparison(self, outputs, cr_idx=0, overflow=None, no_so=False): - out = outputs[0] + def handle_comparison(self, out, cr_idx=0, overflow=None, no_so=False): assert isinstance(out, SelectableInt), \ "out zero not a SelectableInt %s" % repr(outputs) log("handle_comparison", out.bits, hex(out.value)) @@ -899,7 +1745,11 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): SO = SelectableInt(1, 0) else: SO = self.spr['XER'][XER_bits['SO']] - log("handle_comparison SO overflow", SO, overflow) + log("handle_comparison SO", SO.value, + "overflow", overflow, + "zero", zero.value, + "+ve", positive.value, + "-ve", negative.value) # alternative overflow checking (setvl mainly at the moment) if overflow is not None and overflow == 1: SO = SelectableInt(1, 1) @@ -930,6 +1780,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): pc, insn = self.get_next_insn() yield from self.setup_next_insn(pc, insn) + # cache since it's really slow to construct + __PREFIX_CACHE = SVP64Instruction.Prefix(SelectableInt(value=0, bits=32)) + + def __decode_prefix(self, opcode): + pfx = self.__PREFIX_CACHE + pfx.storage.eq(opcode) + return pfx + def setup_next_insn(self, pc, ins): """set up next instruction """ @@ -949,9 +1807,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): yield Settle() opcode = yield 
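# --- illustrative sketch, not part of the patch ---
# The 4-bit CR field produced by handle_comparison() above from the signed
# result and the XER SO bit (LT, GT, EQ, SO from MSB to LSB).
def _cr_field(result_signed, so):
    lt = int(result_signed < 0)
    gt = int(result_signed > 0)
    eq = int(result_signed == 0)
    return (lt << 3) | (gt << 2) | (eq << 1) | int(so)

# _cr_field(-5, so=0) == 0b1000 ; _cr_field(0, so=1) == 0b0011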
self.dec2.dec.opcode_in opcode = SelectableInt(value=opcode, bits=32) - pfx = SVP64Instruction.Prefix(opcode) - log("prefix test: opcode:", pfx.po, bin(pfx.po), pfx.id) - self.is_svp64_mode = bool((pfx.po == 0b000001) and (pfx.id == 0b11)) + pfx = self.__decode_prefix(opcode) + log("prefix test: opcode:", pfx.PO, bin(pfx.PO), pfx.id) + self.is_svp64_mode = bool((pfx.PO == 0b000001) and (pfx.id == 0b11)) self.pc.update_nia(self.is_svp64_mode) # set SVP64 decode yield self.dec2.is_svp64_mode.eq(self.is_svp64_mode) @@ -974,30 +1832,26 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): """execute one instruction """ # get the disassembly code for this instruction - if self.is_svp64_mode: - if not self.disassembly: - code = yield from self.get_assembly_name() - else: - code = self.disassembly[self._pc+4] - log(" svp64 sim-execute", hex(self._pc), code) + if not self.disassembly: + code = yield from self.get_assembly_name() else: - if not self.disassembly: - code = yield from self.get_assembly_name() - else: - code = self.disassembly[self._pc] - log("sim-execute", hex(self._pc), code) + offs, dbg = 0, "" + if self.is_svp64_mode: + offs, dbg = 4, "svp64 " + code = self.disassembly[self._pc+offs] + log(" %s sim-execute" % dbg, hex(self._pc), code) opname = code.split(' ')[0] try: yield from self.call(opname) # execute the instruction except MemException as e: # check for memory errors if e.args[0] == 'unaligned': # alignment error - # run a Trap but set DAR first - print("memory unaligned exception, DAR", e.dar) + # run a Trap but set DAR first + print("memory unaligned exception, DAR", e.dar, repr(e)) self.spr['DAR'] = SelectableInt(e.dar, 64) self.call_trap(0x600, PIb.PRIV) # 0x600, privileged return elif e.args[0] == 'invalid': # invalid - # run a Trap but set DAR first + # run a Trap but set DAR first log("RADIX MMU memory invalid error, mode %s" % e.mode) if e.mode == 'EXECUTE': # XXX TODO: must set a few bits in SRR1, @@ -1017,6 +1871,17 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # not supported yet: raise e # ... re-raise + # append to the trace log file + self.trace(" # %s\n" % code) + + log("gprs after code", code) + self.gpr.dump() + crs = [] + for i in range(len(self.crl)): + crs.append(bin(self.crl[i].asint())) + log("crs", " ".join(crs)) + log("vl,maxvl", self.svstate.vl, self.svstate.maxvl) + # don't use this except in special circumstances if not self.respect_pc: self.fake_pc += 4 @@ -1048,6 +1913,11 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): else: rc_en = False rc_ok = False + # annoying: ignore rc_ok if RC1 is set (for creating *assembly name*) + RC1 = yield self.dec2.rm_dec.RC1 + if RC1: + rc_en = False + rc_ok = False # grrrr have to special-case MUL op (see DecodeOE) log("ov %d en %d rc %d en %d op %d" % (ov_ok, ov_en, rc_ok, rc_en, int_op)) @@ -1084,6 +1954,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): asmop = 'mtcrf' return asmop + def reset_remaps(self): + self.remap_loopends = [0] * 4 + self.remap_idxs = [0, 1, 2, 3] + def get_remap_indices(self): """WARNING, this function stores remap_idxs and remap_loopends in the class for later use. this to avoid problems with yield @@ -1091,6 +1965,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # go through all iterators in lock-step, advance to next remap_idx srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps() # get four SVSHAPEs. 
here we are hard-coding + self.reset_remaps() SVSHAPE0 = self.spr['SVSHAPE0'] SVSHAPE1 = self.spr['SVSHAPE1'] SVSHAPE2 = self.spr['SVSHAPE2'] @@ -1102,8 +1977,6 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): (SVSHAPE3, SVSHAPE3.get_iterator()), ] - self.remap_loopends = [0] * 4 - self.remap_idxs = [0, 1, 2, 3] dbg = [] for i, (shape, remap) in enumerate(remaps): # zero is "disabled" @@ -1126,7 +1999,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): dec_insn = yield self.dec2.e.do.insn return dec_insn & (1 << 20) != 0 # sigh - XFF.spr[-1]? - def call(self, name): + def call(self, name, syscall_emu_active=False): """call(opcode) - the primary execution point for instructions """ self.last_st_addr = None # reset the last known store address @@ -1140,7 +2013,11 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # TODO, asmregs is from the spec, e.g. add RT,RA,RB # see http://bugs.libre-riscv.org/show_bug.cgi?id=282 asmop = yield from self.get_assembly_name() - log("call", ins_name, asmop) + log("call", ins_name, asmop, + kind=LogType.InstrInOuts) + + # sv.setvl is *not* a loop-function. sigh + log("is_svp64_mode", self.is_svp64_mode, asmop) # check privileged int_op = yield self.dec2.dec.op.internal_op @@ -1170,6 +2047,33 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.halted = True return + # User mode system call emulation consists of several steps: + # 1. Detect whether instruction is sc or scv. + # 2. Call the HDL implementation which invokes trap. + # 3. Reroute the guest system call to host system call. + # 4. Force return from the interrupt as if we had guest OS. + if ((asmop in ("sc", "scv")) and + (self.syscall is not None) and + not syscall_emu_active): + # Memoize PC and trigger an interrupt + if self.respect_pc: + pc = self.pc.CIA.value + else: + pc = self.fake_pc + yield from self.call(asmop, syscall_emu_active=True) + + # Reroute the syscall to host OS + identifier = self.gpr(0) + arguments = map(self.gpr, range(3, 9)) + result = self.syscall(identifier, *arguments) + self.gpr.write(3, result, False, self.namespace["XLEN"]) + + # Return from interrupt + yield from self.call("rfid", syscall_emu_active=True) + return + elif ((name in ("rfid", "hrfid")) and syscall_emu_active): + asmop = "rfid" + # check illegal instruction illegal = False if ins_name not in ['mtcrf', 'mtocrf']: @@ -1177,22 +2081,47 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # list of instructions not being supported by binutils (.long) dotstrp = asmop[:-1] if asmop[-1] == '.' 
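# --- illustrative sketch, not part of the patch ---
# The guest->host reroute described in steps 1-4 above: the guest syscall
# number is taken from r0, its arguments from r3..r8, the host-side handler
# result is written back to r3, and execution resumes via rfid.  The names
# gpr_read/gpr_write/dispatch are hypothetical stand-ins.
def _emulate_sc(gpr_read, gpr_write, dispatch):
    identifier = gpr_read(0)
    arguments = [gpr_read(r) for r in range(3, 9)]
    result = dispatch(identifier, *arguments)
    gpr_write(3, result)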
else asmop - if dotstrp in [ 'fsins', 'fcoss', - 'ffmadds', 'fdmadds', 'ffadds', - 'mins', 'maxs', 'minu', 'maxu', - 'setvl', 'svindex', 'svremap', 'svstep', - 'svshape', 'svshape2', - 'grev', 'ternlogi', 'bmask', 'cprop', - 'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd', - 'fmvis', 'fishmv', - ]: + if dotstrp in [*FPTRANS_INSNS, + *LDST_UPDATE_INSNS, + 'ffmadds', 'fdmadds', 'ffadds', + 'minmax', + "brh", "brw", "brd", + 'setvl', 'svindex', 'svremap', 'svstep', + 'svshape', 'svshape2', + 'ternlogi', 'bmask', 'cprop', 'gbbd', + 'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd', + 'fmvis', 'fishmv', 'pcdec', "maddedu", "divmod2du", + "dsld", "dsrd", "maddedus", + "sadd", "saddw", "sadduw", + "cffpr", "cffpro", + "mffpr", "mffprs", + "ctfpr", "ctfprs", + "mtfpr", "mtfprs", + "maddsubrs", "maddrs", "msubrs", + "cfuged", "cntlzdm", "cnttzdm", "pdepd", "pextd", + "setbc", "setbcr", "setnbc", "setnbcr", + ]: illegal = False ins_name = dotstrp + # match against instructions treated as nop, see nop below + if asmop.startswith("dcbt"): + illegal = False + ins_name = "nop" + # branch-conditional redirects to sv.bc if asmop.startswith('bc') and self.is_svp64_mode: ins_name = 'sv.%s' % ins_name + # ld-immediate-with-pi mode redirects to ld-with-postinc + ldst_imm_postinc = False + if 'u' in ins_name and self.is_svp64_mode: + ldst_pi = yield self.dec2.rm_dec.ldst_postinc + if ldst_pi: + ins_name = ins_name.replace("u", "up") + ldst_imm_postinc = True + log(" enable ld/st postinc", ins_name) + log(" post-processed name", dotstrp, ins_name, asmop) # illegal instructions call TRAP at 0x700 @@ -1215,13 +2144,39 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.update_pc_next() return + # get elwidths, defaults to 64 + xlen = 64 + ew_src = 64 + ew_dst = 64 + if self.is_svp64_mode: + ew_src = yield self.dec2.rm_dec.ew_src + ew_dst = yield self.dec2.rm_dec.ew_dst + ew_src = 8 << (3-int(ew_src)) # convert to bitlength + ew_dst = 8 << (3-int(ew_dst)) # convert to bitlength + xlen = max(ew_src, ew_dst) + log("elwidth", ew_src, ew_dst) + log("XLEN:", self.is_svp64_mode, xlen) + # look up instruction in ISA.instrs, prepare namespace - info = self.instrs[ins_name] - yield from self.prep_namespace(ins_name, info.form, info.op_fields) + if ins_name == 'pcdec': # grrrr yes there are others ("stbcx." etc.) + info = self.instrs[ins_name+"."] + elif asmop[-1] == '.' and asmop in self.instrs: + info = self.instrs[asmop] + else: + info = self.instrs[ins_name] + yield from self.prep_namespace(ins_name, info.form, info.op_fields, + xlen) + + # dict retains order + inputs = dict.fromkeys(create_full_args( + read_regs=info.read_regs, special_regs=info.special_regs, + uninit_regs=info.uninit_regs, write_regs=info.write_regs)) # preserve order of register names - input_names = create_args(list(info.read_regs) + - list(info.uninit_regs)) + write_without_special_regs = OrderedSet(info.write_regs) + write_without_special_regs -= OrderedSet(info.special_regs) + input_names = create_args([ + *info.read_regs, *info.uninit_regs, *write_without_special_regs]) log("input names", input_names) # get SVP64 entry for the current instruction @@ -1233,8 +2188,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): log("sv rm", sv_rm, dest_cr, src_cr, src_byname, dest_byname) # see if srcstep/dststep need skipping over masked-out predicate bits - if (self.is_svp64_mode or ins_name == 'setvl' or - ins_name in ['svremap', 'svstate']): + # svstep also needs advancement because it calls SVSTATE_NEXT. 
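The element-width handling introduced above converts the 2-bit SVP64 elwidth field to a bit-length via 8 << (3 - ew) and takes XLEN as the wider of source and destination. A minimal standalone sketch of that mapping (helper names here are illustrative, not part of the patch):

    def decode_elwidth(ew_field):
        # 2-bit SVP64 elwidth encoding: 0 -> 64, 1 -> 32, 2 -> 16, 3 -> 8
        assert 0 <= ew_field <= 3
        return 8 << (3 - ew_field)

    def pick_xlen(ew_src_field, ew_dst_field):
        # XLEN for the pseudo-code is the wider of the two element widths
        ew_src = decode_elwidth(ew_src_field)
        ew_dst = decode_elwidth(ew_dst_field)
        return ew_src, ew_dst, max(ew_src, ew_dst)

    assert decode_elwidth(0) == 64 and decode_elwidth(3) == 8
    assert pick_xlen(2, 0) == (16, 64, 64)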
+ # bit the remaps get computed just after pre_inc moves them on + # with remap_set_steps substituting for PowerDecider2 not doing it, + # and SVSTATE_NEXT not being able to.use yield, the preinc on + # svstep is necessary for now. + self.reset_remaps() + if (self.is_svp64_mode or ins_name in ['svstep']): yield from self.svstate_pre_inc() if self.is_svp64_mode: pre = yield from self.update_new_svstate_steps() @@ -1253,7 +2213,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): if self.is_svp64_mode and vl == 0: self.pc.update(self.namespace, self.is_svp64_mode) log("SVP64: VL=0, end of call", self.namespace['CIA'], - self.namespace['NIA'], kind=LogKind.InstrInOuts) + self.namespace['NIA'], kind=LogType.InstrInOuts) return # for when SVREMAP is active, using pre-arranged schedule. @@ -1278,12 +2238,21 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): log("remap active", bin(remap_active)) # main input registers (RT, RA ...) - inputs = [] for name in input_names: - log("name", name) - regval = (yield from self.get_input(name)) - log("regval", regval) - inputs.append(regval) + if name == "overflow": + inputs[name] = SelectableInt(0, 1) + elif name == "FPSCR": + inputs[name] = self.FPSCR + elif name in ("CA", "CA32", "OV", "OV32"): + inputs[name] = self.spr['XER'][XER_bits[name]] + elif name in "CR0": + inputs[name] = self.crl[0] + elif name in spr_byname: + inputs[name] = self.spr[name] + else: + regval = (yield from self.get_input(name, ew_src, xlen)) + log("regval name", name, regval) + inputs[name] = regval # arrrrgh, awful hack, to get _RT into namespace if ins_name in ['setvl', 'svstep']: @@ -1292,37 +2261,61 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.namespace[regname] = SelectableInt(RT, 5) if RT == 0: self.namespace["RT"] = SelectableInt(0, 5) - regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, "RT") + regnum, is_vec = yield from get_idx_out(self.dec2, "RT") log('hack input reg %s %s' % (name, str(regnum)), is_vec) # in SVP64 mode for LD/ST work out immediate # XXX TODO: replace_ds for DS-Form rather than D-Form. # use info.form to detect - if self.is_svp64_mode: + if self.is_svp64_mode and not ldst_imm_postinc: yield from self.check_replace_d(info, remap_active) # "special" registers for special in info.special_regs: if special in special_sprs: - inputs.append(self.spr[special]) + inputs[special] = self.spr[special] else: - inputs.append(self.namespace[special]) + inputs[special] = self.namespace[special] # clear trap (trap) NIA self.trap_nia = None # check if this was an sv.bc* and create an indicator that # this is the last check to be made as a loop. combined with - # the ALL/ANY mode we can early-exit + # the ALL/ANY mode we can early-exit. 
note that BI (to test) + # is an input so there is no termination if BI is scalar + # (because early-termination is for *output* scalars) if self.is_svp64_mode and ins_name.startswith("sv.bc"): - no_in_vec = yield self.dec2.no_in_vec # BI is scalar - end_loop = no_in_vec or srcstep == vl-1 or dststep == vl-1 + end_loop = srcstep == vl-1 or dststep == vl-1 self.namespace['end_loop'] = SelectableInt(end_loop, 1) + inp_ca_ov = (self.spr['XER'][XER_bits['CA']].value, + self.spr['XER'][XER_bits['OV']].value) + + for k, v in inputs.items(): + if v is None: + v = SelectableInt(0, self.XLEN) + # prevent pseudo-code from modifying input registers + v = copy_assign_rhs(v) + if isinstance(v, SelectableInt): + v.ok = False + inputs[k] = v + # execute actual instruction here (finally) log("inputs", inputs) + inputs = list(inputs.values()) results = info.func(self, *inputs) - log("results", results) + output_names = create_args(info.write_regs) + outs = {} + # record .ok before anything after the pseudo-code can modify it + outs_ok = {} + for out, n in zip(results or [], output_names): + outs[n] = out + outs_ok[n] = True + if isinstance(out, SelectableInt): + outs_ok[n] = out.ok + log("results", outs) + log("results ok", outs_ok) # "inject" decorator takes namespace from function locals: we need to # overwrite NIA being overwritten (sigh) @@ -1343,70 +2336,166 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.last_st_addr, self.last_ld_addr) # detect if CA/CA32 already in outputs (sra*, basically) - already_done = 0 - if info.write_regs: - output_names = create_args(info.write_regs) - for name in output_names: - if name == 'CA': - already_done |= 1 - if name == 'CA32': - already_done |= 2 - - log("carry already done?", bin(already_done)) - if hasattr(self.dec2.e.do, "output_carry"): - carry_en = yield self.dec2.e.do.output_carry - else: - carry_en = False - if carry_en: - yield from self.handle_carry_(inputs, results, already_done) + ca = outs.get("CA") + ca32 = outs.get("CA32") - # check if one of the regs was named "overflow" - overflow = None - if info.write_regs: - for name, output in zip(output_names, results): - if name == 'overflow': - overflow = output - - if not self.is_svp64_mode: # yeah just no. not in parallel processing + log("carry already done?", ca, ca32, output_names) + # soc test_pipe_caller tests don't have output_carry + has_output_carry = hasattr(self.dec2.e.do, "output_carry") + carry_en = has_output_carry and (yield self.dec2.e.do.output_carry) + if carry_en: + yield from self.handle_carry_( + inputs, results[0], ca, ca32, inp_ca_ov=inp_ca_ov) + + # get output named "overflow" and "CR0" + overflow = outs.get('overflow') + cr0 = outs.get('CR0') + cr1 = outs.get('CR1') + + # soc test_pipe_caller tests don't have oe + has_oe = hasattr(self.dec2.e.do, "oe") + # yeah just no. 
not in parallel processing + if has_oe and not self.is_svp64_mode: # detect if overflow was in return result - if hasattr(self.dec2.e.do, "oe"): - ov_en = yield self.dec2.e.do.oe.oe - ov_ok = yield self.dec2.e.do.oe.ok - else: - ov_en = False - ov_ok = False + ov_en = yield self.dec2.e.do.oe.oe + ov_ok = yield self.dec2.e.do.oe.ok log("internal overflow", ins_name, overflow, "en?", ov_en, ov_ok) if ov_en & ov_ok: - yield from self.handle_overflow(inputs, results, overflow) + yield from self.handle_overflow( + inputs, results[0], overflow, inp_ca_ov=inp_ca_ov) # only do SVP64 dest predicated Rc=1 if dest-pred is not enabled rc_en = False if not self.is_svp64_mode or not pred_dst_zero: if hasattr(self.dec2.e.do, "rc"): rc_en = yield self.dec2.e.do.rc.rc + # don't do Rc=1 for svstep it is handled explicitly. + # XXX TODO: now that CR0 is supported, sort out svstep's pseudocode + # to write directly to CR0 instead of in ISACaller. hooyahh. if rc_en and ins_name not in ['svstep']: - if ins_name.startswith("f"): - rc_reg = "CR1" # not calculated correctly yet (not FP compares) - else: - rc_reg = "CR0" - regnum, is_vec = yield from get_pdecode_cr_out(self.dec2, rc_reg) - cmps = results - # hang on... for `setvl` actually you want to test SVSTATE.VL - is_setvl = ins_name == 'setvl' - if is_setvl: - vl = results[0].vl - cmps = (SelectableInt(vl, 64), overflow,) + if outs_ok.get('FPSCR', False): + FPSCR = outs['FPSCR'] else: - overflow = None # do not override overflow except in setvl - self.handle_comparison(cmps, regnum, overflow, no_so=is_setvl) + FPSCR = self.FPSCR + yield from self.do_rc_ov( + ins_name, results[0], overflow, cr0, cr1, FPSCR) + + # check failfirst + ffirst_hit = False, False + if self.is_svp64_mode: + sv_mode = yield self.dec2.rm_dec.sv_mode + is_cr = sv_mode == SVMode.CROP.value + chk = rc_en or is_cr + if outs_ok.get('CR', False): + # early write so check_ffirst can see value + self.namespace['CR'].eq(outs['CR']) + ffirst_hit = (yield from self.check_ffirst(info, chk, srcstep)) # any modified return results? - if info.write_regs: - for name, output in zip(output_names, results): - yield from self.check_write(info, name, output, carry_en) + yield from self.do_outregs( + info, outs, carry_en, ffirst_hit, ew_dst, outs_ok) + + # check if a FP Exception occurred. TODO for DD-FFirst, check VLi + # and raise the exception *after* if VLi=1 but if VLi=0 then + # truncate and make the exception "disappear". 
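The data-dependent fail-first path above (check_ffirst, consulted once per element) tests a single bit of the just-written CR field, optionally inverted, and on a hit truncates VL at the current element, keeping that element when VLI is set. A small standalone model of that decision, using illustrative names rather than anything from the patch:

    def ffirst_truncate(cr_field, cr_bit, inv, srcstep, vli, vl):
        # MSB0 bit-test of the 4-bit CR field (LT=0, GT=1, EQ=2, SO=3),
        # optionally inverted, mirroring the crtest[cr_bit] != inv check
        hit = ((cr_field >> (3 - cr_bit)) & 1) != inv
        if not hit:
            return vl, False               # no hit: VL unchanged, keep looping
        return srcstep + int(vli), True    # hit: truncate VL (VLI=1 keeps element)

    # EQ set, not inverted, hit at element 3 with VLI=0: VL becomes 3
    assert ffirst_truncate(0b0010, 2, 0, 3, 0, 8) == (3, True)
    # testing the LT bit instead: no hit, VL stays at 8
    assert ffirst_truncate(0b0010, 0, 0, 3, 0, 8) == (8, False)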
+ if self.FPSCR.FEX and (self.msr[MSRb.FE0] or self.msr[MSRb.FE1]): + self.call_trap(0x700, PIb.FP) + return - nia_update = (yield from self.check_step_increment(results, rc_en, - asmop, ins_name)) + yield from self.do_nia(asmop, ins_name, rc_en, ffirst_hit) + + def check_ffirst(self, info, rc_en, srcstep): + """fail-first mode: checks a bit of Rc Vector, truncates VL + """ + rm_mode = yield self.dec2.rm_dec.mode + ff_inv = yield self.dec2.rm_dec.inv + cr_bit = yield self.dec2.rm_dec.cr_sel + RC1 = yield self.dec2.rm_dec.RC1 + vli_ = yield self.dec2.rm_dec.vli # VL inclusive if truncated + log(" ff rm_mode", rc_en, rm_mode, SVP64RMMode.FFIRST.value) + log(" inv", ff_inv) + log(" RC1", RC1) + log(" vli", vli_) + log(" cr_bit", cr_bit) + log(" rc_en", rc_en) + if not rc_en or rm_mode != SVP64RMMode.FFIRST.value: + return False, False + # get the CR vevtor, do BO-test + crf = "CR0" + log("asmregs", info.asmregs[0], info.write_regs) + if 'CR' in info.write_regs and 'BF' in info.asmregs[0]: + crf = 'BF' + regnum, is_vec = yield from get_cr_out(self.dec2, crf) + crtest = self.crl[regnum] + ffirst_hit = crtest[cr_bit] != ff_inv + log("cr test", crf, regnum, int(crtest), crtest, cr_bit, ff_inv) + log("cr test?", ffirst_hit) + if not ffirst_hit: + return False, False + # Fail-first activated, truncate VL + vli = SelectableInt(int(vli_), 7) + self.svstate.vl = srcstep + vli + yield self.dec2.state.svstate.eq(self.svstate.value) + yield Settle() # let decoder update + return True, vli_ + + def do_rc_ov(self, ins_name, result, overflow, cr0, cr1, FPSCR): + cr_out = yield self.dec2.op.cr_out + if cr_out == CROutSel.CR1.value: + rc_reg = "CR1" + else: + rc_reg = "CR0" + regnum, is_vec = yield from get_cr_out(self.dec2, rc_reg) + # hang on... for `setvl` actually you want to test SVSTATE.VL + is_setvl = ins_name in ('svstep', 'setvl') + if is_setvl: + result = SelectableInt(result.vl, 64) + # else: + # overflow = None # do not override overflow except in setvl + + if rc_reg == "CR1": + if cr1 is None: + cr1 = int(FPSCR.FX) << 3 + cr1 |= int(FPSCR.FEX) << 2 + cr1 |= int(FPSCR.VX) << 1 + cr1 |= int(FPSCR.OX) + log("default fp cr1", cr1) + else: + log("explicit cr1", cr1) + self.crl[regnum].eq(cr1) + elif cr0 is None: + # if there was not an explicit CR0 in the pseudocode, + # do implicit Rc=1 + self.handle_comparison(result, regnum, overflow, no_so=is_setvl) + else: + # otherwise we just blat CR0 into the required regnum + log("explicit rc0", cr0) + self.crl[regnum].eq(cr0) + + def do_outregs(self, info, outs, ca_en, ffirst_hit, ew_dst, outs_ok): + ffirst_hit, vli = ffirst_hit + # write out any regs for this instruction, but only if fail-first is ok + # XXX TODO: allow CR-vector to be written out even if ffirst fails + if not ffirst_hit or vli: + for name, output in outs.items(): + if not outs_ok[name]: + log("skipping writing output with .ok=False", name, output) + continue + yield from self.check_write(info, name, output, ca_en, ew_dst) + # restore the CR value on non-VLI failfirst (from sv.cmp and others + # which write directly to CR in the pseudocode (gah, what a mess) + # if ffirst_hit and not vli: + # self.cr.value = self.cr_backup + + def do_nia(self, asmop, ins_name, rc_en, ffirst_hit): + ffirst_hit, vli = ffirst_hit + if ffirst_hit: + self.svp64_reset_loop() + nia_update = True + else: + # check advancement of src/dst/sub-steps and if PC needs updating + nia_update = (yield from self.check_step_increment( + rc_en, asmop, ins_name)) if nia_update: self.update_pc_next() @@ -1429,14 +2518,14 @@ 
class ISACaller(ISACallerHelper, ISAFPHelpers): if op == MicrOp.OP_LOAD.value: if remap_active: offsmul = yield self.dec2.in1_step - log("D-field REMAP src", imm, offsmul) + log("D-field REMAP src", imm, offsmul, ldstmode) else: offsmul = (srcstep * (subvl+1)) + ssubstep - log("D-field src", imm, offsmul) + log("D-field src", imm, offsmul, ldstmode) elif op == MicrOp.OP_STORE.value: # XXX NOTE! no bit-reversed STORE! this should not ever be used offsmul = (dststep * (subvl+1)) + dsubstep - log("D-field dst", imm, offsmul) + log("D-field dst", imm, offsmul, ldstmode) # Unit-Strided LD/ST adds offset*width to immediate if ldstmode == SVP64LDSTmode.UNITSTRIDE.value: ldst_len = yield self.dec2.e.do.data_len @@ -1461,31 +2550,48 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): else: self.namespace['D'] = imm - def get_input(self, name): + def get_input(self, name, ew_src, xlen): # using PowerDecoder2, first, find the decoder index. # (mapping name RA RB RC RS to in1, in2, in3) - regnum, is_vec = yield from get_pdecode_idx_in(self.dec2, name) + regnum, is_vec = yield from get_idx_in(self.dec2, name, True) if regnum is None: # doing this is not part of svp64, it's because output # registers, to be modified, need to be in the namespace. - regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, name) + regnum, is_vec = yield from get_idx_out(self.dec2, name, True) if regnum is None: - regnum, is_vec = yield from get_pdecode_idx_out2(self.dec2, name) + regnum, is_vec = yield from get_idx_out2(self.dec2, name, True) + + if isinstance(regnum, tuple): + (regnum, base, offs) = regnum + else: + base, offs = regnum, 0 # temporary HACK # in case getting the register number is needed, _RA, _RB + # (HACK: only in straight non-svp64-mode for now, or elwidth == 64) regname = "_" + name - self.namespace[regname] = regnum + if not self.is_svp64_mode or ew_src == 64: + self.namespace[regname] = regnum + elif regname in self.namespace: + del self.namespace[regname] + if not self.is_svp64_mode or not self.pred_src_zero: log('reading reg %s %s' % (name, str(regnum)), is_vec) if name in fregs: - reg_val = SelectableInt(self.fpr(regnum)) - log("read reg %d: 0x%x" % (regnum, reg_val.value)) + fval = self.fpr(base, is_vec, offs, ew_src) + reg_val = SelectableInt(fval) + assert ew_src == XLEN, "TODO fix elwidth conversion" + self.trace("r:FPR:%d:%d:%d " % (base, offs, ew_src)) + log("read fp reg %d/%d: 0x%x" % (base, offs, reg_val.value), + kind=LogType.InstrInOuts) elif name is not None: - reg_val = SelectableInt(self.gpr(regnum)) - log("read reg %d: 0x%x" % (regnum, reg_val.value)) + gval = self.gpr(base, is_vec, offs, ew_src) + reg_val = SelectableInt(gval.value, bits=xlen) + self.trace("r:GPR:%d:%d:%d " % (base, offs, ew_src)) + log("read int reg %d/%d: 0x%x" % (base, offs, reg_val.value), + kind=LogType.InstrInOuts) else: log('zero input reg %s %s' % (name, str(regnum)), is_vec) - reg_val = 0 + reg_val = SelectableInt(0, ew_src) return reg_val def remap_set_steps(self, remaps): @@ -1511,12 +2617,25 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): mi2 = self.svstate.mi2 mo0 = self.svstate.mo0 mo1 = self.svstate.mo1 - steps = [(self.dec2.in1_step, mi0), # RA - (self.dec2.in2_step, mi1), # RB - (self.dec2.in3_step, mi2), # RC - (self.dec2.o_step, mo0), # RT - (self.dec2.o2_step, mo1), # EA + steps = [[self.dec2.in1_step, mi0], # RA + [self.dec2.in2_step, mi1], # RB + [self.dec2.in3_step, mi2], # RC + [self.dec2.o_step, mo0], # RT + [self.dec2.o2_step, mo1], # EA ] + if False: # TODO + rnames = ['RA', 'RB', 'RC', 
'RT', 'RS'] + for i, reg in enumerate(rnames): + idx = yield from get_idx_map(self.dec2, reg) + if idx is None: + idx = yield from get_idx_map(self.dec2, "F"+reg) + if idx == 1: # RA + steps[i][0] = self.dec2.in1_step + elif idx == 2: # RB + steps[i][0] = self.dec2.in2_step + elif idx == 3: # RC + steps[i][0] = self.dec2.in3_step + log("remap step", i, reg, idx, steps[i][1]) remap_idxs = self.remap_idxs rremaps = [] # now cross-index the required SHAPE for each of 3-in 2-out regs @@ -1528,18 +2647,28 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): if shape.value == 0x0: continue # now set the actual requested step to the current index - yield dstep.eq(remap_idx) + if dstep is not None: + yield dstep.eq(remap_idx) # debug printout info - rremaps.append((shape.mode, i, rnames[i], shape_idx, remap_idx)) + rremaps.append((shape.mode, hex(shape.value), dstep, + i, rnames[i], shape_idx, remap_idx)) for x in rremaps: log("shape remap", x) - def check_write(self, info, name, output, carry_en): + def check_write(self, info, name, output, carry_en, ew_dst): if name == 'overflow': # ignore, done already (above) return + if name == 'CR0': # ignore, done already (above) + return if isinstance(output, int): - output = SelectableInt(output, 256) + output = SelectableInt(output, EFFECTIVELY_UNLIMITED) + # write FPSCR + if name in ['FPSCR', ]: + log("write FPSCR 0x%x" % (output.value)) + self.FPSCR.eq(output) + return + # write carry flags if name in ['CA', 'CA32']: if carry_en: log("writing %s to XER" % name, output) @@ -1547,114 +2676,117 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.spr['XER'][XER_bits[name]] = output.value else: log("NOT writing %s to XER" % name, output) - elif name in info.special_regs: + return + # write special SPRs + if name in info.special_regs: log('writing special %s' % name, output, special_sprs) - log("write reg %s 0x%x" % (name, output.value)) + log("write reg %s 0x%x" % (name, output.value), + kind=LogType.InstrInOuts) if name in special_sprs: self.spr[name] = output else: self.namespace[name].eq(output) if name == 'MSR': log('msr written', hex(self.msr.value)) + return + # find out1/out2 PR/FPR + regnum, is_vec = yield from get_idx_out(self.dec2, name, True) + if regnum is None: + regnum, is_vec = yield from get_idx_out2(self.dec2, name, True) + if regnum is None: + # temporary hack for not having 2nd output + regnum = yield getattr(self.decoder, name) + is_vec = False + # convenient debug prefix + if name in fregs: + reg_prefix = 'f' else: - regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, name) - if regnum is None: - regnum, is_vec = yield from get_pdecode_idx_out2( - self.dec2, name) - if regnum is None: - # temporary hack for not having 2nd output - regnum = yield getattr(self.decoder, name) - is_vec = False - if self.is_svp64_mode and self.pred_dst_zero: - log('zeroing reg %d %s' % (regnum, str(output)), - is_vec) - output = SelectableInt(0, 256) - else: - if name in fregs: - reg_prefix = 'f' - else: - reg_prefix = 'r' - log("write reg %s%d %0xx" % (reg_prefix, regnum, output.value)) - if output.bits > 64: - output = SelectableInt(output.value, 64) - if name in fregs: - self.fpr[regnum] = output - else: - self.gpr[regnum] = output + reg_prefix = 'r' + # check zeroing due to predicate bit being zero + if self.is_svp64_mode and self.pred_dst_zero: + log('zeroing reg %s %s' % (str(regnum), str(output)), is_vec) + output = SelectableInt(0, EFFECTIVELY_UNLIMITED) + log("write reg %s%s 0x%x ew %d" % (reg_prefix, str(regnum), + output.value, ew_dst), + 
kind=LogType.InstrInOuts) + # zero-extend tov64 bit begore storing (should use EXT oh well) + if output.bits > 64: + output = SelectableInt(output.value, 64) + rnum, base, offset = regnum + if name in fregs: + self.fpr.write(regnum, output, is_vec, ew_dst) + self.trace("w:FPR:%d:%d:%d " % (rnum, offset, ew_dst)) + else: + self.gpr.write(regnum, output, is_vec, ew_dst) + self.trace("w:GPR:%d:%d:%d " % (rnum, offset, ew_dst)) - def check_step_increment(self, results, rc_en, asmop, ins_name): + def check_step_increment(self, rc_en, asmop, ins_name): # check if it is the SVSTATE.src/dest step that needs incrementing # this is our Sub-Program-Counter loop from 0 to VL-1 + if not self.allow_next_step_inc: + if self.is_svp64_mode: + return (yield from self.svstate_post_inc(ins_name)) + + # XXX only in non-SVP64 mode! + # record state of whether the current operation was an svshape, + # OR svindex! + # to be able to know if it should apply in the next instruction. + # also (if going to use this instruction) should disable ability + # to interrupt in between. sigh. + self.last_op_svshape = asmop in ['svremap', 'svindex', + 'svshape2'] + return True + pre = False post = False nia_update = True - if self.allow_next_step_inc: - log("SVSTATE_NEXT: inc requested, mode", - self.svstate_next_mode, self.allow_next_step_inc) - yield from self.svstate_pre_inc() - pre = yield from self.update_new_svstate_steps() - if pre: - # reset at end of loop including exit Vertical Mode - log("SVSTATE_NEXT: end of loop, reset") - self.svp64_reset_loop() - self.svstate.vfirst = 0 - self.update_nia() - if not rc_en: - return True - results = [SelectableInt(0, 64)] - self.handle_comparison(results) # CR0 + log("SVSTATE_NEXT: inc requested, mode", + self.svstate_next_mode, self.allow_next_step_inc) + yield from self.svstate_pre_inc() + pre = yield from self.update_new_svstate_steps() + if pre: + # reset at end of loop including exit Vertical Mode + log("SVSTATE_NEXT: end of loop, reset") + self.svp64_reset_loop() + self.svstate.vfirst = 0 + self.update_nia() + if not rc_en: return True - if self.allow_next_step_inc == 2: - log("SVSTATE_NEXT: read") - nia_update = (yield from self.svstate_post_inc(ins_name)) - else: - log("SVSTATE_NEXT: post-inc") - # use actual src/dst-step here to check end, do NOT - # use bit-reversed version - srcstep, dststep = self.new_srcstep, self.new_dststep - ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep - remaps = self.get_remap_indices() - remap_idxs = self.remap_idxs - vl = self.svstate.vl - subvl = yield self.dec2.rm_dec.rm_in.subvl - end_src = srcstep == vl-1 - end_dst = dststep == vl-1 - if self.allow_next_step_inc != 2: - yield from self.advance_svstate_steps(end_src, end_dst) - self.namespace['SVSTATE'] = self.svstate.spr - # set CR0 (if Rc=1) based on end - if rc_en: - endtest = 1 if (end_src or end_dst) else 0 - #results = [SelectableInt(endtest, 64)] - # self.handle_comparison(results) # CR0 - - # see if svstep was requested, if so, which SVSTATE - endings = 0b111 - if self.svstate_next_mode > 0: - shape_idx = self.svstate_next_mode.value-1 - endings = self.remap_loopends[shape_idx] - cr_field = SelectableInt((~endings) << 1 | endtest, 4) - log("svstep Rc=1, CR0", cr_field) - self.crl[0].eq(cr_field) # CR0 - if end_src or end_dst: - # reset at end of loop including exit Vertical Mode - log("SVSTATE_NEXT: after increments, reset") - self.svp64_reset_loop() - self.svstate.vfirst = 0 - return nia_update - - if self.is_svp64_mode: - return (yield from 
self.svstate_post_inc(ins_name)) - - # XXX only in non-SVP64 mode! - # record state of whether the current operation was an svshape, - # OR svindex! - # to be able to know if it should apply in the next instruction. - # also (if going to use this instruction) should disable ability - # to interrupt in between. sigh. - self.last_op_svshape = asmop in ['svremap', 'svindex', 'svshape2'] - - return True + self.handle_comparison(SelectableInt(0, 64)) # CR0 + return True + if self.allow_next_step_inc == 2: + log("SVSTATE_NEXT: read") + nia_update = (yield from self.svstate_post_inc(ins_name)) + else: + log("SVSTATE_NEXT: post-inc") + # use actual (cached) src/dst-step here to check end + remaps = self.get_remap_indices() + remap_idxs = self.remap_idxs + vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl + if self.allow_next_step_inc != 2: + yield from self.advance_svstate_steps() + #self.namespace['SVSTATE'] = self.svstate.spr + # set CR0 (if Rc=1) based on end + endtest = 1 if self.at_loopend() else 0 + if rc_en: + #results = [SelectableInt(endtest, 64)] + # self.handle_comparison(results) # CR0 + + # see if svstep was requested, if so, which SVSTATE + endings = 0b111 + if self.svstate_next_mode > 0: + shape_idx = self.svstate_next_mode.value-1 + endings = self.remap_loopends[shape_idx] + cr_field = SelectableInt((~endings) << 1 | endtest, 4) + log("svstep Rc=1, CR0", cr_field, endtest) + self.crl[0].eq(cr_field) # CR0 + if endtest: + # reset at end of loop including exit Vertical Mode + log("SVSTATE_NEXT: after increments, reset") + self.svp64_reset_loop() + self.svstate.vfirst = 0 + return nia_update def SVSTATE_NEXT(self, mode, submode): """explicitly moves srcstep/dststep on to next element, for @@ -1677,139 +2809,77 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): if self.svstate_next_mode == 6: self.svstate_next_mode = 0 return SelectableInt(self.svstate.dststep, 7) + if self.svstate_next_mode == 7: + self.svstate_next_mode = 0 + return SelectableInt(self.svstate.ssubstep, 7) + if self.svstate_next_mode == 8: + self.svstate_next_mode = 0 + return SelectableInt(self.svstate.dsubstep, 7) return SelectableInt(0, 7) - def svstate_pre_inc(self): - """check if srcstep/dststep need to skip over masked-out predicate bits - note that this is not supposed to do anything to substep, - it is purely for skipping masked-out bits - """ - # get SVSTATE VL (oh and print out some debug stuff) - # yield Delay(1e-10) # make changes visible - vl = self.svstate.vl - subvl = yield self.dec2.rm_dec.rm_in.subvl - srcstep = self.svstate.srcstep - dststep = self.svstate.dststep - ssubstep = self.svstate.ssubstep - dsubstep = self.svstate.dsubstep - sv_a_nz = yield self.dec2.sv_a_nz - fft_mode = yield self.dec2.use_svp64_fft - in1 = yield self.dec2.e.read_reg1.data - log("SVP64: VL, subvl, srcstep, dststep, ssubstep, dsybstep, sv_a_nz, " - "in1 fft, svp64", - vl, subvl, srcstep, dststep, ssubstep, dsubstep, - sv_a_nz, in1, fft_mode, - self.is_svp64_mode) - - # get predicate mask (all 64 bits) - srcmask = dstmask = 0xffff_ffff_ffff_ffff - - pmode = yield self.dec2.rm_dec.predmode - pack = yield self.dec2.rm_dec.pack - unpack = yield self.dec2.rm_dec.unpack - reverse_gear = yield self.dec2.rm_dec.reverse_gear - sv_ptype = yield self.dec2.dec.op.SV_Ptype - srcpred = yield self.dec2.rm_dec.srcpred - dstpred = yield self.dec2.rm_dec.dstpred - pred_src_zero = yield self.dec2.rm_dec.pred_sz - pred_dst_zero = yield self.dec2.rm_dec.pred_dz - if pmode == SVP64PredMode.INT.value: - srcmask = dstmask = 
get_predint(self.gpr, dstpred) - if sv_ptype == SVPtype.P2.value: - srcmask = get_predint(self.gpr, srcpred) - elif pmode == SVP64PredMode.CR.value: - srcmask = dstmask = get_predcr(self.crl, dstpred, vl) - if sv_ptype == SVPtype.P2.value: - srcmask = get_predcr(self.crl, srcpred, vl) - # work out if the ssubsteps are completed - ssubstart = ssubstep == 0 - dsubstart = dsubstep == 0 - log(" pmode", pmode) - log(" pack/unpack", pack, unpack) - log(" reverse", reverse_gear) - log(" ptype", sv_ptype) - log(" srcpred", bin(srcpred)) - log(" dstpred", bin(dstpred)) - log(" srcmask", bin(srcmask)) - log(" dstmask", bin(dstmask)) - log(" pred_sz", bin(pred_src_zero)) - log(" pred_dz", bin(pred_dst_zero)) - log(" ssubstart", ssubstart) - log(" dsubstart", dsubstart) - - # okaaay, so here we simply advance srcstep (TODO dststep) - # this can ONLY be done at the beginning of the "for" loop - # (this is all actually a FSM so it's hell to keep track sigh) - if ssubstart: - # until the predicate mask has a "1" bit... or we run out of VL - # let srcstep==VL be the indicator to move to next instruction - if not pred_src_zero: - while (((1 << srcstep) & srcmask) == 0) and (srcstep != vl): - log(" sskip", bin(1 << srcstep)) - srcstep += 1 - if dsubstart: - # same for dststep - if not pred_dst_zero: - while (((1 << dststep) & dstmask) == 0) and (dststep != vl): - log(" dskip", bin(1 << dststep)) - dststep += 1 - - # now work out if the relevant mask bits require zeroing - if pred_dst_zero: - pred_dst_zero = ((1 << dststep) & dstmask) == 0 - if pred_src_zero: - pred_src_zero = ((1 << srcstep) & srcmask) == 0 - - # store new srcstep / dststep - self.new_srcstep, self.new_dststep = (srcstep, dststep) - self.new_ssubstep, self.new_dsubstep = (ssubstep, dsubstep) - self.pred_dst_zero, self.pred_src_zero = (pred_dst_zero, pred_src_zero) - log(" new srcstep", srcstep) - log(" new dststep", dststep) - log(" new ssubstep", ssubstep) - log(" new dsubstep", dsubstep) - def get_src_dststeps(self): """gets srcstep, dststep, and ssubstep, dsubstep """ return (self.new_srcstep, self.new_dststep, self.new_ssubstep, self.new_dsubstep) - def update_new_svstate_steps(self): - # note, do not get the bit-reversed srcstep here! - srcstep, dststep = self.new_srcstep, self.new_dststep - ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep + def update_svstate_namespace(self, overwrite_svstate=True): + if overwrite_svstate: + # note, do not get the bit-reversed srcstep here! 
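The predicate handling shown in svstate_pre_inc above advances srcstep/dststep past zero bits of the integer or CR predicate mask (unless zeroing is selected, in which case masked-out elements still execute and are zeroed). A simplified standalone model of the skip, with illustrative names:

    def skip_masked(step, mask, vl, pred_zero):
        # zeroing mode: masked-out elements are not skipped, they produce zeroes
        if pred_zero:
            return step
        # otherwise advance until a set mask bit is found, or step reaches VL
        while step != vl and not ((mask >> step) & 1):
            step += 1
        return step

    assert skip_masked(0, 0b0110, 4, False) == 1   # first active element
    assert skip_masked(2, 0b0110, 4, False) == 2   # already on an active element
    assert skip_masked(3, 0b0110, 4, False) == 4   # ran off the end: VL reached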
+            srcstep, dststep = self.new_srcstep, self.new_dststep
+            ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep
 
-        # update SVSTATE with new srcstep
-        self.svstate.srcstep = srcstep
-        self.svstate.dststep = dststep
-        self.svstate.ssubstep = ssubstep
-        self.svstate.dsubstep = dsubstep
+            # update SVSTATE with new srcstep
+            self.svstate.srcstep = srcstep
+            self.svstate.dststep = dststep
+            self.svstate.ssubstep = ssubstep
+            self.svstate.dsubstep = dsubstep
         self.namespace['SVSTATE'] = self.svstate
         yield self.dec2.state.svstate.eq(self.svstate.value)
         yield Settle()  # let decoder update
+
+    def update_new_svstate_steps(self, overwrite_svstate=True):
+        yield from self.update_svstate_namespace(overwrite_svstate)
         srcstep = self.svstate.srcstep
         dststep = self.svstate.dststep
         ssubstep = self.svstate.ssubstep
         dsubstep = self.svstate.dsubstep
+        pack = self.svstate.pack
+        unpack = self.svstate.unpack
         vl = self.svstate.vl
+        sv_mode = yield self.dec2.rm_dec.sv_mode
         subvl = yield self.dec2.rm_dec.rm_in.subvl
+        rm_mode = yield self.dec2.rm_dec.mode
+        ff_inv = yield self.dec2.rm_dec.inv
+        cr_bit = yield self.dec2.rm_dec.cr_sel
         log("    srcstep", srcstep)
         log("    dststep", dststep)
+        log("    pack", pack)
+        log("    unpack", unpack)
         log("    ssubstep", ssubstep)
         log("    dsubstep", dsubstep)
         log("    vl", vl)
         log("    subvl", subvl)
+        log("    rm_mode", rm_mode)
+        log("    sv_mode", sv_mode)
+        log("    inv", ff_inv)
+        log("    cr_bit", cr_bit)
 
         # check if end reached (we let srcstep overrun, above)
         # nothing needs doing (TODO zeroing): just do next instruction
-        return srcstep == vl or dststep == vl
+        if self.loopend:
+            return True
+        return ((ssubstep == subvl and srcstep == vl) or
+                (dsubstep == subvl and dststep == vl))
 
     def svstate_post_inc(self, insn_name, vf=0):
         # check if SV "Vertical First" mode is enabled
         vfirst = self.svstate.vfirst
         log("    SV Vertical First", vf, vfirst)
         if not vf and vfirst == 1:
+            if insn_name.startswith("sv.bc"):
+                self.update_pc_next()
+                return False
             self.update_nia()
             return True
 
@@ -1823,6 +2893,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers):
         dststep = self.svstate.dststep
         ssubstep = self.svstate.ssubstep
         dsubstep = self.svstate.dsubstep
+        pack = self.svstate.pack
+        unpack = self.svstate.unpack
         rm_mode = yield self.dec2.rm_dec.mode
         reverse_gear = yield self.dec2.rm_dec.reverse_gear
         sv_ptype = yield self.dec2.dec.op.SV_Ptype
@@ -1835,11 +2907,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers):
         log("    svstate.dststep", dststep)
         log("    svstate.ssubstep", ssubstep)
         log("    svstate.dsubstep", dsubstep)
+        log("    svstate.pack", pack)
+        log("    svstate.unpack", unpack)
         log("    mode", rm_mode)
         log("    reverse", reverse_gear)
         log("    out_vec", out_vec)
         log("    in_vec", in_vec)
-        log("    sv_ptype", sv_ptype, sv_ptype == SVPtype.P2.value)
+        log("    sv_ptype", sv_ptype, sv_ptype == SVPType.P2.value)
         # check if this was an sv.bc* and if so did it succeed
         if self.is_svp64_mode and insn_name.startswith("sv.bc"):
             end_loop = self.namespace['end_loop']
@@ -1851,13 +2925,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers):
         # check if srcstep needs incrementing by one, stop PC advancing
         # but for 2-pred both src/dest have to be checked.
         # XXX this might not be true! it may just be LD/ST
-        if sv_ptype == SVPtype.P2.value:
+        if sv_ptype == SVPType.P2.value:
             svp64_is_vector = (out_vec or in_vec)
         else:
             svp64_is_vector = out_vec
-        # loops end at the first "hit" (source or dest)
-        loopend = ((srcstep == vl-1 and ssubstep == subvl) or
-                   (dststep == vl-1 and dsubstep == subvl))
+        yield from self.advance_svstate_steps()
+        loopend = self.loopend
+        log("loopend", svp64_is_vector, loopend)
         if not svp64_is_vector or loopend:
             # reset loop to zero and update NIA
             self.svp64_reset_loop()
@@ -1866,44 +2941,20 @@ class ISACaller(ISACallerHelper, ISAFPHelpers):
             return True
 
         # still looping, advance and update NIA
-        yield from self.advance_svstate_steps()
         self.namespace['SVSTATE'] = self.svstate
+
         # not an SVP64 branch, so fix PC (NIA==CIA) for next loop
         # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64)
         # this way we keep repeating the same instruction (with new steps)
-        self.pc.NIA.value = self.pc.CIA.value
+        self.pc.NIA.eq(self.pc.CIA)
         self.namespace['NIA'] = self.pc.NIA
         log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA'])
         return False  # DO NOT allow PC update whilst Sub-PC loop running
 
-    def advance_svstate_steps(self, end_src=False, end_dst=False):
-        """ advance sub/steps. note that Pack/Unpack *INVERTS* the order.
-        TODO when Pack/Unpack is set, substep becomes the *outer* loop
-        """
-        subvl = yield self.dec2.rm_dec.rm_in.subvl
-        # first source step
-        ssubstep = self.svstate.ssubstep
-        end_sub = ssubstep == subvl
-        if end_sub:
-            if not end_src:
-                self.svstate.srcstep += SelectableInt(1, 7)
-            self.svstate.ssubstep = SelectableInt(0, 2)  # reset
-        else:
-            self.svstate.ssubstep += SelectableInt(1, 2)  # advance ssubstep
-        # now dest step
-        dsubstep = self.svstate.dsubstep
-        end_sub = dsubstep == subvl
-        if end_sub:
-            if not end_dst:
-                self.svstate.dststep += SelectableInt(1, 7)
-            self.svstate.dsubstep = SelectableInt(0, 2)  # reset
-        else:
-            self.svstate.dsubstep += SelectableInt(1, 2)  # advance ssubstep
-
     def update_pc_next(self):
         # UPDATE program counter
         self.pc.update(self.namespace, self.is_svp64_mode)
-        self.svstate.spr = self.namespace['SVSTATE']
+        #self.svstate.spr = self.namespace['SVSTATE']
         log("end of call", self.namespace['CIA'],
             self.namespace['NIA'],
             self.namespace['SVSTATE'])
@@ -1913,6 +2964,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers):
         self.svstate.dststep = 0
         self.svstate.ssubstep = 0
         self.svstate.dsubstep = 0
+        self.loopend = False
         log("    svstate.srcstep loop end (PC to update)")
         self.namespace['SVSTATE'] = self.svstate
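The tail of svstate_post_inc above is the Sub-PC loop control: while a horizontal-first element loop is still running, NIA is forced back to CIA so the same instruction re-executes with the advanced steps; once the loop ends (or the destination is scalar) the steps are reset and the PC is allowed to move on. A condensed standalone model of that decision, with illustrative names (the 4/8-byte step mirrors the CIA+4 / CIA+8 comment above):

    def next_pc(cia, is_svp64, is_vector, loopend):
        insn_len = 8 if is_svp64 else 4      # SVP64-prefixed vs plain v3.0B word
        if not is_vector or loopend:
            return cia + insn_len, True      # loop done (or scalar): PC advances
        return cia, False                    # still looping: repeat instruction

    assert next_pc(0x100, True, True, False) == (0x100, False)   # keep looping
    assert next_pc(0x100, True, True, True) == (0x108, True)     # loop finished
    assert next_pc(0x100, False, False, False) == (0x104, True)  # plain scalar op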