X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fopenpower%2Fdecoder%2Fisa%2Fcaller.py;h=da73f98b718c5087f0204ae5363954f4bc0beda9;hb=e22e5ec01f21a88185f17ec2db962642a47bf4f9;hp=38b239dcde18172f61db4db029181da970236635;hpb=ed837783f433489920d97aaefac4f969eea9f8d6;p=openpower-isa.git diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index 38b239dc..da73f98b 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -16,13 +16,17 @@ related bugs: from collections import namedtuple from copy import deepcopy from functools import wraps +import os +import sys +from elftools.elf.elffile import ELFFile # for isinstance from nmigen.sim import Settle +import openpower.syscalls from openpower.consts import (MSRb, PIb, # big-endian (PowerISA versions) SVP64CROffs, SVP64MODEb) from openpower.decoder.helpers import (ISACallerHelper, ISAFPHelpers, exts, - gtu, undefined) -from openpower.decoder.isa.mem import Mem, MemException + gtu, undefined, copy_assign_rhs) +from openpower.decoder.isa.mem import Mem, MemMMap, MemException, LoadedELF from openpower.decoder.isa.radixmmu import RADIX from openpower.decoder.isa.svshape import SVSHAPE from openpower.decoder.isa.svstate import SVP64State @@ -33,12 +37,22 @@ from openpower.decoder.power_enums import (FPTRANS_INSNS, CRInSel, CROutSel, SVP64LDSTmode, SVP64PredCR, SVP64PredInt, SVP64PredMode, SVP64RMMode, SVPType, XER_bits, - insns, spr_byname, spr_dict) -from openpower.decoder.power_insn import SVP64Instruction + insns, spr_byname, spr_dict, + BFP_FLAG_NAMES) +from openpower.insndb.core import SVP64Instruction from openpower.decoder.power_svp64 import SVP64RM, decode_extra from openpower.decoder.selectable_int import (FieldSelectableInt, - SelectableInt, selectconcat) -from openpower.util import LogKind, log + SelectableInt, selectconcat, + EFFECTIVELY_UNLIMITED) +from openpower.consts import DEFAULT_MSR +from openpower.fpscr import FPSCRState +from openpower.xer import XERState +from openpower.util import LogType, log + +LDST_UPDATE_INSNS = ['ldu', 'lwzu', 'lbzu', 'lhzu', 'lhau', 'lfsu', 'lfdu', + 'stwu', 'stbu', 'sthu', 'stfsu', 'stfdu', 'stdu', + ] + instruction_info = namedtuple('instruction_info', 'func read_regs uninit_regs write_regs ' + @@ -83,6 +97,8 @@ REG_SORT_ORDER = { "CA": 0, "CA32": 0, + "FPSCR": 1, + "overflow": 7, # should definitely be last "CR0": 8, # likewise } @@ -129,6 +145,17 @@ def create_args(reglist, extra=None): return retval +def create_full_args(*, read_regs, special_regs, uninit_regs, write_regs, + extra=None): + return create_args([ + *read_regs, *uninit_regs, *write_regs, *special_regs], extra=extra) + + +def is_ffirst_mode(dec2): + rm_mode = yield dec2.rm_dec.mode + return rm_mode == SVP64RMMode.FFIRST.value + + class GPR(dict): def __init__(self, decoder, isacaller, svstate, regfile): dict.__init__(self) @@ -195,9 +222,13 @@ class GPR(dict): rnum = rnum.value dict.__setitem__(self, rnum, value) - def getz(self, rnum): + def getz(self, rnum, rvalue=None): # rnum = rnum.value # only SelectableInt allowed - log("GPR getzero?", rnum) + log("GPR getzero?", rnum, rvalue) + if rvalue is not None: + if rnum == 0: + return SelectableInt(0, rvalue.bits) + return rvalue if rnum == 0: return SelectableInt(0, 64) return self[rnum] @@ -224,13 +255,14 @@ class GPR(dict): for j in range(8): s.append("%08x" % res[i+j]) s = ' '.join(s) - print("reg", "%2d" % i, s) + log("reg", "%2d" % i, s, kind=LogType.InstrInOuts) return res class SPR(dict): - def __init__(self, dec2, initial_sprs={}): + def __init__(self, dec2, initial_sprs={}, gpr=None): self.sd = dec2 + self.gpr = gpr # for SVSHAPE[0-3] dict.__init__(self) for key, v in initial_sprs.items(): if isinstance(key, SelectableInt): @@ -245,8 +277,8 @@ class SPR(dict): self[key] = v def __getitem__(self, key): - log("get spr", key) - log("dict", self.items()) + #log("get spr", key) + #log("dict", self.items()) # if key in special_sprs get the special spr, otherwise return key if isinstance(key, SelectableInt): key = key.value @@ -264,9 +296,9 @@ class SPR(dict): info = spr_dict[key] else: info = spr_byname[key] - dict.__setitem__(self, key, SelectableInt(0, info.length)) + self[key] = SelectableInt(0, info.length) res = dict.__getitem__(self, key) - log("spr returning", key, res) + #log("spr returning", key, res) return res def __setitem__(self, key, value): @@ -280,6 +312,10 @@ class SPR(dict): self.__setitem__('SRR0', value) if key == 'HSRR1': # HACK! self.__setitem__('SRR1', value) + if key == 1: + value = XERState(value) + if key in ('SVSHAPE0', 'SVSHAPE1', 'SVSHAPE2', 'SVSHAPE3'): + value = SVSHAPE(value, self.gpr) log("setting spr", key, value) dict.__setitem__(self, key, value) @@ -431,6 +467,8 @@ def get_idx_map(dec2, name): elif name == 'FRA': if in1_sel == In1Sel.FRA.value: return 1 + if in3_sel == In3Sel.FRA.value: + return 3 elif name == 'FRB': if in2_sel == In2Sel.FRB.value: return 2 @@ -442,6 +480,12 @@ def get_idx_map(dec2, name): return 1 if in3_sel == In3Sel.FRS.value: return 3 + elif name == 'FRT': + if in1_sel == In1Sel.FRT.value: + return 1 + elif name == 'RT': + if in1_sel == In1Sel.RT.value: + return 1 return None @@ -543,6 +587,15 @@ def get_cr_out(dec2, name): if name == 'CR1': # these are not actually calculated correctly if out_sel == CROutSel.CR1.value: return out, o_isvec + # check RC1 set? if so return implicit vector, this is a REAL bad hack + RC1 = yield dec2.rm_dec.RC1 + if RC1: + log("get_cr_out RC1 mode") + if name == 'CR0': + return 0, True # XXX TODO: offset CR0 from SVSTATE SPR + if name == 'CR1': + return 1, True # XXX TODO: offset CR1 from SVSTATE SPR + # nope - not found. log("get_cr_out not found", name) return None, False @@ -568,6 +621,9 @@ def get_out_map(dec2, name): elif name == 'FRA': if out_sel == OutSel.FRA.value: return True + elif name == 'FRS': + if out_sel == OutSel.FRS.value: + return True elif name == 'FRT': if out_sel == OutSel.FRT.value: return True @@ -1098,6 +1154,36 @@ class StepLoop: log(" new dststep", dststep) +class ExitSyscallCalled(Exception): + pass + + +class SyscallEmulator(openpower.syscalls.Dispatcher): + def __init__(self, isacaller): + self.__isacaller = isacaller + + host = os.uname().machine + bits = (64 if (sys.maxsize > (2**32)) else 32) + host = openpower.syscalls.architecture(arch=host, bits=bits) + + return super().__init__(guest="ppc64", host=host) + + def __call__(self, identifier, *arguments): + (identifier, *arguments) = map(int, (identifier, *arguments)) + return super().__call__(identifier, *arguments) + + def sys_exit_group(self, status, *rest): + self.__isacaller.halted = True + raise ExitSyscallCalled(status) + + def sys_write(self, fd, buf, count, *rest): + buf = self.__isacaller.mem.get_ctypes(buf, count, is_write=False) + try: + return os.write(fd, buf) + except OSError as e: + return -e.errno + + class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # decoder2 - an instance of power_decoder2 # regfile - a list of initial values for the registers @@ -1113,7 +1199,35 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): initial_pc=0, bigendian=False, mmu=False, - icachemmu=False): + icachemmu=False, + initial_fpscr=0, + insnlog=None, + use_mmap_mem=False, + use_syscall_emu=False, + emulating_mmap=False): + if use_syscall_emu: + self.syscall = SyscallEmulator(isacaller=self) + if not use_mmap_mem: + log("forcing use_mmap_mem due to use_syscall_emu active") + use_mmap_mem = True + else: + self.syscall = None + + # we will eventually be able to load ELF files without use_syscall_emu + # (e.g. the linux kernel), so do it in a separate if block + if isinstance(initial_insns, ELFFile): + if not use_mmap_mem: + log("forcing use_mmap_mem due to loading an ELF file") + use_mmap_mem = True + if not emulating_mmap: + log("forcing emulating_mmap due to loading an ELF file") + emulating_mmap = True + + # trace log file for model output. if None do nothing + self.insnlog = insnlog + self.insnlog_is_file = hasattr(insnlog, "write") + if not self.insnlog_is_file and self.insnlog: + self.insnlog = open(self.insnlog, "w") self.bigendian = bigendian self.halted = False @@ -1128,6 +1242,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if initial_insns is None: initial_insns = {} assert self.respect_pc == False, "instructions required to honor pc" + if initial_msr is None: + initial_msr = DEFAULT_MSR log("ISACaller insns", respect_pc, initial_insns, disassembly) log("ISACaller initial_msr", initial_msr) @@ -1162,20 +1278,36 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): initial_sprs = deepcopy(initial_sprs) # so as not to get modified self.gpr = GPR(decoder2, self, self.svstate, regfile) self.fpr = GPR(decoder2, self, self.svstate, fpregfile) - self.spr = SPR(decoder2, initial_sprs) # initialise SPRs before MMU + # initialise SPRs before MMU + self.spr = SPR(decoder2, initial_sprs, gpr=self.gpr) # set up 4 dummy SVSHAPEs if they aren't already set up for i in range(4): sname = 'SVSHAPE%d' % i val = self.spr.get(sname, 0) - # make sure it's an SVSHAPE - self.spr[sname] = SVSHAPE(val, self.gpr) + # make sure it's an SVSHAPE -- conversion done by SPR.__setitem__ + self.spr[sname] = val self.last_op_svshape = False # "raw" memory - self.mem = Mem(row_bytes=8, initial_mem=initial_mem, misaligned_ok=True) - self.mem.log_fancy(kind=LogKind.InstrInOuts) - self.imem = Mem(row_bytes=4, initial_mem=initial_insns) + if use_mmap_mem: + self.mem = MemMMap(row_bytes=8, + initial_mem=initial_mem, + misaligned_ok=True, + emulating_mmap=emulating_mmap) + self.imem = self.mem + lelf = self.mem.initialize(row_bytes=4, initial_mem=initial_insns) + if isinstance(lelf, LoadedELF): # stuff parsed from ELF + initial_pc = lelf.pc + for k, v in lelf.gprs.items(): + self.gpr[k] = SelectableInt(v, 64) + initial_fpscr = lelf.fpscr + self.mem.log_fancy(kind=LogType.InstrInOuts) + else: + self.mem = Mem(row_bytes=8, initial_mem=initial_mem, + misaligned_ok=True) + self.mem.log_fancy(kind=LogType.InstrInOuts) + self.imem = Mem(row_bytes=4, initial_mem=initial_insns) # MMU mode, redirect underlying Mem through RADIX if mmu: self.mem = RADIX(self.mem, self) @@ -1186,6 +1318,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # FPR (same as GPR except for FP nums) # 4.2.2 p124 FPSCR (definitely "separate" - not in SPR) # note that mffs, mcrfs, mtfsf "manage" this FPSCR + self.fpscr = FPSCRState(initial_fpscr) + # 2.3.1 CR (and sub-fields CR0..CR6 - CR0 SO comes from XER.SO) # note that mfocrf, mfcr, mtcr, mtocrf, mcrxrx "manage" CRs # -- Done @@ -1201,7 +1335,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.cr_backup = 0 # sigh, dreadful hack: for fail-first (VLi) # "undefined", just set to variable-bit-width int (use exts "max") - # self.undefined = SelectableInt(0, 256) # TODO, not hard-code 256! + # self.undefined = SelectableInt(0, EFFECTIVELY_UNLIMITED) self.namespace = {} self.namespace.update(self.spr) @@ -1219,12 +1353,16 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): 'SVSHAPE3': self.spr['SVSHAPE3'], 'CR': self.cr, 'MSR': self.msr, + 'FPSCR': self.fpscr, 'undefined': undefined, 'mode_is_64bit': True, 'SO': XER_bits['SO'], 'XLEN': 64 # elwidth overrides }) + for name in BFP_FLAG_NAMES: + setattr(self, name, 0) + # update pc to requested start point self.set_pc(initial_pc) @@ -1236,12 +1374,21 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.decoder = decoder2.dec self.dec2 = decoder2 - super().__init__(XLEN=self.namespace["XLEN"]) + super().__init__(XLEN=self.namespace["XLEN"], FPSCR=self.fpscr) + + def trace(self, out): + if self.insnlog is None: + return + self.insnlog.write(out) @property def XLEN(self): return self.namespace["XLEN"] + @property + def FPSCR(self): + return self.fpscr + def call_trap(self, trap_addr, trap_bit): """calls TRAP and sets up NIA to the new execution location. next instruction will begin at trap_addr. @@ -1256,7 +1403,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): TRAP function is callable from inside the pseudocode itself, hence the default arguments. when calling from inside ISACaller it is best to use call_trap() + + trap_addr: int | SelectableInt + the address to go to (before any modifications from `KAIVB`) + trap_bit: int | None + the bit in `SRR1` to set, `None` means don't set any bits. """ + if isinstance(trap_addr, SelectableInt): + trap_addr = trap_addr.value # https://bugs.libre-soc.org/show_bug.cgi?id=859 kaivb = self.spr['KAIVB'].value msr = self.namespace['MSR'].value @@ -1269,7 +1423,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if self.is_svp64_mode: self.spr['SVSRR0'] = self.namespace['SVSTATE'].value self.trap_nia = SelectableInt(trap_addr | (kaivb & ~0x1fff), 64) - self.spr['SRR1'][trap_bit] = 1 # change *copy* of MSR in SRR1 + if trap_bit is not None: + self.spr['SRR1'][trap_bit] = 1 # change *copy* of MSR in SRR1 # set exception bits. TODO: this should, based on the address # in figure 66 p1065 V3.0B and the table figure 65 p1063 set these @@ -1348,28 +1503,143 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.cr_backup = self.cr.value # sv.bc* need some extra fields - if self.is_svp64_mode and insn_name.startswith("sv.bc"): - # blegh grab bits manually - mode = yield self.dec2.rm_dec.rm_in.mode - # convert to SelectableInt before test - mode = SelectableInt(mode, 5) - bc_vlset = mode[SVP64MODEb.BC_VLSET] != 0 - bc_vli = mode[SVP64MODEb.BC_VLI] != 0 - bc_snz = mode[SVP64MODEb.BC_SNZ] != 0 - bc_vsb = yield self.dec2.rm_dec.bc_vsb - bc_lru = yield self.dec2.rm_dec.bc_lru - bc_gate = yield self.dec2.rm_dec.bc_gate - sz = yield self.dec2.rm_dec.pred_sz - self.namespace['mode'] = SelectableInt(mode, 5) - self.namespace['ALL'] = SelectableInt(bc_gate, 1) - self.namespace['VSb'] = SelectableInt(bc_vsb, 1) - self.namespace['LRu'] = SelectableInt(bc_lru, 1) - self.namespace['VLSET'] = SelectableInt(bc_vlset, 1) - self.namespace['VLI'] = SelectableInt(bc_vli, 1) - self.namespace['sz'] = SelectableInt(sz, 1) - self.namespace['SNZ'] = SelectableInt(bc_snz, 1) - - def handle_carry_(self, inputs, output, ca, ca32): + if not self.is_svp64_mode or not insn_name.startswith("sv.bc"): + return + + # blegh grab bits manually + mode = yield self.dec2.rm_dec.rm_in.mode + # convert to SelectableInt before test + mode = SelectableInt(mode, 5) + bc_vlset = mode[SVP64MODEb.BC_VLSET] != 0 + bc_vli = mode[SVP64MODEb.BC_VLI] != 0 + bc_snz = mode[SVP64MODEb.BC_SNZ] != 0 + bc_vsb = yield self.dec2.rm_dec.bc_vsb + bc_ctrtest = yield self.dec2.rm_dec.bc_ctrtest + bc_lru = yield self.dec2.rm_dec.bc_lru + bc_gate = yield self.dec2.rm_dec.bc_gate + sz = yield self.dec2.rm_dec.pred_sz + self.namespace['mode'] = SelectableInt(mode, 5) + self.namespace['ALL'] = SelectableInt(bc_gate, 1) + self.namespace['VSb'] = SelectableInt(bc_vsb, 1) + self.namespace['LRu'] = SelectableInt(bc_lru, 1) + self.namespace['CTRtest'] = SelectableInt(bc_ctrtest, 1) + self.namespace['VLSET'] = SelectableInt(bc_vlset, 1) + self.namespace['VLI'] = SelectableInt(bc_vli, 1) + self.namespace['sz'] = SelectableInt(sz, 1) + self.namespace['SNZ'] = SelectableInt(bc_snz, 1) + + def get_kludged_op_add_ca_ov(self, inputs, inp_ca_ov): + """ this was not at all necessary to do. this function massively + duplicates - in a laborious and complex fashion - the contents of + the CSV files that were extracted two years ago from microwatt's + source code. A-inversion is the "inv A" column, output inversion + is the "inv out" column, carry-in equal to 0 or 1 or CA is the + "cry in" column + + all of that information is available in + self.instrs[ins_name].op_fields + where info is usually assigned to self.instrs[ins_name] + + https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=openpower/isatables/minor_31.csv;hb=HEAD + + the immediate constants are *also* decoded correctly and placed + usually by DecodeIn2Imm into operand2, as part of power_decoder2.py + """ + def ca(a, b, ca_in, width): + mask = (1 << width) - 1 + y = (a & mask) + (b & mask) + ca_in + return y >> width + + asmcode = yield self.dec2.dec.op.asmcode + insn = insns.get(asmcode) + SI = yield self.dec2.dec.SI + SI &= 0xFFFF + CA, OV = inp_ca_ov + inputs = [i.value for i in inputs] + if SI & 0x8000: + SI -= 0x10000 + if insn in ("add", "addo", "addc", "addco"): + a = inputs[0] + b = inputs[1] + ca_in = 0 + elif insn == "addic" or insn == "addic.": + a = inputs[0] + b = SI + ca_in = 0 + elif insn in ("subf", "subfo", "subfc", "subfco"): + a = ~inputs[0] + b = inputs[1] + ca_in = 1 + elif insn == "subfic": + a = ~inputs[0] + b = SI + ca_in = 1 + elif insn == "adde" or insn == "addeo": + a = inputs[0] + b = inputs[1] + ca_in = CA + elif insn == "subfe" or insn == "subfeo": + a = ~inputs[0] + b = inputs[1] + ca_in = CA + elif insn == "addme" or insn == "addmeo": + a = inputs[0] + b = ~0 + ca_in = CA + elif insn == "addze" or insn == "addzeo": + a = inputs[0] + b = 0 + ca_in = CA + elif insn == "subfme" or insn == "subfmeo": + a = ~inputs[0] + b = ~0 + ca_in = CA + elif insn == "subfze" or insn == "subfzeo": + a = ~inputs[0] + b = 0 + ca_in = CA + elif insn == "addex": + # CA[32] aren't actually written, just generate so we have + # something to return + ca64 = ov64 = ca(inputs[0], inputs[1], OV, 64) + ca32 = ov32 = ca(inputs[0], inputs[1], OV, 32) + return ca64, ca32, ov64, ov32 + elif insn == "neg" or insn == "nego": + a = ~inputs[0] + b = 0 + ca_in = 1 + else: + raise NotImplementedError( + "op_add kludge unimplemented instruction: ", asmcode, insn) + + ca64 = ca(a, b, ca_in, 64) + ca32 = ca(a, b, ca_in, 32) + ov64 = ca64 != ca(a, b, ca_in, 63) + ov32 = ca32 != ca(a, b, ca_in, 31) + return ca64, ca32, ov64, ov32 + + def handle_carry_(self, inputs, output, ca, ca32, inp_ca_ov): + if ca is not None and ca32 is not None: + return + op = yield self.dec2.e.do.insn_type + if op == MicrOp.OP_ADD.value and ca is None and ca32 is None: + retval = yield from self.get_kludged_op_add_ca_ov( + inputs, inp_ca_ov) + ca, ca32, ov, ov32 = retval + asmcode = yield self.dec2.dec.op.asmcode + if insns.get(asmcode) == 'addex': + # TODO: if 32-bit mode, set ov to ov32 + self.spr['XER'][XER_bits['OV']] = ov + self.spr['XER'][XER_bits['OV32']] = ov32 + log(f"write OV/OV32 OV={ov} OV32={ov32}", + kind=LogType.InstrInOuts) + else: + # TODO: if 32-bit mode, set ca to ca32 + self.spr['XER'][XER_bits['CA']] = ca + self.spr['XER'][XER_bits['CA32']] = ca32 + log(f"write CA/CA32 CA={ca} CA32={ca32}", + kind=LogType.InstrInOuts) + return inv_a = yield self.dec2.e.do.invert_in if inv_a: inputs[0] = ~inputs[0] @@ -1414,7 +1684,17 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if ca32 is None: # already written self.spr['XER'][XER_bits['CA32']] = cy32 - def handle_overflow(self, inputs, output, div_overflow): + def handle_overflow(self, inputs, output, div_overflow, inp_ca_ov): + op = yield self.dec2.e.do.insn_type + if op == MicrOp.OP_ADD.value: + retval = yield from self.get_kludged_op_add_ca_ov( + inputs, inp_ca_ov) + ca, ca32, ov, ov32 = retval + # TODO: if 32-bit mode, set ov to ov32 + self.spr['XER'][XER_bits['OV']] = ov + self.spr['XER'][XER_bits['OV32']] = ov32 + self.spr['XER'][XER_bits['SO']] |= ov + return if hasattr(self.dec2.e.do, "invert_in"): inv_a = yield self.dec2.e.do.invert_in if inv_a: @@ -1475,10 +1755,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): else: SO = self.spr['XER'][XER_bits['SO']] log("handle_comparison SO", SO.value, - "overflow", overflow, - "zero", zero.value, - "+ve", positive.value, - "-ve", negative.value) + "overflow", overflow, + "zero", zero.value, + "+ve", positive.value, + "-ve", negative.value) # alternative overflow checking (setvl mainly at the moment) if overflow is not None and overflow == 1: SO = SelectableInt(1, 1) @@ -1486,6 +1766,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): cr_field = selectconcat(negative, positive, zero, SO) log("handle_comparison cr_field", self.cr, cr_idx, cr_field) self.crl[cr_idx].eq(cr_field) + return cr_field def set_pc(self, pc_val): self.namespace['NIA'] = SelectableInt(pc_val, 64) @@ -1509,6 +1790,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): pc, insn = self.get_next_insn() yield from self.setup_next_insn(pc, insn) + # cache since it's really slow to construct + __PREFIX_CACHE = SVP64Instruction.Prefix(SelectableInt(value=0, bits=32)) + + def __decode_prefix(self, opcode): + pfx = self.__PREFIX_CACHE + pfx.storage.eq(opcode) + return pfx + def setup_next_insn(self, pc, ins): """set up next instruction """ @@ -1528,7 +1817,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): yield Settle() opcode = yield self.dec2.dec.opcode_in opcode = SelectableInt(value=opcode, bits=32) - pfx = SVP64Instruction.Prefix(opcode) + pfx = self.__decode_prefix(opcode) log("prefix test: opcode:", pfx.PO, bin(pfx.PO), pfx.id) self.is_svp64_mode = bool((pfx.PO == 0b000001) and (pfx.id == 0b11)) self.pc.update_nia(self.is_svp64_mode) @@ -1592,6 +1881,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # not supported yet: raise e # ... re-raise + # append to the trace log file + self.trace(" # %s\n" % code) + log("gprs after code", code) self.gpr.dump() crs = [] @@ -1717,7 +2009,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): dec_insn = yield self.dec2.e.do.insn return dec_insn & (1 << 20) != 0 # sigh - XFF.spr[-1]? - def call(self, name): + def call(self, name, syscall_emu_active=False): """call(opcode) - the primary execution point for instructions """ self.last_st_addr = None # reset the last known store address @@ -1731,7 +2023,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # TODO, asmregs is from the spec, e.g. add RT,RA,RB # see http://bugs.libre-riscv.org/show_bug.cgi?id=282 asmop = yield from self.get_assembly_name() - log("call", ins_name, asmop) + log("call", ins_name, asmop, + kind=LogType.InstrInOuts) # sv.setvl is *not* a loop-function. sigh log("is_svp64_mode", self.is_svp64_mode, asmop) @@ -1764,6 +2057,33 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): self.halted = True return + # User mode system call emulation consists of several steps: + # 1. Detect whether instruction is sc or scv. + # 2. Call the HDL implementation which invokes trap. + # 3. Reroute the guest system call to host system call. + # 4. Force return from the interrupt as if we had guest OS. + if ((asmop in ("sc", "scv")) and + (self.syscall is not None) and + not syscall_emu_active): + # Memoize PC and trigger an interrupt + if self.respect_pc: + pc = self.pc.CIA.value + else: + pc = self.fake_pc + yield from self.call(asmop, syscall_emu_active=True) + + # Reroute the syscall to host OS + identifier = self.gpr(0) + arguments = map(self.gpr, range(3, 9)) + result = self.syscall(identifier, *arguments) + self.gpr.write(3, result, False, self.namespace["XLEN"]) + + # Return from interrupt + yield from self.call("rfid", syscall_emu_active=True) + return + elif ((name in ("rfid", "hrfid")) and syscall_emu_active): + asmop = "rfid" + # check illegal instruction illegal = False if ins_name not in ['mtcrf', 'mtocrf']: @@ -1772,19 +2092,33 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # list of instructions not being supported by binutils (.long) dotstrp = asmop[:-1] if asmop[-1] == '.' else asmop if dotstrp in [*FPTRANS_INSNS, + *LDST_UPDATE_INSNS, 'ffmadds', 'fdmadds', 'ffadds', - 'mins', 'maxs', 'minu', 'maxu', + 'minmax', + "brh", "brw", "brd", 'setvl', 'svindex', 'svremap', 'svstep', 'svshape', 'svshape2', - 'grev', 'ternlogi', 'bmask', 'cprop', + 'ternlogi', 'bmask', 'cprop', 'gbbd', 'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd', 'fmvis', 'fishmv', 'pcdec', "maddedu", "divmod2du", "dsld", "dsrd", "maddedus", - "shadd", "shadduw", + "sadd", "saddw", "sadduw", + "cffpr", "cffpro", + "mffpr", "mffprs", + "ctfpr", "ctfprs", + "mtfpr", "mtfprs", + "maddsubrs", "maddrs", "msubrs", + "cfuged", "cntlzdm", "cnttzdm", "pdepd", "pextd", + "setbc", "setbcr", "setnbc", "setnbcr", ]: illegal = False ins_name = dotstrp + # match against instructions treated as nop, see nop below + if asmop.startswith("dcbt"): + illegal = False + ins_name = "nop" + # branch-conditional redirects to sv.bc if asmop.startswith('bc') and self.is_svp64_mode: ins_name = 'sv.%s' % ins_name @@ -1830,7 +2164,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): ew_src = 8 << (3-int(ew_src)) # convert to bitlength ew_dst = 8 << (3-int(ew_dst)) # convert to bitlength xlen = max(ew_src, ew_dst) - log("elwdith", ew_src, ew_dst) + log("elwidth", ew_src, ew_dst) log("XLEN:", self.is_svp64_mode, xlen) # look up instruction in ISA.instrs, prepare namespace @@ -1843,9 +2177,16 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): yield from self.prep_namespace(ins_name, info.form, info.op_fields, xlen) + # dict retains order + inputs = dict.fromkeys(create_full_args( + read_regs=info.read_regs, special_regs=info.special_regs, + uninit_regs=info.uninit_regs, write_regs=info.write_regs)) + # preserve order of register names - input_names = create_args(list(info.read_regs) + - list(info.uninit_regs)) + write_without_special_regs = OrderedSet(info.write_regs) + write_without_special_regs -= OrderedSet(info.special_regs) + input_names = create_args([ + *info.read_regs, *info.uninit_regs, *write_without_special_regs]) log("input names", input_names) # get SVP64 entry for the current instruction @@ -1882,7 +2223,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if self.is_svp64_mode and vl == 0: self.pc.update(self.namespace, self.is_svp64_mode) log("SVP64: VL=0, end of call", self.namespace['CIA'], - self.namespace['NIA'], kind=LogKind.InstrInOuts) + self.namespace['NIA'], kind=LogType.InstrInOuts) return # for when SVREMAP is active, using pre-arranged schedule. @@ -1904,14 +2245,37 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): remap_active = yield self.dec2.remap_active else: remap_active = False - log("remap active", bin(remap_active)) + log("remap active", bin(remap_active), self.is_svp64_mode) + + # LDST does *not* allow elwidth overrides on RA (Effective Address). + # this has to be detected. XXX TODO: RB for ldst-idx *may* need + # conversion (to 64-bit) also. + # see write reg this *HAS* to also override XLEN to 64 on LDST/Update + sv_mode = yield self.dec2.rm_dec.sv_mode + is_ldst = (sv_mode in [SVMode.LDST_IDX.value, SVMode.LDST_IMM.value] \ + and self.is_svp64_mode) + log("is_ldst", sv_mode, is_ldst) # main input registers (RT, RA ...) - inputs = [] for name in input_names: - regval = (yield from self.get_input(name, ew_src)) - log("regval name", name, regval) - inputs.append(regval) + if name == "overflow": + inputs[name] = SelectableInt(0, 1) + elif name == "FPSCR": + inputs[name] = self.FPSCR + elif name in ("CA", "CA32", "OV", "OV32"): + inputs[name] = self.spr['XER'][XER_bits[name]] + elif name in "CR0": + inputs[name] = self.crl[0] + elif name in spr_byname: + inputs[name] = self.spr[name] + elif is_ldst and name == 'RA': + regval = (yield from self.get_input(name, ew_src, 64)) + log("EA (RA) regval name", name, regval) + inputs[name] = regval + else: + regval = (yield from self.get_input(name, ew_src, xlen)) + log("regval name", name, regval) + inputs[name] = regval # arrrrgh, awful hack, to get _RT into namespace if ins_name in ['setvl', 'svstep']: @@ -1932,29 +2296,49 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # "special" registers for special in info.special_regs: if special in special_sprs: - inputs.append(self.spr[special]) + inputs[special] = self.spr[special] else: - inputs.append(self.namespace[special]) + inputs[special] = self.namespace[special] # clear trap (trap) NIA self.trap_nia = None # check if this was an sv.bc* and create an indicator that # this is the last check to be made as a loop. combined with - # the ALL/ANY mode we can early-exit + # the ALL/ANY mode we can early-exit. note that BI (to test) + # is an input so there is no termination if BI is scalar + # (because early-termination is for *output* scalars) if self.is_svp64_mode and ins_name.startswith("sv.bc"): - no_in_vec = yield self.dec2.no_in_vec # BI is scalar - end_loop = no_in_vec or srcstep == vl-1 or dststep == vl-1 + end_loop = srcstep == vl-1 or dststep == vl-1 self.namespace['end_loop'] = SelectableInt(end_loop, 1) + inp_ca_ov = (self.spr['XER'][XER_bits['CA']].value, + self.spr['XER'][XER_bits['OV']].value) + + for k, v in inputs.items(): + if v is None: + v = SelectableInt(0, self.XLEN) + # prevent pseudo-code from modifying input registers + v = copy_assign_rhs(v) + if isinstance(v, SelectableInt): + v.ok = False + inputs[k] = v + # execute actual instruction here (finally) log("inputs", inputs) + inputs = list(inputs.values()) results = info.func(self, *inputs) output_names = create_args(info.write_regs) outs = {} + # record .ok before anything after the pseudo-code can modify it + outs_ok = {} for out, n in zip(results or [], output_names): outs[n] = out + outs_ok[n] = True + if isinstance(out, SelectableInt): + outs_ok[n] = out.ok log("results", outs) + log("results ok", outs_ok) # "inject" decorator takes namespace from function locals: we need to # overwrite NIA being overwritten (sigh) @@ -1979,21 +2363,29 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): ca32 = outs.get("CA32") log("carry already done?", ca, ca32, output_names) - carry_en = yield self.dec2.e.do.output_carry + # soc test_pipe_caller tests don't have output_carry + has_output_carry = hasattr(self.dec2.e.do, "output_carry") + carry_en = has_output_carry and (yield self.dec2.e.do.output_carry) if carry_en: - yield from self.handle_carry_(inputs, results[0], ca, ca32) + yield from self.handle_carry_( + inputs, results[0], ca, ca32, inp_ca_ov=inp_ca_ov) - # get outout named "overflow" and "CR0" + # get output named "overflow" and "CR0" overflow = outs.get('overflow') cr0 = outs.get('CR0') + cr1 = outs.get('CR1') - if not self.is_svp64_mode: # yeah just no. not in parallel processing + # soc test_pipe_caller tests don't have oe + has_oe = hasattr(self.dec2.e.do, "oe") + # yeah just no. not in parallel processing + if has_oe and not self.is_svp64_mode: # detect if overflow was in return result ov_en = yield self.dec2.e.do.oe.oe ov_ok = yield self.dec2.e.do.oe.ok log("internal overflow", ins_name, overflow, "en?", ov_en, ov_ok) if ov_en & ov_ok: - yield from self.handle_overflow(inputs, results[0], overflow) + yield from self.handle_overflow( + inputs, results[0], overflow, inp_ca_ov=inp_ca_ov) # only do SVP64 dest predicated Rc=1 if dest-pred is not enabled rc_en = False @@ -2004,7 +2396,12 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # XXX TODO: now that CR0 is supported, sort out svstep's pseudocode # to write directly to CR0 instead of in ISACaller. hooyahh. if rc_en and ins_name not in ['svstep']: - yield from self.do_rc_ov(ins_name, results[0], overflow, cr0) + if outs_ok.get('FPSCR', False): + FPSCR = outs['FPSCR'] + else: + FPSCR = self.FPSCR + yield from self.do_rc_ov( + ins_name, results[0], overflow, cr0, cr1, FPSCR) # check failfirst ffirst_hit = False, False @@ -2012,11 +2409,23 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): sv_mode = yield self.dec2.rm_dec.sv_mode is_cr = sv_mode == SVMode.CROP.value chk = rc_en or is_cr + if outs_ok.get('CR', False): + # early write so check_ffirst can see value + self.namespace['CR'].eq(outs['CR']) ffirst_hit = (yield from self.check_ffirst(info, chk, srcstep)) # any modified return results? - yield from self.do_outregs_nia(asmop, ins_name, info, outs, - carry_en, rc_en, ffirst_hit, ew_dst) + yield from self.do_outregs( + info, outs, carry_en, ffirst_hit, ew_dst, outs_ok) + + # check if a FP Exception occurred. TODO for DD-FFirst, check VLi + # and raise the exception *after* if VLi=1 but if VLi=0 then + # truncate and make the exception "disappear". + if self.FPSCR.FEX and (self.msr[MSRb.FE0] or self.msr[MSRb.FE1]): + self.call_trap(0x700, PIb.FP) + return + + yield from self.do_nia(asmop, ins_name, rc_en, ffirst_hit) def check_ffirst(self, info, rc_en, srcstep): """fail-first mode: checks a bit of Rc Vector, truncates VL @@ -2032,7 +2441,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): log(" vli", vli_) log(" cr_bit", cr_bit) log(" rc_en", rc_en) - if not rc_en or rm_mode != SVP64RMMode.FFIRST.value: + if not rc_en or not is_ffirst_mode(self.dec2): return False, False # get the CR vevtor, do BO-test crf = "CR0" @@ -2053,9 +2462,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): yield Settle() # let decoder update return True, vli_ - def do_rc_ov(self, ins_name, result, overflow, cr0): - if ins_name.startswith("f"): - rc_reg = "CR1" # not calculated correctly yet (not FP compares) + def do_rc_ov(self, ins_name, result, overflow, cr0, cr1, FPSCR): + cr_out = yield self.dec2.op.cr_out + if cr_out == CROutSel.CR1.value: + rc_reg = "CR1" else: rc_reg = "CR0" regnum, is_vec = yield from get_cr_out(self.dec2, rc_reg) @@ -2063,35 +2473,53 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): is_setvl = ins_name in ('svstep', 'setvl') if is_setvl: result = SelectableInt(result.vl, 64) - #else: + # else: # overflow = None # do not override overflow except in setvl - # if there was not an explicit CR0 in the pseudocode, do implicit Rc=1 - if cr0 is None: - self.handle_comparison(result, regnum, overflow, no_so=is_setvl) + if rc_reg == "CR1": + if cr1 is None: + cr1 = int(FPSCR.FX) << 3 + cr1 |= int(FPSCR.FEX) << 2 + cr1 |= int(FPSCR.VX) << 1 + cr1 |= int(FPSCR.OX) + log("default fp cr1", cr1) + else: + log("explicit cr1", cr1) + self.crl[regnum].eq(cr1) + elif cr0 is None: + # if there was not an explicit CR0 in the pseudocode, + # do implicit Rc=1 + c = self.handle_comparison(result, regnum, overflow, no_so=is_setvl) + log("implicit cr0", c) else: # otherwise we just blat CR0 into the required regnum - log("explicit rc0", cr0) + log("explicit cr0", cr0) self.crl[regnum].eq(cr0) - def do_outregs_nia(self, asmop, ins_name, info, outs, - carry_en, rc_en, ffirst_hit, ew_dst): + def do_outregs(self, info, outs, ca_en, ffirst_hit, ew_dst, outs_ok): ffirst_hit, vli = ffirst_hit - # write out any regs for this instruction - for name, output in outs.items(): - yield from self.check_write(info, name, output, carry_en, ew_dst) + # write out any regs for this instruction, but only if fail-first is ok + # XXX TODO: allow CR-vector to be written out even if ffirst fails + if not ffirst_hit or vli: + for name, output in outs.items(): + if not outs_ok[name]: + log("skipping writing output with .ok=False", name, output) + continue + yield from self.check_write(info, name, output, ca_en, ew_dst) # restore the CR value on non-VLI failfirst (from sv.cmp and others # which write directly to CR in the pseudocode (gah, what a mess) # if ffirst_hit and not vli: # self.cr.value = self.cr_backup + def do_nia(self, asmop, ins_name, rc_en, ffirst_hit): + ffirst_hit, vli = ffirst_hit if ffirst_hit: self.svp64_reset_loop() nia_update = True else: # check advancement of src/dst/sub-steps and if PC needs updating - nia_update = (yield from self.check_step_increment(rc_en, - asmop, ins_name)) + nia_update = (yield from self.check_step_increment( + rc_en, asmop, ins_name)) if nia_update: self.update_pc_next() @@ -2114,14 +2542,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if op == MicrOp.OP_LOAD.value: if remap_active: offsmul = yield self.dec2.in1_step - log("D-field REMAP src", imm, offsmul) + log("D-field REMAP src", imm, offsmul, ldstmode) else: offsmul = (srcstep * (subvl+1)) + ssubstep - log("D-field src", imm, offsmul) + log("D-field src", imm, offsmul, ldstmode) elif op == MicrOp.OP_STORE.value: # XXX NOTE! no bit-reversed STORE! this should not ever be used offsmul = (dststep * (subvl+1)) + dsubstep - log("D-field dst", imm, offsmul) + log("D-field dst", imm, offsmul, ldstmode) # Unit-Strided LD/ST adds offset*width to immediate if ldstmode == SVP64LDSTmode.UNITSTRIDE.value: ldst_len = yield self.dec2.e.do.data_len @@ -2146,7 +2574,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): else: self.namespace['D'] = imm - def get_input(self, name, ew_src): + def get_input(self, name, ew_src, xlen): # using PowerDecoder2, first, find the decoder index. # (mapping name RA RB RC RS to in1, in2, in3) regnum, is_vec = yield from get_idx_in(self.dec2, name, True) @@ -2167,17 +2595,28 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): regname = "_" + name if not self.is_svp64_mode or ew_src == 64: self.namespace[regname] = regnum - elif regname in self.namespace: - del self.namespace[regname] + else: + # FIXME: we're trying to access a sub-register, plain register + # numbers don't work for that. for now, just pass something that + # can be compared to 0 and probably will cause an error if misused. + # see https://bugs.libre-soc.org/show_bug.cgi?id=1221 + self.namespace[regname] = regnum * 10000 if not self.is_svp64_mode or not self.pred_src_zero: log('reading reg %s %s' % (name, str(regnum)), is_vec) if name in fregs: - reg_val = SelectableInt(self.fpr(base, is_vec, offs, ew_src)) - log("read reg %d/%d: 0x%x" % (base, offs, reg_val.value)) + fval = self.fpr(base, is_vec, offs, ew_src) + reg_val = SelectableInt(fval) + assert ew_src == self.XLEN, "TODO fix elwidth conversion" + self.trace("r:FPR:%d:%d:%d " % (base, offs, ew_src)) + log("read fp reg %d/%d: 0x%x" % (base, offs, reg_val.value), + kind=LogType.InstrInOuts) elif name is not None: - reg_val = SelectableInt(self.gpr(base, is_vec, offs, ew_src)) - log("read reg %d/%d: 0x%x" % (base, offs, reg_val.value)) + gval = self.gpr(base, is_vec, offs, ew_src) + reg_val = SelectableInt(gval.value, bits=xlen) + self.trace("r:GPR:%d:%d:%d " % (base, offs, ew_src)) + log("read int reg %d/%d: 0x%x" % (base, offs, reg_val.value), + kind=LogType.InstrInOuts) else: log('zero input reg %s %s' % (name, str(regnum)), is_vec) reg_val = SelectableInt(0, ew_src) @@ -2251,8 +2690,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): if name == 'CR0': # ignore, done already (above) return if isinstance(output, int): - output = SelectableInt(output, 256) - # write carry flafs + output = SelectableInt(output, EFFECTIVELY_UNLIMITED) + # write FPSCR + if name in ['FPSCR', ]: + log("write FPSCR 0x%x" % (output.value)) + self.FPSCR.eq(output) + return + # write carry flags if name in ['CA', 'CA32']: if carry_en: log("writing %s to XER" % name, output) @@ -2264,7 +2708,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # write special SPRs if name in info.special_regs: log('writing special %s' % name, output, special_sprs) - log("write reg %s 0x%x" % (name, output.value)) + log("write reg %s 0x%x" % (name, output.value), + kind=LogType.InstrInOuts) if name in special_sprs: self.spr[name] = output else: @@ -2288,17 +2733,35 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # check zeroing due to predicate bit being zero if self.is_svp64_mode and self.pred_dst_zero: log('zeroing reg %s %s' % (str(regnum), str(output)), is_vec) - output = SelectableInt(0, 256) + output = SelectableInt(0, EFFECTIVELY_UNLIMITED) log("write reg %s%s 0x%x ew %d" % (reg_prefix, str(regnum), output.value, ew_dst), - kind=LogKind.InstrInOuts) + kind=LogType.InstrInOuts) # zero-extend tov64 bit begore storing (should use EXT oh well) if output.bits > 64: output = SelectableInt(output.value, 64) + rnum, base, offset = regnum if name in fregs: self.fpr.write(regnum, output, is_vec, ew_dst) - else: - self.gpr.write(regnum, output, is_vec, ew_dst) + self.trace("w:FPR:%d:%d:%d " % (rnum, offset, ew_dst)) + return + + # LDST/Update does *not* allow elwidths on RA (Effective Address). + # this has to be detected, and overridden. see get_input (related) + sv_mode = yield self.dec2.rm_dec.sv_mode + is_ldst = (sv_mode in [SVMode.LDST_IDX.value, SVMode.LDST_IMM.value] \ + and self.is_svp64_mode) + if is_ldst and name in ['EA', 'RA']: + op = self.dec2.dec.op + if hasattr(op, "upd"): + # update mode LD/ST uses read-reg A also as an output + upd = yield op.upd + log("write is_ldst is_update", sv_mode, is_ldst, upd) + if upd == LDSTMode.update.value: + ew_dst = 64 # override for RA (EA) to 64-bit + + self.gpr.write(regnum, output, is_vec, ew_dst) + self.trace("w:GPR:%d:%d:%d " % (rnum, offset, ew_dst)) def check_step_increment(self, rc_en, asmop, ins_name): # check if it is the SVSTATE.src/dest step that needs incrementing @@ -2457,6 +2920,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): vfirst = self.svstate.vfirst log(" SV Vertical First", vf, vfirst) if not vf and vfirst == 1: + # SV Branch-Conditional required to be as-if-vector + # because there *is* no destination register + # (SV normally only terminates on 1st scalar reg written + # except in [slightly-misnamed] mapreduce mode) + if insn_name.startswith("sv.bc") or ffirst: + self.update_pc_next() + return False self.update_nia() return True @@ -2477,6 +2947,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): sv_ptype = yield self.dec2.dec.op.SV_Ptype out_vec = not (yield self.dec2.no_out_vec) in_vec = not (yield self.dec2.no_in_vec) + rm_mode = yield self.dec2.rm_dec.mode log(" svstate.vl", vl) log(" svstate.mvl", mvl) log(" rm.subvl", subvl) @@ -2491,6 +2962,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): log(" out_vec", out_vec) log(" in_vec", in_vec) log(" sv_ptype", sv_ptype, sv_ptype == SVPType.P2.value) + log(" rm_mode", rm_mode) # check if this was an sv.bc* and if so did it succeed if self.is_svp64_mode and insn_name.startswith("sv.bc"): end_loop = self.namespace['end_loop'] @@ -2506,6 +2978,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): svp64_is_vector = (out_vec or in_vec) else: svp64_is_vector = out_vec + # also if data-dependent fail-first is used, only in_vec is tested, + # allowing *scalar destinations* to be used as an accumulator. + # effectively this implies /mr (mapreduce mode) is 100% on with ddffirst + # see https://bugs.libre-soc.org/show_bug.cgi?id=1183#c16 + if is_ffirst_mode(self.dec2): + svp64_is_vector = in_vec + # loops end at the first "hit" (source or dest) yield from self.advance_svstate_steps() loopend = self.loopend @@ -2523,7 +3002,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): # not an SVP64 branch, so fix PC (NIA==CIA) for next loop # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64) # this way we keep repeating the same instruction (with new steps) - self.pc.NIA.value = self.pc.CIA.value + self.pc.NIA.eq(self.pc.CIA) self.namespace['NIA'] = self.pc.NIA log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA']) return False # DO NOT allow PC update whilst Sub-PC loop running