fix elwidth overrides when sw=8

[openpower-isa.git] / src / openpower / decoder / isa / caller.py
diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py

index 5faa41fe2c20b97909c78bc1f031593ee331ed93..9a8c24817c27f77ea51003ab2ecd1af39e271a93 100644 (file)
--- a/src/openpower/decoder/isa/caller.py
+++ b/src/openpower/decoder/isa/caller.py
@@ -16,13 +16,17 @@ related bugs:
  from collections import namedtuple
  from copy import deepcopy
  from functools import wraps
  from collections import namedtuple
  from copy import deepcopy
  from functools import wraps
+import os
+import sys
+from elftools.elf.elffile import ELFFile  # for isinstance
  
  from nmigen.sim import Settle
  
  from nmigen.sim import Settle
+import openpower.syscalls
  from openpower.consts import (MSRb, PIb,  # big-endian (PowerISA versions)
                                SVP64CROffs, SVP64MODEb)
  from openpower.decoder.helpers import (ISACallerHelper, ISAFPHelpers, exts,
  from openpower.consts import (MSRb, PIb,  # big-endian (PowerISA versions)
                                SVP64CROffs, SVP64MODEb)
  from openpower.decoder.helpers import (ISACallerHelper, ISAFPHelpers, exts,
-                                       gtu, undefined)
-from openpower.decoder.isa.mem import Mem, MemException
+                                       gtu, undefined, copy_assign_rhs)
+from openpower.decoder.isa.mem import Mem, MemMMap, MemException, LoadedELF
  from openpower.decoder.isa.radixmmu import RADIX
  from openpower.decoder.isa.svshape import SVSHAPE
  from openpower.decoder.isa.svstate import SVP64State
  from openpower.decoder.isa.radixmmu import RADIX
  from openpower.decoder.isa.svshape import SVSHAPE
  from openpower.decoder.isa.svstate import SVP64State
@@ -32,13 +36,23 @@ from openpower.decoder.power_enums import (FPTRANS_INSNS, CRInSel, CROutSel,
                                             MicrOp, OutSel, SVMode,
                                             SVP64LDSTmode, SVP64PredCR,
                                             SVP64PredInt, SVP64PredMode,
                                             MicrOp, OutSel, SVMode,
                                             SVP64LDSTmode, SVP64PredCR,
                                             SVP64PredInt, SVP64PredMode,
-                                           SVP64RMMode, SVPtype, XER_bits,
-                                           insns, spr_byname, spr_dict)
-from openpower.decoder.power_insn import SVP64Instruction
+                                           SVP64RMMode, SVPType, XER_bits,
+                                           insns, spr_byname, spr_dict,
+                                           BFP_FLAG_NAMES)
+from openpower.insndb.core import SVP64Instruction
  from openpower.decoder.power_svp64 import SVP64RM, decode_extra
  from openpower.decoder.selectable_int import (FieldSelectableInt,
  from openpower.decoder.power_svp64 import SVP64RM, decode_extra
  from openpower.decoder.selectable_int import (FieldSelectableInt,
-                                              SelectableInt, selectconcat)
-from openpower.util import LogKind, log
+                                              SelectableInt, selectconcat,
+                                              EFFECTIVELY_UNLIMITED)
+from openpower.consts import DEFAULT_MSR
+from openpower.fpscr import FPSCRState
+from openpower.xer import XERState
+from openpower.util import LogType, log
+
+LDST_UPDATE_INSNS = ['ldu', 'lwzu', 'lbzu', 'lhzu', 'lhau', 'lfsu', 'lfdu',
+                     'stwu', 'stbu', 'sthu', 'stfsu', 'stfdu', 'stdu',
+                     ]
+
  
  instruction_info = namedtuple('instruction_info',
                                'func read_regs uninit_regs write_regs ' +
  
  instruction_info = namedtuple('instruction_info',
                                'func read_regs uninit_regs write_regs ' +
@@ -83,6 +97,8 @@ REG_SORT_ORDER = {
      "CA": 0,
      "CA32": 0,
  
      "CA": 0,
      "CA32": 0,
  
+    "FPSCR": 1,
+
      "overflow": 7,  # should definitely be last
      "CR0": 8,       # likewise
  }
      "overflow": 7,  # should definitely be last
      "CR0": 8,       # likewise
  }
@@ -129,6 +145,12 @@ def create_args(reglist, extra=None):
      return retval
  
  
      return retval
  
  
+def create_full_args(*, read_regs, special_regs, uninit_regs, write_regs,
+                     extra=None):
+    return create_args([
+        *read_regs, *uninit_regs, *write_regs, *special_regs], extra=extra)
+
+
  class GPR(dict):
      def __init__(self, decoder, isacaller, svstate, regfile):
          dict.__init__(self)
  class GPR(dict):
      def __init__(self, decoder, isacaller, svstate, regfile):
          dict.__init__(self)
@@ -224,13 +246,14 @@ class GPR(dict):
                  for j in range(8):
                      s.append("%08x" % res[i+j])
                  s = ' '.join(s)
                  for j in range(8):
                      s.append("%08x" % res[i+j])
                  s = ' '.join(s)
-                print("reg", "%2d" % i, s)
+                log("reg", "%2d" % i, s, kind=LogType.InstrInOuts)
          return res
  
  
  class SPR(dict):
          return res
  
  
  class SPR(dict):
-    def __init__(self, dec2, initial_sprs={}):
+    def __init__(self, dec2, initial_sprs={}, gpr=None):
          self.sd = dec2
          self.sd = dec2
+        self.gpr = gpr  # for SVSHAPE[0-3]
          dict.__init__(self)
          for key, v in initial_sprs.items():
              if isinstance(key, SelectableInt):
          dict.__init__(self)
          for key, v in initial_sprs.items():
              if isinstance(key, SelectableInt):
@@ -245,8 +268,8 @@ class SPR(dict):
              self[key] = v
  
      def __getitem__(self, key):
              self[key] = v
  
      def __getitem__(self, key):
-        log("get spr", key)
-        log("dict", self.items())
+        #log("get spr", key)
+        #log("dict", self.items())
          # if key in special_sprs get the special spr, otherwise return key
          if isinstance(key, SelectableInt):
              key = key.value
          # if key in special_sprs get the special spr, otherwise return key
          if isinstance(key, SelectableInt):
              key = key.value
@@ -264,9 +287,9 @@ class SPR(dict):
                  info = spr_dict[key]
              else:
                  info = spr_byname[key]
                  info = spr_dict[key]
              else:
                  info = spr_byname[key]
-            dict.__setitem__(self, key, SelectableInt(0, info.length))
+            self[key] = SelectableInt(0, info.length)
              res = dict.__getitem__(self, key)
              res = dict.__getitem__(self, key)
-        log("spr returning", key, res)
+        #log("spr returning", key, res)
          return res
  
      def __setitem__(self, key, value):
          return res
  
      def __setitem__(self, key, value):
@@ -280,6 +303,10 @@ class SPR(dict):
              self.__setitem__('SRR0', value)
          if key == 'HSRR1':  # HACK!
              self.__setitem__('SRR1', value)
              self.__setitem__('SRR0', value)
          if key == 'HSRR1':  # HACK!
              self.__setitem__('SRR1', value)
+        if key == 1:
+            value = XERState(value)
+        if key in ('SVSHAPE0', 'SVSHAPE1', 'SVSHAPE2', 'SVSHAPE3'):
+            value = SVSHAPE(value, self.gpr)
          log("setting spr", key, value)
          dict.__setitem__(self, key, value)
  
          log("setting spr", key, value)
          dict.__setitem__(self, key, value)
  
@@ -431,6 +458,8 @@ def get_idx_map(dec2, name):
      elif name == 'FRA':
          if in1_sel == In1Sel.FRA.value:
              return 1
      elif name == 'FRA':
          if in1_sel == In1Sel.FRA.value:
              return 1
+        if in3_sel == In3Sel.FRA.value:
+            return 3
      elif name == 'FRB':
          if in2_sel == In2Sel.FRB.value:
              return 2
      elif name == 'FRB':
          if in2_sel == In2Sel.FRB.value:
              return 2
@@ -442,6 +471,12 @@ def get_idx_map(dec2, name):
              return 1
          if in3_sel == In3Sel.FRS.value:
              return 3
              return 1
          if in3_sel == In3Sel.FRS.value:
              return 3
+    elif name == 'FRT':
+        if in1_sel == In1Sel.FRT.value:
+            return 1
+    elif name == 'RT':
+        if in1_sel == In1Sel.RT.value:
+            return 1
      return None
  
  
      return None
  
  
@@ -543,6 +578,15 @@ def get_cr_out(dec2, name):
      if name == 'CR1':  # these are not actually calculated correctly
          if out_sel == CROutSel.CR1.value:
              return out, o_isvec
      if name == 'CR1':  # these are not actually calculated correctly
          if out_sel == CROutSel.CR1.value:
              return out, o_isvec
+    # check RC1 set? if so return implicit vector, this is a REAL bad hack
+    RC1 = yield dec2.rm_dec.RC1
+    if RC1:
+        log("get_cr_out RC1 mode")
+        if name == 'CR0':
+            return 0, True  # XXX TODO: offset CR0 from SVSTATE SPR
+        if name == 'CR1':
+            return 1, True  # XXX TODO: offset CR1 from SVSTATE SPR
+    # nope - not found.
      log("get_cr_out not found", name)
      return None, False
  
      log("get_cr_out not found", name)
      return None, False
  
@@ -568,6 +612,9 @@ def get_out_map(dec2, name):
      elif name == 'FRA':
          if out_sel == OutSel.FRA.value:
              return True
      elif name == 'FRA':
          if out_sel == OutSel.FRA.value:
              return True
+    elif name == 'FRS':
+        if out_sel == OutSel.FRS.value:
+            return True
      elif name == 'FRT':
          if out_sel == OutSel.FRT.value:
              return True
      elif name == 'FRT':
          if out_sel == OutSel.FRT.value:
              return True
@@ -973,11 +1020,11 @@ class StepLoop:
          pred_sz = yield self.dec2.rm_dec.pred_sz
          if pmode == SVP64PredMode.INT.value:
              srcmask = dstmask = get_predint(self.gpr, dstpred)
          pred_sz = yield self.dec2.rm_dec.pred_sz
          if pmode == SVP64PredMode.INT.value:
              srcmask = dstmask = get_predint(self.gpr, dstpred)
-            if sv_ptype == SVPtype.P2.value:
+            if sv_ptype == SVPType.P2.value:
                  srcmask = get_predint(self.gpr, srcpred)
          elif pmode == SVP64PredMode.CR.value:
              srcmask = dstmask = get_predcr(self.crl, dstpred, vl)
                  srcmask = get_predint(self.gpr, srcpred)
          elif pmode == SVP64PredMode.CR.value:
              srcmask = dstmask = get_predcr(self.crl, dstpred, vl)
-            if sv_ptype == SVPtype.P2.value:
+            if sv_ptype == SVPType.P2.value:
                  srcmask = get_predcr(self.crl, srcpred, vl)
          # work out if the ssubsteps are completed
          ssubstart = ssubstep == 0
                  srcmask = get_predcr(self.crl, srcpred, vl)
          # work out if the ssubsteps are completed
          ssubstart = ssubstep == 0
@@ -1098,6 +1145,36 @@ class StepLoop:
          log("    new dststep", dststep)
  
  
          log("    new dststep", dststep)
  
  
+class ExitSyscallCalled(Exception):
+    pass
+
+
+class SyscallEmulator(openpower.syscalls.Dispatcher):
+    def __init__(self, isacaller):
+        self.__isacaller = isacaller
+
+        host = os.uname().machine
+        bits = (64 if (sys.maxsize > (2**32)) else 32)
+        host = openpower.syscalls.architecture(arch=host, bits=bits)
+
+        return super().__init__(guest="ppc64", host=host)
+
+    def __call__(self, identifier, *arguments):
+        (identifier, *arguments) = map(int, (identifier, *arguments))
+        return super().__call__(identifier, *arguments)
+
+    def sys_exit_group(self, status, *rest):
+        self.__isacaller.halted = True
+        raise ExitSyscallCalled(status)
+
+    def sys_write(self, fd, buf, count, *rest):
+        buf = self.__isacaller.mem.get_ctypes(buf, count, is_write=False)
+        try:
+            return os.write(fd, buf)
+        except OSError as e:
+            return -e.errno
+
+
  class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
      # decoder2 - an instance of power_decoder2
      # regfile - a list of initial values for the registers
  class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
      # decoder2 - an instance of power_decoder2
      # regfile - a list of initial values for the registers
@@ -1113,7 +1190,35 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
                   initial_pc=0,
                   bigendian=False,
                   mmu=False,
                   initial_pc=0,
                   bigendian=False,
                   mmu=False,
-                 icachemmu=False):
+                 icachemmu=False,
+                 initial_fpscr=0,
+                 insnlog=None,
+                 use_mmap_mem=False,
+                 use_syscall_emu=False,
+                 emulating_mmap=False):
+        if use_syscall_emu:
+            self.syscall = SyscallEmulator(isacaller=self)
+            if not use_mmap_mem:
+                log("forcing use_mmap_mem due to use_syscall_emu active")
+                use_mmap_mem = True
+        else:
+            self.syscall = None
+
+        # we will eventually be able to load ELF files without use_syscall_emu
+        # (e.g. the linux kernel), so do it in a separate if block
+        if isinstance(initial_insns, ELFFile):
+            if not use_mmap_mem:
+                log("forcing use_mmap_mem due to loading an ELF file")
+                use_mmap_mem = True
+            if not emulating_mmap:
+                log("forcing emulating_mmap due to loading an ELF file")
+                emulating_mmap = True
+
+        # trace log file for model output. if None do nothing
+        self.insnlog = insnlog
+        self.insnlog_is_file = hasattr(insnlog, "write")
+        if not self.insnlog_is_file and self.insnlog:
+            self.insnlog = open(self.insnlog, "w")
  
          self.bigendian = bigendian
          self.halted = False
  
          self.bigendian = bigendian
          self.halted = False
@@ -1128,6 +1233,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          if initial_insns is None:
              initial_insns = {}
              assert self.respect_pc == False, "instructions required to honor pc"
          if initial_insns is None:
              initial_insns = {}
              assert self.respect_pc == False, "instructions required to honor pc"
+        if initial_msr is None:
+            initial_msr = DEFAULT_MSR
  
          log("ISACaller insns", respect_pc, initial_insns, disassembly)
          log("ISACaller initial_msr", initial_msr)
  
          log("ISACaller insns", respect_pc, initial_insns, disassembly)
          log("ISACaller initial_msr", initial_msr)
@@ -1162,20 +1269,36 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          initial_sprs = deepcopy(initial_sprs)  # so as not to get modified
          self.gpr = GPR(decoder2, self, self.svstate, regfile)
          self.fpr = GPR(decoder2, self, self.svstate, fpregfile)
          initial_sprs = deepcopy(initial_sprs)  # so as not to get modified
          self.gpr = GPR(decoder2, self, self.svstate, regfile)
          self.fpr = GPR(decoder2, self, self.svstate, fpregfile)
-        self.spr = SPR(decoder2, initial_sprs)  # initialise SPRs before MMU
+        # initialise SPRs before MMU
+        self.spr = SPR(decoder2, initial_sprs, gpr=self.gpr)
  
          # set up 4 dummy SVSHAPEs if they aren't already set up
          for i in range(4):
              sname = 'SVSHAPE%d' % i
              val = self.spr.get(sname, 0)
  
          # set up 4 dummy SVSHAPEs if they aren't already set up
          for i in range(4):
              sname = 'SVSHAPE%d' % i
              val = self.spr.get(sname, 0)
-            # make sure it's an SVSHAPE
-            self.spr[sname] = SVSHAPE(val, self.gpr)
+            # make sure it's an SVSHAPE -- conversion done by SPR.__setitem__
+            self.spr[sname] = val
          self.last_op_svshape = False
  
          # "raw" memory
          self.last_op_svshape = False
  
          # "raw" memory
-        self.mem = Mem(row_bytes=8, initial_mem=initial_mem, misaligned_ok=True)
-        self.mem.log_fancy(kind=LogKind.InstrInOuts)
-        self.imem = Mem(row_bytes=4, initial_mem=initial_insns)
+        if use_mmap_mem:
+            self.mem = MemMMap(row_bytes=8,
+                               initial_mem=initial_mem,
+                               misaligned_ok=True,
+                               emulating_mmap=emulating_mmap)
+            self.imem = self.mem
+            lelf = self.mem.initialize(row_bytes=4, initial_mem=initial_insns)
+            if isinstance(lelf, LoadedELF):  # stuff parsed from ELF
+                initial_pc = lelf.pc
+                for k, v in lelf.gprs.items():
+                    self.gpr[k] = SelectableInt(v, 64)
+                initial_fpscr = lelf.fpscr
+            self.mem.log_fancy(kind=LogType.InstrInOuts)
+        else:
+            self.mem = Mem(row_bytes=8, initial_mem=initial_mem,
+                           misaligned_ok=True)
+            self.mem.log_fancy(kind=LogType.InstrInOuts)
+            self.imem = Mem(row_bytes=4, initial_mem=initial_insns)
          # MMU mode, redirect underlying Mem through RADIX
          if mmu:
              self.mem = RADIX(self.mem, self)
          # MMU mode, redirect underlying Mem through RADIX
          if mmu:
              self.mem = RADIX(self.mem, self)
@@ -1186,6 +1309,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          # FPR (same as GPR except for FP nums)
          # 4.2.2 p124 FPSCR (definitely "separate" - not in SPR)
          #            note that mffs, mcrfs, mtfsf "manage" this FPSCR
          # FPR (same as GPR except for FP nums)
          # 4.2.2 p124 FPSCR (definitely "separate" - not in SPR)
          #            note that mffs, mcrfs, mtfsf "manage" this FPSCR
+        self.fpscr = FPSCRState(initial_fpscr)
+
          # 2.3.1 CR (and sub-fields CR0..CR6 - CR0 SO comes from XER.SO)
          #         note that mfocrf, mfcr, mtcr, mtocrf, mcrxrx "manage" CRs
          #         -- Done
          # 2.3.1 CR (and sub-fields CR0..CR6 - CR0 SO comes from XER.SO)
          #         note that mfocrf, mfcr, mtcr, mtocrf, mcrxrx "manage" CRs
          #         -- Done
@@ -1201,7 +1326,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          self.cr_backup = 0  # sigh, dreadful hack: for fail-first (VLi)
  
          # "undefined", just set to variable-bit-width int (use exts "max")
          self.cr_backup = 0  # sigh, dreadful hack: for fail-first (VLi)
  
          # "undefined", just set to variable-bit-width int (use exts "max")
-        # self.undefined = SelectableInt(0, 256)  # TODO, not hard-code 256!
+        # self.undefined = SelectableInt(0, EFFECTIVELY_UNLIMITED)
  
          self.namespace = {}
          self.namespace.update(self.spr)
  
          self.namespace = {}
          self.namespace.update(self.spr)
@@ -1219,12 +1344,16 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
                                 'SVSHAPE3': self.spr['SVSHAPE3'],
                                 'CR': self.cr,
                                 'MSR': self.msr,
                                 'SVSHAPE3': self.spr['SVSHAPE3'],
                                 'CR': self.cr,
                                 'MSR': self.msr,
+                               'FPSCR': self.fpscr,
                                 'undefined': undefined,
                                 'mode_is_64bit': True,
                                 'SO': XER_bits['SO'],
                                 'XLEN': 64  # elwidth overrides
                                 })
  
                                 'undefined': undefined,
                                 'mode_is_64bit': True,
                                 'SO': XER_bits['SO'],
                                 'XLEN': 64  # elwidth overrides
                                 })
  
+        for name in BFP_FLAG_NAMES:
+            setattr(self, name, 0)
+
          # update pc to requested start point
          self.set_pc(initial_pc)
  
          # update pc to requested start point
          self.set_pc(initial_pc)
  
@@ -1236,12 +1365,21 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          self.decoder = decoder2.dec
          self.dec2 = decoder2
  
          self.decoder = decoder2.dec
          self.dec2 = decoder2
  
-        super().__init__(XLEN=self.namespace["XLEN"])
+        super().__init__(XLEN=self.namespace["XLEN"], FPSCR=self.fpscr)
+
+    def trace(self, out):
+        if self.insnlog is None:
+            return
+        self.insnlog.write(out)
  
      @property
      def XLEN(self):
          return self.namespace["XLEN"]
  
  
      @property
      def XLEN(self):
          return self.namespace["XLEN"]
  
+    @property
+    def FPSCR(self):
+        return self.fpscr
+
      def call_trap(self, trap_addr, trap_bit):
          """calls TRAP and sets up NIA to the new execution location.
          next instruction will begin at trap_addr.
      def call_trap(self, trap_addr, trap_bit):
          """calls TRAP and sets up NIA to the new execution location.
          next instruction will begin at trap_addr.
@@ -1256,7 +1394,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          TRAP function is callable from inside the pseudocode itself,
          hence the default arguments.  when calling from inside ISACaller
          it is best to use call_trap()
          TRAP function is callable from inside the pseudocode itself,
          hence the default arguments.  when calling from inside ISACaller
          it is best to use call_trap()
+
+        trap_addr: int | SelectableInt
+            the address to go to (before any modifications from `KAIVB`)
+        trap_bit: int | None
+            the bit in `SRR1` to set, `None` means don't set any bits.
          """
          """
+        if isinstance(trap_addr, SelectableInt):
+            trap_addr = trap_addr.value
          # https://bugs.libre-soc.org/show_bug.cgi?id=859
          kaivb = self.spr['KAIVB'].value
          msr = self.namespace['MSR'].value
          # https://bugs.libre-soc.org/show_bug.cgi?id=859
          kaivb = self.spr['KAIVB'].value
          msr = self.namespace['MSR'].value
@@ -1269,7 +1414,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          if self.is_svp64_mode:
              self.spr['SVSRR0'] = self.namespace['SVSTATE'].value
          self.trap_nia = SelectableInt(trap_addr | (kaivb & ~0x1fff), 64)
          if self.is_svp64_mode:
              self.spr['SVSRR0'] = self.namespace['SVSTATE'].value
          self.trap_nia = SelectableInt(trap_addr | (kaivb & ~0x1fff), 64)
-        self.spr['SRR1'][trap_bit] = 1  # change *copy* of MSR in SRR1
+        if trap_bit is not None:
+            self.spr['SRR1'][trap_bit] = 1  # change *copy* of MSR in SRR1
  
          # set exception bits.  TODO: this should, based on the address
          # in figure 66 p1065 V3.0B and the table figure 65 p1063 set these
  
          # set exception bits.  TODO: this should, based on the address
          # in figure 66 p1065 V3.0B and the table figure 65 p1063 set these
@@ -1333,6 +1479,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          self.namespace['XER'] = self.spr['XER']
          self.namespace['CA'] = self.spr['XER'][XER_bits['CA']].value
          self.namespace['CA32'] = self.spr['XER'][XER_bits['CA32']].value
          self.namespace['XER'] = self.spr['XER']
          self.namespace['CA'] = self.spr['XER'][XER_bits['CA']].value
          self.namespace['CA32'] = self.spr['XER'][XER_bits['CA32']].value
+        self.namespace['OV'] = self.spr['XER'][XER_bits['OV']].value
+        self.namespace['OV32'] = self.spr['XER'][XER_bits['OV32']].value
          self.namespace['XLEN'] = xlen
  
          # add some SVSTATE convenience variables
          self.namespace['XLEN'] = xlen
  
          # add some SVSTATE convenience variables
@@ -1346,28 +1494,143 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          self.cr_backup = self.cr.value
  
          # sv.bc* need some extra fields
          self.cr_backup = self.cr.value
  
          # sv.bc* need some extra fields
-        if self.is_svp64_mode and insn_name.startswith("sv.bc"):
-            # blegh grab bits manually
-            mode = yield self.dec2.rm_dec.rm_in.mode
-            # convert to SelectableInt before test
-            mode = SelectableInt(mode, 5)
-            bc_vlset = mode[SVP64MODEb.BC_VLSET] != 0
-            bc_vli = mode[SVP64MODEb.BC_VLI] != 0
-            bc_snz = mode[SVP64MODEb.BC_SNZ] != 0
-            bc_vsb = yield self.dec2.rm_dec.bc_vsb
-            bc_lru = yield self.dec2.rm_dec.bc_lru
-            bc_gate = yield self.dec2.rm_dec.bc_gate
-            sz = yield self.dec2.rm_dec.pred_sz
-            self.namespace['mode'] = SelectableInt(mode, 5)
-            self.namespace['ALL'] = SelectableInt(bc_gate, 1)
-            self.namespace['VSb'] = SelectableInt(bc_vsb, 1)
-            self.namespace['LRu'] = SelectableInt(bc_lru, 1)
-            self.namespace['VLSET'] = SelectableInt(bc_vlset, 1)
-            self.namespace['VLI'] = SelectableInt(bc_vli, 1)
-            self.namespace['sz'] = SelectableInt(sz, 1)
-            self.namespace['SNZ'] = SelectableInt(bc_snz, 1)
-
-    def handle_carry_(self, inputs, output, ca, ca32):
+        if not self.is_svp64_mode or not insn_name.startswith("sv.bc"):
+            return
+
+        # blegh grab bits manually
+        mode = yield self.dec2.rm_dec.rm_in.mode
+        # convert to SelectableInt before test
+        mode = SelectableInt(mode, 5)
+        bc_vlset = mode[SVP64MODEb.BC_VLSET] != 0
+        bc_vli = mode[SVP64MODEb.BC_VLI] != 0
+        bc_snz = mode[SVP64MODEb.BC_SNZ] != 0
+        bc_vsb = yield self.dec2.rm_dec.bc_vsb
+        bc_ctrtest = yield self.dec2.rm_dec.bc_ctrtest
+        bc_lru = yield self.dec2.rm_dec.bc_lru
+        bc_gate = yield self.dec2.rm_dec.bc_gate
+        sz = yield self.dec2.rm_dec.pred_sz
+        self.namespace['mode'] = SelectableInt(mode, 5)
+        self.namespace['ALL'] = SelectableInt(bc_gate, 1)
+        self.namespace['VSb'] = SelectableInt(bc_vsb, 1)
+        self.namespace['LRu'] = SelectableInt(bc_lru, 1)
+        self.namespace['CTRtest'] = SelectableInt(bc_ctrtest, 1)
+        self.namespace['VLSET'] = SelectableInt(bc_vlset, 1)
+        self.namespace['VLI'] = SelectableInt(bc_vli, 1)
+        self.namespace['sz'] = SelectableInt(sz, 1)
+        self.namespace['SNZ'] = SelectableInt(bc_snz, 1)
+
+    def get_kludged_op_add_ca_ov(self, inputs, inp_ca_ov):
+        """ this was not at all necessary to do.  this function massively
+        duplicates - in a laborious and complex fashion - the contents of
+        the CSV files that were extracted two years ago from microwatt's
+        source code.  A-inversion is the "inv A" column, output inversion
+        is the "inv out" column, carry-in equal to 0 or 1 or CA is the
+        "cry in" column
+
+        all of that information is available in
+            self.instrs[ins_name].op_fields
+        where info is usually assigned to self.instrs[ins_name]
+
+        https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=openpower/isatables/minor_31.csv;hb=HEAD
+
+        the immediate constants are *also* decoded correctly and placed
+        usually by DecodeIn2Imm into operand2, as part of power_decoder2.py
+        """
+        def ca(a, b, ca_in, width):
+            mask = (1 << width) - 1
+            y = (a & mask) + (b & mask) + ca_in
+            return y >> width
+
+        asmcode = yield self.dec2.dec.op.asmcode
+        insn = insns.get(asmcode)
+        SI = yield self.dec2.dec.SI
+        SI &= 0xFFFF
+        CA, OV = inp_ca_ov
+        inputs = [i.value for i in inputs]
+        if SI & 0x8000:
+            SI -= 0x10000
+        if insn in ("add", "addo", "addc", "addco"):
+            a = inputs[0]
+            b = inputs[1]
+            ca_in = 0
+        elif insn == "addic" or insn == "addic.":
+            a = inputs[0]
+            b = SI
+            ca_in = 0
+        elif insn in ("subf", "subfo", "subfc", "subfco"):
+            a = ~inputs[0]
+            b = inputs[1]
+            ca_in = 1
+        elif insn == "subfic":
+            a = ~inputs[0]
+            b = SI
+            ca_in = 1
+        elif insn == "adde" or insn == "addeo":
+            a = inputs[0]
+            b = inputs[1]
+            ca_in = CA
+        elif insn == "subfe" or insn == "subfeo":
+            a = ~inputs[0]
+            b = inputs[1]
+            ca_in = CA
+        elif insn == "addme" or insn == "addmeo":
+            a = inputs[0]
+            b = ~0
+            ca_in = CA
+        elif insn == "addze" or insn == "addzeo":
+            a = inputs[0]
+            b = 0
+            ca_in = CA
+        elif insn == "subfme" or insn == "subfmeo":
+            a = ~inputs[0]
+            b = ~0
+            ca_in = CA
+        elif insn == "subfze" or insn == "subfzeo":
+            a = ~inputs[0]
+            b = 0
+            ca_in = CA
+        elif insn == "addex":
+            # CA[32] aren't actually written, just generate so we have
+            # something to return
+            ca64 = ov64 = ca(inputs[0], inputs[1], OV, 64)
+            ca32 = ov32 = ca(inputs[0], inputs[1], OV, 32)
+            return ca64, ca32, ov64, ov32
+        elif insn == "neg" or insn == "nego":
+            a = ~inputs[0]
+            b = 0
+            ca_in = 1
+        else:
+            raise NotImplementedError(
+                "op_add kludge unimplemented instruction: ", asmcode, insn)
+
+        ca64 = ca(a, b, ca_in, 64)
+        ca32 = ca(a, b, ca_in, 32)
+        ov64 = ca64 != ca(a, b, ca_in, 63)
+        ov32 = ca32 != ca(a, b, ca_in, 31)
+        return ca64, ca32, ov64, ov32
+
+    def handle_carry_(self, inputs, output, ca, ca32, inp_ca_ov):
+        if ca is not None and ca32 is not None:
+            return
+        op = yield self.dec2.e.do.insn_type
+        if op == MicrOp.OP_ADD.value and ca is None and ca32 is None:
+            retval = yield from self.get_kludged_op_add_ca_ov(
+                inputs, inp_ca_ov)
+            ca, ca32, ov, ov32 = retval
+            asmcode = yield self.dec2.dec.op.asmcode
+            if insns.get(asmcode) == 'addex':
+                # TODO: if 32-bit mode, set ov to ov32
+                self.spr['XER'][XER_bits['OV']] = ov
+                self.spr['XER'][XER_bits['OV32']] = ov32
+                log(f"write OV/OV32 OV={ov} OV32={ov32}",
+                    kind=LogType.InstrInOuts)
+            else:
+                # TODO: if 32-bit mode, set ca to ca32
+                self.spr['XER'][XER_bits['CA']] = ca
+                self.spr['XER'][XER_bits['CA32']] = ca32
+                log(f"write CA/CA32 CA={ca} CA32={ca32}",
+                    kind=LogType.InstrInOuts)
+            return
          inv_a = yield self.dec2.e.do.invert_in
          if inv_a:
              inputs[0] = ~inputs[0]
          inv_a = yield self.dec2.e.do.invert_in
          if inv_a:
              inputs[0] = ~inputs[0]
@@ -1412,7 +1675,17 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          if ca32 is None:  # already written
              self.spr['XER'][XER_bits['CA32']] = cy32
  
          if ca32 is None:  # already written
              self.spr['XER'][XER_bits['CA32']] = cy32
  
-    def handle_overflow(self, inputs, output, div_overflow):
+    def handle_overflow(self, inputs, output, div_overflow, inp_ca_ov):
+        op = yield self.dec2.e.do.insn_type
+        if op == MicrOp.OP_ADD.value:
+            retval = yield from self.get_kludged_op_add_ca_ov(
+                inputs, inp_ca_ov)
+            ca, ca32, ov, ov32 = retval
+            # TODO: if 32-bit mode, set ov to ov32
+            self.spr['XER'][XER_bits['OV']] = ov
+            self.spr['XER'][XER_bits['OV32']] = ov32
+            self.spr['XER'][XER_bits['SO']] |= ov
+            return
          if hasattr(self.dec2.e.do, "invert_in"):
              inv_a = yield self.dec2.e.do.invert_in
              if inv_a:
          if hasattr(self.dec2.e.do, "invert_in"):
              inv_a = yield self.dec2.e.do.invert_in
              if inv_a:
@@ -1473,10 +1746,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          else:
              SO = self.spr['XER'][XER_bits['SO']]
          log("handle_comparison SO", SO.value,
          else:
              SO = self.spr['XER'][XER_bits['SO']]
          log("handle_comparison SO", SO.value,
-                    "overflow", overflow,
-                    "zero", zero.value,
-                    "+ve", positive.value,
-                     "-ve", negative.value)
+            "overflow", overflow,
+            "zero", zero.value,
+            "+ve", positive.value,
+            "-ve", negative.value)
          # alternative overflow checking (setvl mainly at the moment)
          if overflow is not None and overflow == 1:
              SO = SelectableInt(1, 1)
          # alternative overflow checking (setvl mainly at the moment)
          if overflow is not None and overflow == 1:
              SO = SelectableInt(1, 1)
@@ -1507,6 +1780,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          pc, insn = self.get_next_insn()
          yield from self.setup_next_insn(pc, insn)
  
          pc, insn = self.get_next_insn()
          yield from self.setup_next_insn(pc, insn)
  
+    # cache since it's really slow to construct
+    # (one class-level Prefix object, created once when the class body
+    # runs and then reused by every call to __decode_prefix)
+    __PREFIX_CACHE = SVP64Instruction.Prefix(SelectableInt(value=0, bits=32))
+
+    def __decode_prefix(self, opcode):
+        """Load opcode into the cached Prefix object and return that object.
+
+        The return value is the shared cache itself, not a fresh copy, so
+        a later call to this method replaces what an earlier caller is
+        still holding.  Read the fields you need before decoding another
+        opcode.
+        """
+        pfx = self.__PREFIX_CACHE
+        # presumably an in-place update of the cached object's storage
+        pfx.storage.eq(opcode)
+        return pfx
+
      def setup_next_insn(self, pc, ins):
          """set up next instruction
          """
      def setup_next_insn(self, pc, ins):
          """set up next instruction
          """
@@ -1526,7 +1807,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          yield Settle()
          opcode = yield self.dec2.dec.opcode_in
          opcode = SelectableInt(value=opcode, bits=32)
          yield Settle()
          opcode = yield self.dec2.dec.opcode_in
          opcode = SelectableInt(value=opcode, bits=32)
-        pfx = SVP64Instruction.Prefix(opcode)
+        pfx = self.__decode_prefix(opcode)
          log("prefix test: opcode:", pfx.PO, bin(pfx.PO), pfx.id)
          self.is_svp64_mode = bool((pfx.PO == 0b000001) and (pfx.id == 0b11))
          self.pc.update_nia(self.is_svp64_mode)
          log("prefix test: opcode:", pfx.PO, bin(pfx.PO), pfx.id)
          self.is_svp64_mode = bool((pfx.PO == 0b000001) and (pfx.id == 0b11))
          self.pc.update_nia(self.is_svp64_mode)
@@ -1590,6 +1871,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
              # not supported yet:
              raise e                          # ... re-raise
  
              # not supported yet:
              raise e                          # ... re-raise
  
+        # append to the trace log file
+        self.trace(" # %s\n" % code)
+
          log("gprs after code", code)
          self.gpr.dump()
          crs = []
          log("gprs after code", code)
          self.gpr.dump()
          crs = []
@@ -1715,7 +1999,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          dec_insn = yield self.dec2.e.do.insn
          return dec_insn & (1 << 20) != 0  # sigh - XFF.spr[-1]?
  
          dec_insn = yield self.dec2.e.do.insn
          return dec_insn & (1 << 20) != 0  # sigh - XFF.spr[-1]?
  
-    def call(self, name):
+    def call(self, name, syscall_emu_active=False):
          """call(opcode) - the primary execution point for instructions
          """
          self.last_st_addr = None  # reset the last known store address
          """call(opcode) - the primary execution point for instructions
          """
          self.last_st_addr = None  # reset the last known store address
@@ -1729,7 +2013,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          # TODO, asmregs is from the spec, e.g. add RT,RA,RB
          # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
          asmop = yield from self.get_assembly_name()
          # TODO, asmregs is from the spec, e.g. add RT,RA,RB
          # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
          asmop = yield from self.get_assembly_name()
-        log("call", ins_name, asmop)
+        log("call", ins_name, asmop,
+            kind=LogType.InstrInOuts)
  
          # sv.setvl is *not* a loop-function. sigh
          log("is_svp64_mode", self.is_svp64_mode, asmop)
  
          # sv.setvl is *not* a loop-function. sigh
          log("is_svp64_mode", self.is_svp64_mode, asmop)
@@ -1762,6 +2047,33 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
              self.halted = True
              return
  
              self.halted = True
              return
  
+        # User mode system call emulation consists of several steps:
+        # 1. Detect whether instruction is sc or scv.
+        # 2. Call the HDL implementation which invokes trap.
+        # 3. Reroute the guest system call to host system call.
+        # 4. Force return from the interrupt as if we had guest OS.
+        if ((asmop in ("sc", "scv")) and
+                (self.syscall is not None) and
+                not syscall_emu_active):
+            # Memoize PC and trigger an interrupt
+            if self.respect_pc:
+                pc = self.pc.CIA.value
+            else:
+                pc = self.fake_pc
+            yield from self.call(asmop, syscall_emu_active=True)
+
+            # Reroute the syscall to host OS
+            identifier = self.gpr(0)
+            arguments = map(self.gpr, range(3, 9))
+            result = self.syscall(identifier, *arguments)
+            self.gpr.write(3, result, False, self.namespace["XLEN"])
+
+            # Return from interrupt
+            yield from self.call("rfid", syscall_emu_active=True)
+            return
+        elif ((name in ("rfid", "hrfid")) and syscall_emu_active):
+            asmop = "rfid"
+
          # check illegal instruction
          illegal = False
          if ins_name not in ['mtcrf', 'mtocrf']:
          # check illegal instruction
          illegal = False
          if ins_name not in ['mtcrf', 'mtocrf']:
@@ -1770,19 +2082,33 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          # list of instructions not being supported by binutils (.long)
          dotstrp = asmop[:-1] if asmop[-1] == '.' else asmop
          if dotstrp in [*FPTRANS_INSNS,
          # list of instructions not being supported by binutils (.long)
          dotstrp = asmop[:-1] if asmop[-1] == '.' else asmop
          if dotstrp in [*FPTRANS_INSNS,
+                       *LDST_UPDATE_INSNS,
                         'ffmadds', 'fdmadds', 'ffadds',
                         'ffmadds', 'fdmadds', 'ffadds',
-                       'mins', 'maxs', 'minu', 'maxu',
+                       'minmax',
+                       "brh", "brw", "brd",
                         'setvl', 'svindex', 'svremap', 'svstep',
                         'svshape', 'svshape2',
                         'setvl', 'svindex', 'svremap', 'svstep',
                         'svshape', 'svshape2',
-                       'grev', 'ternlogi', 'bmask', 'cprop',
+                       'ternlogi', 'bmask', 'cprop', 'gbbd',
                         'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd',
                         'fmvis', 'fishmv', 'pcdec', "maddedu", "divmod2du",
                         "dsld", "dsrd", "maddedus",
                         'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd',
                         'fmvis', 'fishmv', 'pcdec', "maddedu", "divmod2du",
                         "dsld", "dsrd", "maddedus",
-                       "shadd", "shadduw",
+                       "sadd", "saddw", "sadduw",
+                       "cffpr", "cffpro",
+                       "mffpr", "mffprs",
+                       "ctfpr", "ctfprs",
+                       "mtfpr", "mtfprs",
+                       "maddsubrs", "maddrs", "msubrs",
+                       "cfuged", "cntlzdm", "cnttzdm", "pdepd", "pextd",
+                       "setbc", "setbcr", "setnbc", "setnbcr",
                         ]:
              illegal = False
              ins_name = dotstrp
  
                         ]:
              illegal = False
              ins_name = dotstrp
  
+        # match against instructions treated as nop, see nop below
+        if asmop.startswith("dcbt"):
+            illegal = False
+            ins_name = "nop"
+
          # branch-conditional redirects to sv.bc
          if asmop.startswith('bc') and self.is_svp64_mode:
              ins_name = 'sv.%s' % ins_name
          # branch-conditional redirects to sv.bc
          if asmop.startswith('bc') and self.is_svp64_mode:
              ins_name = 'sv.%s' % ins_name
@@ -1828,7 +2154,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
              ew_src = 8 << (3-int(ew_src))  # convert to bitlength
              ew_dst = 8 << (3-int(ew_dst))  # convert to bitlength
              xlen = max(ew_src, ew_dst)
              ew_src = 8 << (3-int(ew_src))  # convert to bitlength
              ew_dst = 8 << (3-int(ew_dst))  # convert to bitlength
              xlen = max(ew_src, ew_dst)
-            log("elwdith", ew_src, ew_dst)
+            log("elwidth", ew_src, ew_dst)
          log("XLEN:", self.is_svp64_mode, xlen)
  
          # look up instruction in ISA.instrs, prepare namespace
          log("XLEN:", self.is_svp64_mode, xlen)
  
          # look up instruction in ISA.instrs, prepare namespace
@@ -1841,9 +2167,16 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          yield from self.prep_namespace(ins_name, info.form, info.op_fields,
                                         xlen)
  
          yield from self.prep_namespace(ins_name, info.form, info.op_fields,
                                         xlen)
  
+        # dict retains order
+        inputs = dict.fromkeys(create_full_args(
+            read_regs=info.read_regs, special_regs=info.special_regs,
+            uninit_regs=info.uninit_regs, write_regs=info.write_regs))
+
          # preserve order of register names
          # preserve order of register names
-        input_names = create_args(list(info.read_regs) +
-                                  list(info.uninit_regs))
+        write_without_special_regs = OrderedSet(info.write_regs)
+        write_without_special_regs -= OrderedSet(info.special_regs)
+        input_names = create_args([
+            *info.read_regs, *info.uninit_regs, *write_without_special_regs])
          log("input names", input_names)
  
          # get SVP64 entry for the current instruction
          log("input names", input_names)
  
          # get SVP64 entry for the current instruction
@@ -1880,7 +2213,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          if self.is_svp64_mode and vl == 0:
              self.pc.update(self.namespace, self.is_svp64_mode)
              log("SVP64: VL=0, end of call", self.namespace['CIA'],
          if self.is_svp64_mode and vl == 0:
              self.pc.update(self.namespace, self.is_svp64_mode)
              log("SVP64: VL=0, end of call", self.namespace['CIA'],
-                self.namespace['NIA'], kind=LogKind.InstrInOuts)
+                self.namespace['NIA'], kind=LogType.InstrInOuts)
              return
  
          # for when SVREMAP is active, using pre-arranged schedule.
              return
  
          # for when SVREMAP is active, using pre-arranged schedule.
@@ -1905,11 +2238,21 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          log("remap active", bin(remap_active))
  
          # main input registers (RT, RA ...)
          log("remap active", bin(remap_active))
  
          # main input registers (RT, RA ...)
-        inputs = []
          for name in input_names:
          for name in input_names:
-            regval = (yield from self.get_input(name, ew_src))
-            log("regval name", name, regval)
-            inputs.append(regval)
+            # inputs that do not come from the GPR/FPR register files are
+            # picked up directly from simulator state, keyed by name
+            if name == "overflow":
+                inputs[name] = SelectableInt(0, 1)
+            elif name == "FPSCR":
+                inputs[name] = self.FPSCR
+            elif name in ("CA", "CA32", "OV", "OV32"):
+                inputs[name] = self.spr['XER'][XER_bits[name]]
+            elif name == "CR0":
+                # exact comparison: "in" on a str is a substring test, which
+                # would also accept "CR", "C", "R0" and the empty string
+                inputs[name] = self.crl[0]
+            elif name in spr_byname:
+                inputs[name] = self.spr[name]
+            else:
+                # everything else is read through the decoder (get_input)
+                regval = (yield from self.get_input(name, ew_src, xlen))
+                log("regval name", name, regval)
+                inputs[name] = regval
  
          # arrrrgh, awful hack, to get _RT into namespace
          if ins_name in ['setvl', 'svstep']:
  
          # arrrrgh, awful hack, to get _RT into namespace
          if ins_name in ['setvl', 'svstep']:
@@ -1930,29 +2273,49 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          # "special" registers
          for special in info.special_regs:
              if special in special_sprs:
          # "special" registers
          for special in info.special_regs:
              if special in special_sprs:
-                inputs.append(self.spr[special])
+                inputs[special] = self.spr[special]
              else:
              else:
-                inputs.append(self.namespace[special])
+                inputs[special] = self.namespace[special]
  
          # clear trap (trap) NIA
          self.trap_nia = None
  
          # check if this was an sv.bc* and create an indicator that
          # this is the last check to be made as a loop.  combined with
  
          # clear trap (trap) NIA
          self.trap_nia = None
  
          # check if this was an sv.bc* and create an indicator that
          # this is the last check to be made as a loop.  combined with
-        # the ALL/ANY mode we can early-exit
+        # the ALL/ANY mode we can early-exit. note that BI (to test)
+        # is an input so there is no termination if BI is scalar
+        # (because early-termination is for *output* scalars)
          if self.is_svp64_mode and ins_name.startswith("sv.bc"):
          if self.is_svp64_mode and ins_name.startswith("sv.bc"):
-            no_in_vec = yield self.dec2.no_in_vec  # BI is scalar
-            end_loop = no_in_vec or srcstep == vl-1 or dststep == vl-1
+            end_loop = srcstep == vl-1 or dststep == vl-1
              self.namespace['end_loop'] = SelectableInt(end_loop, 1)
  
              self.namespace['end_loop'] = SelectableInt(end_loop, 1)
  
+        inp_ca_ov = (self.spr['XER'][XER_bits['CA']].value,
+                     self.spr['XER'][XER_bits['OV']].value)
+
+        for k, v in inputs.items():
+            if v is None:
+                v = SelectableInt(0, self.XLEN)
+            # prevent pseudo-code from modifying input registers
+            v = copy_assign_rhs(v)
+            if isinstance(v, SelectableInt):
+                v.ok = False
+            inputs[k] = v
+
          # execute actual instruction here (finally)
          log("inputs", inputs)
          # execute actual instruction here (finally)
          log("inputs", inputs)
+        inputs = list(inputs.values())
          results = info.func(self, *inputs)
          output_names = create_args(info.write_regs)
          outs = {}
          results = info.func(self, *inputs)
          output_names = create_args(info.write_regs)
          outs = {}
+        # record .ok before anything after the pseudo-code can modify it
+        outs_ok = {}
          for out, n in zip(results or [], output_names):
              outs[n] = out
          for out, n in zip(results or [], output_names):
              outs[n] = out
+            outs_ok[n] = True
+            if isinstance(out, SelectableInt):
+                outs_ok[n] = out.ok
          log("results", outs)
          log("results", outs)
+        log("results ok", outs_ok)
  
          # "inject" decorator takes namespace from function locals: we need to
          # overwrite NIA being overwritten (sigh)
  
          # "inject" decorator takes namespace from function locals: we need to
          # overwrite NIA being overwritten (sigh)
@@ -1974,24 +2337,32 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
  
          # detect if CA/CA32 already in outputs (sra*, basically)
          ca = outs.get("CA")
  
          # detect if CA/CA32 already in outputs (sra*, basically)
          ca = outs.get("CA")
-        ca32 = outs.get("CA32 ")
+        ca32 = outs.get("CA32")
  
          log("carry already done?", ca, ca32, output_names)
  
          log("carry already done?", ca, ca32, output_names)
-        carry_en = yield self.dec2.e.do.output_carry
+        # soc test_pipe_caller tests don't have output_carry
+        has_output_carry = hasattr(self.dec2.e.do, "output_carry")
+        carry_en = has_output_carry and (yield self.dec2.e.do.output_carry)
          if carry_en:
          if carry_en:
-            yield from self.handle_carry_(inputs, results[0], ca, ca32)
+            yield from self.handle_carry_(
+                inputs, results[0], ca, ca32, inp_ca_ov=inp_ca_ov)
  
  
-        # get outout named "overflow" and "CR0"
+        # get output named "overflow" and "CR0"
          overflow = outs.get('overflow')
          cr0 = outs.get('CR0')
          overflow = outs.get('overflow')
          cr0 = outs.get('CR0')
+        cr1 = outs.get('CR1')
  
  
-        if not self.is_svp64_mode:  # yeah just no. not in parallel processing
+        # soc test_pipe_caller tests don't have oe
+        has_oe = hasattr(self.dec2.e.do, "oe")
+        # yeah just no. not in parallel processing
+        if has_oe and not self.is_svp64_mode:
              # detect if overflow was in return result
              ov_en = yield self.dec2.e.do.oe.oe
              ov_ok = yield self.dec2.e.do.oe.ok
              log("internal overflow", ins_name, overflow, "en?", ov_en, ov_ok)
              if ov_en & ov_ok:
              # detect if overflow was in return result
              ov_en = yield self.dec2.e.do.oe.oe
              ov_ok = yield self.dec2.e.do.oe.ok
              log("internal overflow", ins_name, overflow, "en?", ov_en, ov_ok)
              if ov_en & ov_ok:
-                yield from self.handle_overflow(inputs, results[0], overflow)
+                yield from self.handle_overflow(
+                    inputs, results[0], overflow, inp_ca_ov=inp_ca_ov)
  
          # only do SVP64 dest predicated Rc=1 if dest-pred is not enabled
          rc_en = False
  
          # only do SVP64 dest predicated Rc=1 if dest-pred is not enabled
          rc_en = False
@@ -2002,7 +2373,12 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          # XXX TODO: now that CR0 is supported, sort out svstep's pseudocode
          # to write directly to CR0 instead of in ISACaller. hooyahh.
          if rc_en and ins_name not in ['svstep']:
          # XXX TODO: now that CR0 is supported, sort out svstep's pseudocode
          # to write directly to CR0 instead of in ISACaller. hooyahh.
          if rc_en and ins_name not in ['svstep']:
-            yield from self.do_rc_ov(ins_name, results[0], overflow, cr0)
+            if outs_ok.get('FPSCR', False):
+                FPSCR = outs['FPSCR']
+            else:
+                FPSCR = self.FPSCR
+            yield from self.do_rc_ov(
+                ins_name, results[0], overflow, cr0, cr1, FPSCR)
  
          # check failfirst
          ffirst_hit = False, False
  
          # check failfirst
          ffirst_hit = False, False
@@ -2010,11 +2386,23 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
              sv_mode = yield self.dec2.rm_dec.sv_mode
              is_cr = sv_mode == SVMode.CROP.value
              chk = rc_en or is_cr
              sv_mode = yield self.dec2.rm_dec.sv_mode
              is_cr = sv_mode == SVMode.CROP.value
              chk = rc_en or is_cr
+            if outs_ok.get('CR', False):
+                # early write so check_ffirst can see value
+                self.namespace['CR'].eq(outs['CR'])
              ffirst_hit = (yield from self.check_ffirst(info, chk, srcstep))
  
          # any modified return results?
              ffirst_hit = (yield from self.check_ffirst(info, chk, srcstep))
  
          # any modified return results?
-        yield from self.do_outregs_nia(asmop, ins_name, info, outs,
-                                       carry_en, rc_en, ffirst_hit, ew_dst)
+        yield from self.do_outregs(
+            info, outs, carry_en, ffirst_hit, ew_dst, outs_ok)
+
+        # check if a FP Exception occurred. TODO for DD-FFirst, check VLi
+        # and raise the exception *after* if VLi=1 but if VLi=0 then
+        # truncate and make the exception "disappear".
+        if self.FPSCR.FEX and (self.msr[MSRb.FE0] or self.msr[MSRb.FE1]):
+            self.call_trap(0x700, PIb.FP)
+            return
+
+        yield from self.do_nia(asmop, ins_name, rc_en, ffirst_hit)
  
      def check_ffirst(self, info, rc_en, srcstep):
          """fail-first mode: checks a bit of Rc Vector, truncates VL
  
      def check_ffirst(self, info, rc_en, srcstep):
          """fail-first mode: checks a bit of Rc Vector, truncates VL
@@ -2051,9 +2439,10 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          yield Settle()  # let decoder update
          return True, vli_
  
          yield Settle()  # let decoder update
          return True, vli_
  
-    def do_rc_ov(self, ins_name, result, overflow, cr0):
-        if ins_name.startswith("f"):
-            rc_reg = "CR1"  # not calculated correctly yet (not FP compares)
+    def do_rc_ov(self, ins_name, result, overflow, cr0, cr1, FPSCR):
+        cr_out = yield self.dec2.op.cr_out
+        if cr_out == CROutSel.CR1.value:
+            rc_reg = "CR1"
          else:
              rc_reg = "CR0"
          regnum, is_vec = yield from get_cr_out(self.dec2, rc_reg)
          else:
              rc_reg = "CR0"
          regnum, is_vec = yield from get_cr_out(self.dec2, rc_reg)
@@ -2061,35 +2450,52 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          is_setvl = ins_name in ('svstep', 'setvl')
          if is_setvl:
              result = SelectableInt(result.vl, 64)
          is_setvl = ins_name in ('svstep', 'setvl')
          if is_setvl:
              result = SelectableInt(result.vl, 64)
-        #else:
+        # else:
          #    overflow = None  # do not override overflow except in setvl
  
          #    overflow = None  # do not override overflow except in setvl
  
-        # if there was not an explicit CR0 in the pseudocode, do implicit Rc=1
-        if cr0 is None:
+        if rc_reg == "CR1":
+            if cr1 is None:
+                cr1 = int(FPSCR.FX) << 3
+                cr1 |= int(FPSCR.FEX) << 2
+                cr1 |= int(FPSCR.VX) << 1
+                cr1 |= int(FPSCR.OX)
+                log("default fp cr1", cr1)
+            else:
+                log("explicit cr1", cr1)
+            self.crl[regnum].eq(cr1)
+        elif cr0 is None:
+            # if there was not an explicit CR0 in the pseudocode,
+            # do implicit Rc=1
              self.handle_comparison(result, regnum, overflow, no_so=is_setvl)
          else:
              # otherwise we just blat CR0 into the required regnum
              log("explicit rc0", cr0)
              self.crl[regnum].eq(cr0)
  
              self.handle_comparison(result, regnum, overflow, no_so=is_setvl)
          else:
              # otherwise we just blat CR0 into the required regnum
              log("explicit rc0", cr0)
              self.crl[regnum].eq(cr0)
  
-    def do_outregs_nia(self, asmop, ins_name, info, outs,
-                       carry_en, rc_en, ffirst_hit, ew_dst):
+    def do_outregs(self, info, outs, ca_en, ffirst_hit, ew_dst, outs_ok):
          ffirst_hit, vli = ffirst_hit
          ffirst_hit, vli = ffirst_hit
-        # write out any regs for this instruction
-        for name, output in outs.items():
-            yield from self.check_write(info, name, output, carry_en, ew_dst)
+        # write out any regs for this instruction, but only if fail-first is ok
+        # XXX TODO: allow CR-vector to be written out even if ffirst fails
+        if not ffirst_hit or vli:
+            for name, output in outs.items():
+                if not outs_ok[name]:
+                    log("skipping writing output with .ok=False", name, output)
+                    continue
+                yield from self.check_write(info, name, output, ca_en, ew_dst)
          # restore the CR value on non-VLI failfirst (from sv.cmp and others
          # which write directly to CR in the pseudocode (gah, what a mess)
          # if ffirst_hit and not vli:
          #    self.cr.value = self.cr_backup
  
          # restore the CR value on non-VLI failfirst (from sv.cmp and others
          # which write directly to CR in the pseudocode (gah, what a mess)
          # if ffirst_hit and not vli:
          #    self.cr.value = self.cr_backup
  
+    def do_nia(self, asmop, ins_name, rc_en, ffirst_hit):
+        ffirst_hit, vli = ffirst_hit
          if ffirst_hit:
              self.svp64_reset_loop()
              nia_update = True
          else:
              # check advancement of src/dst/sub-steps and if PC needs updating
          if ffirst_hit:
              self.svp64_reset_loop()
              nia_update = True
          else:
              # check advancement of src/dst/sub-steps and if PC needs updating
-            nia_update = (yield from self.check_step_increment(rc_en,
-                                                               asmop, ins_name))
+            nia_update = (yield from self.check_step_increment(
+                rc_en, asmop, ins_name))
          if nia_update:
              self.update_pc_next()
  
          if nia_update:
              self.update_pc_next()
  
@@ -2112,14 +2518,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          if op == MicrOp.OP_LOAD.value:
              if remap_active:
                  offsmul = yield self.dec2.in1_step
          if op == MicrOp.OP_LOAD.value:
              if remap_active:
                  offsmul = yield self.dec2.in1_step
-                log("D-field REMAP src", imm, offsmul)
+                log("D-field REMAP src", imm, offsmul, ldstmode)
              else:
                  offsmul = (srcstep * (subvl+1)) + ssubstep
              else:
                  offsmul = (srcstep * (subvl+1)) + ssubstep
-                log("D-field src", imm, offsmul)
+                log("D-field src", imm, offsmul, ldstmode)
          elif op == MicrOp.OP_STORE.value:
              # XXX NOTE! no bit-reversed STORE! this should not ever be used
              offsmul = (dststep * (subvl+1)) + dsubstep
          elif op == MicrOp.OP_STORE.value:
              # XXX NOTE! no bit-reversed STORE! this should not ever be used
              offsmul = (dststep * (subvl+1)) + dsubstep
-            log("D-field dst", imm, offsmul)
+            log("D-field dst", imm, offsmul, ldstmode)
          # Unit-Strided LD/ST adds offset*width to immediate
          if ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
              ldst_len = yield self.dec2.e.do.data_len
          # Unit-Strided LD/ST adds offset*width to immediate
          if ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
              ldst_len = yield self.dec2.e.do.data_len
@@ -2144,7 +2550,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
              else:
                  self.namespace['D'] = imm
  
              else:
                  self.namespace['D'] = imm
  
-    def get_input(self, name, ew_src):
+    def get_input(self, name, ew_src, xlen):
          # using PowerDecoder2, first, find the decoder index.
          # (mapping name RA RB RC RS to in1, in2, in3)
          regnum, is_vec = yield from get_idx_in(self.dec2, name, True)
          # using PowerDecoder2, first, find the decoder index.
          # (mapping name RA RB RC RS to in1, in2, in3)
          regnum, is_vec = yield from get_idx_in(self.dec2, name, True)
@@ -2171,11 +2577,18 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          if not self.is_svp64_mode or not self.pred_src_zero:
              log('reading reg %s %s' % (name, str(regnum)), is_vec)
              if name in fregs:
          if not self.is_svp64_mode or not self.pred_src_zero:
              log('reading reg %s %s' % (name, str(regnum)), is_vec)
              if name in fregs:
-                reg_val = SelectableInt(self.fpr(base, is_vec, offs, ew_src))
-                log("read reg %d/%d: 0x%x" % (base, offs, reg_val.value))
+                fval = self.fpr(base, is_vec, offs, ew_src)
+                reg_val = SelectableInt(fval)
+                # FP reads do not yet convert between element widths, so
+                # only a full-width source is accepted.  XLEN is an
+                # attribute of the simulator (self.XLEN); there is no local
+                # or parameter called XLEN in this method.
+                assert ew_src == self.XLEN, "TODO fix elwidth conversion"
+                self.trace("r:FPR:%d:%d:%d " % (base, offs, ew_src))
+                log("read fp reg %d/%d: 0x%x" % (base, offs, reg_val.value),
+                    kind=LogType.InstrInOuts)
              elif name is not None:
              elif name is not None:
-                reg_val = SelectableInt(self.gpr(base, is_vec, offs, ew_src))
-                log("read reg %d/%d: 0x%x" % (base, offs, reg_val.value))
+                gval = self.gpr(base, is_vec, offs, ew_src)
+                reg_val = SelectableInt(gval.value, bits=xlen)
+                self.trace("r:GPR:%d:%d:%d " % (base, offs, ew_src))
+                log("read int reg %d/%d: 0x%x" % (base, offs, reg_val.value),
+                    kind=LogType.InstrInOuts)
          else:
              log('zero input reg %s %s' % (name, str(regnum)), is_vec)
              reg_val = SelectableInt(0, ew_src)
          else:
              log('zero input reg %s %s' % (name, str(regnum)), is_vec)
              reg_val = SelectableInt(0, ew_src)
@@ -2249,8 +2662,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          if name == 'CR0':  # ignore, done already (above)
              return
          if isinstance(output, int):
          if name == 'CR0':  # ignore, done already (above)
              return
          if isinstance(output, int):
-            output = SelectableInt(output, 256)
-        # write carry flafs
+            output = SelectableInt(output, EFFECTIVELY_UNLIMITED)
+        # write FPSCR
+        if name in ['FPSCR', ]:
+            log("write FPSCR 0x%x" % (output.value))
+            self.FPSCR.eq(output)
+            return
+        # write carry flags
          if name in ['CA', 'CA32']:
              if carry_en:
                  log("writing %s to XER" % name, output)
          if name in ['CA', 'CA32']:
              if carry_en:
                  log("writing %s to XER" % name, output)
@@ -2262,7 +2680,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          # write special SPRs
          if name in info.special_regs:
              log('writing special %s' % name, output, special_sprs)
          # write special SPRs
          if name in info.special_regs:
              log('writing special %s' % name, output, special_sprs)
-            log("write reg %s 0x%x" % (name, output.value))
+            log("write reg %s 0x%x" % (name, output.value),
+                kind=LogType.InstrInOuts)
              if name in special_sprs:
                  self.spr[name] = output
              else:
              if name in special_sprs:
                  self.spr[name] = output
              else:
@@ -2286,17 +2705,20 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          # check zeroing due to predicate bit being zero
          if self.is_svp64_mode and self.pred_dst_zero:
              log('zeroing reg %s %s' % (str(regnum), str(output)), is_vec)
          # check zeroing due to predicate bit being zero
          if self.is_svp64_mode and self.pred_dst_zero:
              log('zeroing reg %s %s' % (str(regnum), str(output)), is_vec)
-            output = SelectableInt(0, 256)
+            output = SelectableInt(0, EFFECTIVELY_UNLIMITED)
          log("write reg %s%s 0x%x ew %d" % (reg_prefix, str(regnum),
                                             output.value, ew_dst),
          log("write reg %s%s 0x%x ew %d" % (reg_prefix, str(regnum),
                                             output.value, ew_dst),
-            kind=LogKind.InstrInOuts)
+            kind=LogType.InstrInOuts)
          # zero-extend to 64 bit before storing (should use EXT oh well)
          if output.bits > 64:
              output = SelectableInt(output.value, 64)
          # zero-extend to 64 bit before storing (should use EXT oh well)
          if output.bits > 64:
              output = SelectableInt(output.value, 64)
+        rnum, base, offset = regnum
          if name in fregs:
              self.fpr.write(regnum, output, is_vec, ew_dst)
          if name in fregs:
              self.fpr.write(regnum, output, is_vec, ew_dst)
+            self.trace("w:FPR:%d:%d:%d " % (rnum, offset, ew_dst))
          else:
              self.gpr.write(regnum, output, is_vec, ew_dst)
          else:
              self.gpr.write(regnum, output, is_vec, ew_dst)
+            self.trace("w:GPR:%d:%d:%d " % (rnum, offset, ew_dst))
  
      def check_step_increment(self, rc_en, asmop, ins_name):
          # check if it is the SVSTATE.src/dest step that needs incrementing
  
      def check_step_increment(self, rc_en, asmop, ins_name):
          # check if it is the SVSTATE.src/dest step that needs incrementing
@@ -2455,6 +2877,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          vfirst = self.svstate.vfirst
          log("    SV Vertical First", vf, vfirst)
          if not vf and vfirst == 1:
          vfirst = self.svstate.vfirst
          log("    SV Vertical First", vf, vfirst)
          if not vf and vfirst == 1:
+            if insn_name.startswith("sv.bc"):
+                self.update_pc_next()
+                return False
              self.update_nia()
              return True
  
              self.update_nia()
              return True
  
@@ -2488,7 +2913,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          log("    reverse", reverse_gear)
          log("    out_vec", out_vec)
          log("    in_vec", in_vec)
          log("    reverse", reverse_gear)
          log("    out_vec", out_vec)
          log("    in_vec", in_vec)
-        log("    sv_ptype", sv_ptype, sv_ptype == SVPtype.P2.value)
+        log("    sv_ptype", sv_ptype, sv_ptype == SVPType.P2.value)
          # check if this was an sv.bc* and if so did it succeed
          if self.is_svp64_mode and insn_name.startswith("sv.bc"):
              end_loop = self.namespace['end_loop']
          # check if this was an sv.bc* and if so did it succeed
          if self.is_svp64_mode and insn_name.startswith("sv.bc"):
              end_loop = self.namespace['end_loop']
@@ -2500,7 +2925,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          # check if srcstep needs incrementing by one, stop PC advancing
          # but for 2-pred both src/dest have to be checked.
          # XXX this might not be true! it may just be LD/ST
          # check if srcstep needs incrementing by one, stop PC advancing
          # but for 2-pred both src/dest have to be checked.
          # XXX this might not be true! it may just be LD/ST
-        if sv_ptype == SVPtype.P2.value:
+        if sv_ptype == SVPType.P2.value:
              svp64_is_vector = (out_vec or in_vec)
          else:
              svp64_is_vector = out_vec
              svp64_is_vector = (out_vec or in_vec)
          else:
              svp64_is_vector = out_vec
@@ -2521,7 +2946,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
          # not an SVP64 branch, so fix PC (NIA==CIA) for next loop
          # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64)
          # this way we keep repeating the same instruction (with new steps)
          # not an SVP64 branch, so fix PC (NIA==CIA) for next loop
          # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64)
          # this way we keep repeating the same instruction (with new steps)
-        self.pc.NIA.value = self.pc.CIA.value
+        self.pc.NIA.eq(self.pc.CIA)
          self.namespace['NIA'] = self.pc.NIA
          log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA'])
          return False  # DO NOT allow PC update whilst Sub-PC loop running
          self.namespace['NIA'] = self.pc.NIA
          log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA'])
          return False  # DO NOT allow PC update whilst Sub-PC loop running