Implement CR predication
[soc.git] / src / soc / simple / issuer.py
index aaed35085c3f407976ce3e1cb36d2e0c05173cd7..d2c248d50bef2566784967afa2429089c207f0d6 100644 (file)
@@ -16,11 +16,13 @@ improved.
 """
 
 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
-                    ClockDomain, DomainRenamer, Mux, Const, Repl)
+                    ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
 from nmigen.cli import rtlil
 from nmigen.cli import main
 import sys
 
+from nmigen.lib.coding import PriorityEncoder
+
 from soc.decoder.power_decoder import create_pdecode
 from soc.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
@@ -32,6 +34,7 @@ from soc.config.test.test_loadstore import TestMemPspec
 from soc.config.ifetch import ConfigFetchUnit
 from soc.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
                                      SVP64PredMode)
+from soc.consts import (CR, SVP64CROffs)
 from soc.debug.dmi import CoreDebug, DMIInterface
 from soc.debug.jtag import JTAG
 from soc.config.pinouts import get_pinspecs
@@ -54,22 +57,23 @@ def get_insn(f_instr_o, pc):
         return f_instr_o.word_select(pc[2], 32)
 
 # gets state input or reads from state regfile
-def state_get(m, state_i, name, regfile, regnum):
+def state_get(m, core_rst, state_i, name, regfile, regnum):
     comb = m.d.comb
     sync = m.d.sync
     # read the PC
     res = Signal(64, reset_less=True, name=name)
     res_ok_delay = Signal(name="%s_ok_delay" % name)
-    sync += res_ok_delay.eq(~state_i.ok)
-    with m.If(state_i.ok):
-        # incoming override (start from pc_i)
-        comb += res.eq(state_i.data)
-    with m.Else():
-        # otherwise read StateRegs regfile for PC...
-        comb += regfile.ren.eq(1<<regnum)
-    # ... but on a 1-clock delay
-    with m.If(res_ok_delay):
-        comb += res.eq(regfile.data_o)
+    with m.If(~core_rst):
+        sync += res_ok_delay.eq(~state_i.ok)
+        with m.If(state_i.ok):
+            # incoming override (start from pc_i)
+            comb += res.eq(state_i.data)
+        with m.Else():
+            # otherwise read StateRegs regfile for PC...
+            comb += regfile.ren.eq(1<<regnum)
+        # ... but on a 1-clock delay
+        with m.If(res_ok_delay):
+            comb += res.eq(regfile.data_o)
     return res
 
 def get_predint(m, mask, name):
@@ -120,29 +124,29 @@ def get_predcr(m, mask, name):
     invert = Signal(name=name+"crinvert")
     with m.Switch(mask):
         with m.Case(SVP64PredCR.LT.value):
-            comb += idx.eq(0)
-            comb += invert.eq(1)
-        with m.Case(SVP64PredCR.GE.value):
-            comb += idx.eq(0)
+            comb += idx.eq(CR.LT)
             comb += invert.eq(0)
-        with m.Case(SVP64PredCR.GT.value):
-            comb += idx.eq(1)
+        with m.Case(SVP64PredCR.GE.value):
+            comb += idx.eq(CR.LT)
             comb += invert.eq(1)
-        with m.Case(SVP64PredCR.LE.value):
-            comb += idx.eq(1)
+        with m.Case(SVP64PredCR.GT.value):
+            comb += idx.eq(CR.GT)
             comb += invert.eq(0)
-        with m.Case(SVP64PredCR.EQ.value):
-            comb += idx.eq(2)
+        with m.Case(SVP64PredCR.LE.value):
+            comb += idx.eq(CR.GT)
             comb += invert.eq(1)
-        with m.Case(SVP64PredCR.NE.value):
-            comb += idx.eq(1)
+        with m.Case(SVP64PredCR.EQ.value):
+            comb += idx.eq(CR.EQ)
             comb += invert.eq(0)
-        with m.Case(SVP64PredCR.SO.value):
-            comb += idx.eq(3)
+        with m.Case(SVP64PredCR.NE.value):
+            comb += idx.eq(CR.EQ)
             comb += invert.eq(1)
-        with m.Case(SVP64PredCR.NS.value):
-            comb += idx.eq(3)
+        with m.Case(SVP64PredCR.SO.value):
+            comb += idx.eq(CR.SO)
             comb += invert.eq(0)
+        with m.Case(SVP64PredCR.NS.value):
+            comb += idx.eq(CR.SO)
+            comb += invert.eq(1)
     return idx, invert
 
 
@@ -158,13 +162,23 @@ class TestIssuerInternal(Elaboratable):
         # test is SVP64 is to be enabled
         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 
+        # and if regfiles are reduced
+        self.regreduce_en = (hasattr(pspec, "regreduce") and
+                                            (pspec.regreduce == True))
+
         # JTAG interface.  add this right at the start because if it's
         # added it *modifies* the pspec, by adding enable/disable signals
         # for parts of the rest of the core
         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
         if self.jtag_en:
-            subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
-                      'pwm', 'sd0', 'sdr'}
+            # XXX MUST keep this up-to-date with litex, and
+            # soc-cocotb-sim, and err.. all needs sorting out, argh
+            subset = ['uart',
+                      'mtwi',
+                      'eint', 'gpio', 'mspi0',
+                      # 'mspi1', - disabled for now
+                      # 'pwm', 'sd0', - disabled for now
+                       'sdr']
             self.jtag = JTAG(get_pinspecs(subset=subset))
             # add signals to pspec to enable/disable icache and dcache
             # (or data and intstruction wishbone if icache/dcache not included)
@@ -207,7 +221,8 @@ class TestIssuerInternal(Elaboratable):
         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
         self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                      opkls=IssuerDecode2ToOperand,
-                                     svp64_en=self.svp64_en)
+                                     svp64_en=self.svp64_en,
+                                     regreduce_en=self.regreduce_en)
         if self.svp64_en:
             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 
@@ -241,9 +256,10 @@ class TestIssuerInternal(Elaboratable):
         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 
-        # for predication
-        self.int_pred = intrf.r_ports['pred'] # INT predicate read
-        self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
+        if self.svp64_en:
+            # for predication
+            self.int_pred = intrf.r_ports['pred'] # INT predicate read
+            self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 
         # hack method of keeping an eye on whether branch/trap set the PC
         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
@@ -384,6 +400,8 @@ class TestIssuerInternal(Elaboratable):
         later, a faster way would be to use the 32-bit-wide CR port but
         this is more complex decoding, here.  equivalent code used in
         ISACaller is "from soc.decoder.isa.caller import get_predcr"
+
+        note: this ENTIRE FSM is not to be called when svp64 is disabled
         """
         comb = m.d.comb
         sync = m.d.sync
@@ -392,24 +410,11 @@ class TestIssuerInternal(Elaboratable):
         predmode = rm_dec.predmode
         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
-
-        # elif predmode == CR:
-        #    CR-src sidx, sinvert = get_predcr(m, srcpred)
-        #    CR-dst didx, dinvert = get_predcr(m, dstpred)
-        #    TODO read CR-src and CR-dst into self.srcmask+dstmask with loop
-        #         has to cope with first one then the other
-        #    for cr_idx = FSM-state-loop(0..VL-1):
-        #        FSM-state-trigger-CR-read:
-        #               cr_ren = (1<<7-(cr_idx+SVP64CROffs.CRPred))
-        #               comb += cr_pred.ren.eq(cr_ren)
-        #        FSM-state-1-clock-later-actual-Read:
-        #               cr_field = Signal(4)
-        #               cr_bit = Signal(1)
-        #               # read the CR field, select the appropriate bit
-        #               comb += cr_field.eq(cr_pred.data_o)
-        #               comb += cr_bit.eq(cr_field.bit_select(idx)))
-        #               # just like in branch BO tests
-        #               comd += self.srcmask[cr_idx].eq(inv ^ cr_bit)
+        # get src/dst step, so we can skip already used mask bits
+        cur_state = self.cur_state
+        srcstep = cur_state.svstate.srcstep
+        dststep = cur_state.svstate.dststep
+        cur_vl = cur_state.svstate.vl
 
         # decode predicates
         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
@@ -437,6 +442,11 @@ class TestIssuerInternal(Elaboratable):
                             comb += int_pred.addr.eq(dregread)
                             comb += int_pred.ren.eq(1)
                             m.next = "INT_DST_READ"
+                    with m.Elif(predmode == SVP64PredMode.CR):
+                        # go fetch masks from the CR register file
+                        sync += self.srcmask.eq(0)
+                        sync += self.dstmask.eq(0)
+                        m.next = "CR_READ"
                     with m.Else():
                         sync += self.srcmask.eq(-1)
                         sync += self.dstmask.eq(-1)
@@ -445,7 +455,17 @@ class TestIssuerInternal(Elaboratable):
             with m.State("INT_DST_READ"):
                 # store destination mask
                 inv = Repl(dinvert, 64)
-                sync += self.dstmask.eq(self.int_pred.data_o ^ inv)
+                new_dstmask = Signal(64)
+                with m.If(dunary):
+                    # set selected mask bit for 1<<r3 mode
+                    dst_shift = Signal(range(64))
+                    comb += dst_shift.eq(self.int_pred.data_o & 0b111111)
+                    comb += new_dstmask.eq(1 << dst_shift)
+                with m.Else():
+                    # invert mask if requested
+                    comb += new_dstmask.eq(self.int_pred.data_o ^ inv)
+                # shift-out already used mask bits
+                sync += self.dstmask.eq(new_dstmask >> dststep)
                 # skip fetching source mask register, when zero
                 with m.If(sall1s):
                     sync += self.srcmask.eq(-1)
@@ -459,9 +479,69 @@ class TestIssuerInternal(Elaboratable):
             with m.State("INT_SRC_READ"):
                 # store source mask
                 inv = Repl(sinvert, 64)
-                sync += self.srcmask.eq(self.int_pred.data_o ^ inv)
+                new_srcmask = Signal(64)
+                with m.If(sunary):
+                    # set selected mask bit for 1<<r3 mode
+                    src_shift = Signal(range(64))
+                    comb += src_shift.eq(self.int_pred.data_o & 0b111111)
+                    comb += new_srcmask.eq(1 << src_shift)
+                with m.Else():
+                    # invert mask if requested
+                    comb += new_srcmask.eq(self.int_pred.data_o ^ inv)
+                # shift-out already used mask bits
+                sync += self.srcmask.eq(new_srcmask >> srcstep)
                 m.next = "FETCH_PRED_DONE"
 
+            # fetch masks from the CR register file
+            # implements the following loop:
+            # idx, inv = get_predcr(mask)
+            # mask = 0
+            # for cr_idx in range(vl):
+            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle to complete
+            #     if cr[idx] ^ inv:
+            #         mask |= 1 << cr_idx
+            # return mask
+            with m.State("CR_READ"):
+                # the CR index to be read, which will be ready by the next cycle
+                cr_idx = Signal.like(cur_vl, reset_less=True)
+                # submit the read operation to the regfile
+                with m.If(cr_idx != cur_vl):
+                    # the CR read port is unary ...
+                    # ren = 1 << cr_idx
+                    # ... in MSB0 convention ...
+                    # ren = 1 << (7 - cr_idx)
+                    # ... and with an offset:
+                    # ren = 1 << (7 - off - cr_idx)
+                    comb += cr_pred.ren.eq(1 << (7 - SVP64CROffs.CRPred - cr_idx))
+                    # signal data valid in the next cycle
+                    cr_read = Signal(reset_less=True)
+                    sync += cr_read.eq(1)
+                    # load the next index
+                    sync += cr_idx.eq(cr_idx + 1)
+                with m.Else():
+                    # exit on loop end
+                    sync += cr_read.eq(0)
+                    sync += cr_idx.eq(0)
+                    m.next = "FETCH_PRED_DONE"
+                with m.If(cr_read):
+                    # compensate for the one cycle delay on the regfile
+                    cur_cr_idx = Signal.like(cur_vl)
+                    comb += cur_cr_idx.eq(cr_idx - 1)
+                    # read the CR field, select the appropriate bit
+                    cr_field = Signal(4)
+                    scr_bit = Signal()
+                    dcr_bit = Signal()
+                    comb += cr_field.eq(cr_pred.data_o)
+                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
+                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
+                    # set the corresponding mask bit
+                    bit_to_set = Signal.like(self.srcmask)
+                    comb += bit_to_set.eq(1 << cur_cr_idx)
+                    with m.If(scr_bit):
+                        sync += self.srcmask.eq(self.srcmask | bit_to_set)
+                    with m.If(dcr_bit):
+                        sync += self.dstmask.eq(self.dstmask | bit_to_set)
+
             with m.State("FETCH_PRED_DONE"):
                 comb += pred_mask_valid_o.eq(1)
                 with m.If(pred_mask_ready_i):
@@ -521,7 +601,6 @@ class TestIssuerInternal(Elaboratable):
                         m.next = "INSN_WAIT"
                 with m.Else():
                     # tell core it's stopped, and acknowledge debug handshake
-                    comb += core.core_stopped_i.eq(1)
                     comb += dbg.core_stopped_i.eq(1)
                     # while stopped, allow updating the PC and SVSTATE
                     with m.If(self.pc_i.ok):
@@ -533,17 +612,10 @@ class TestIssuerInternal(Elaboratable):
                         comb += update_svstate.eq(1)
                         sync += sv_changed.eq(1)
 
-            # decode the instruction when it arrives
+            # wait for an instruction to arrive from Fetch
             with m.State("INSN_WAIT"):
                 comb += fetch_insn_ready_i.eq(1)
                 with m.If(fetch_insn_valid_o):
-                    # decode the instruction
-                    sync += core.e.eq(pdecode2.e)
-                    sync += core.state.eq(cur_state)
-                    sync += core.raw_insn_i.eq(dec_opcode_i)
-                    sync += core.bigendian_i.eq(self.core_bigendian_i)
-                    # set RA_OR_ZERO detection in satellite decoders
-                    sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
                     # loop into ISSUE_START if it's a SVP64 instruction
                     # and VL == 0.  this because VL==0 is a for-loop
                     # from 0 to 0 i.e. always, always a NOP.
@@ -560,7 +632,7 @@ class TestIssuerInternal(Elaboratable):
                         if self.svp64_en:
                             m.next = "PRED_START"  # start fetching predicate
                         else:
-                            m.next = "INSN_EXECUTE" # skip predication
+                            m.next = "DECODE_SV"  # skip predication
 
             with m.State("PRED_START"):
                 comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
@@ -570,42 +642,85 @@ class TestIssuerInternal(Elaboratable):
             with m.State("MASK_WAIT"):
                 comb += pred_mask_ready_i.eq(1) # ready to receive the masks
                 with m.If(pred_mask_valid_o): # predication masks are ready
-                    m.next = "INSN_EXECUTE"
-
-            # handshake with execution FSM, move to "wait" once acknowledged
-            with m.State("INSN_EXECUTE"):
-                # with m.If(is_svp64_mode):
-                #    TODO advance src/dst step to "skip" over predicated-out
-                #    from self.srcmask and self.dstmask
-                #    https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
-                #    but still without exceeding VL in either case
-                # IMPORTANT: when changing src/dest step, have to
-                # jump to m.next = "DECODE_SV" to deal with the change in
-                # SVSTATE
-
-                with m.If(is_svp64_mode):
+                    m.next = "PRED_SKIP"
 
+            # skip zeros in predicate
+            with m.State("PRED_SKIP"):
+                with m.If(~is_svp64_mode):
+                    m.next = "DECODE_SV"  # nothing to do
+                with m.Else():
                     if self.svp64_en:
                         pred_src_zero = pdecode2.rm_dec.pred_sz
                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 
-                    """
-                    if not pred_src_zero:
-                        if (((1<<cur_srcstep) & self.srcmask) == 0) and
-                              (cur_srcstep != vl):
+                        # new srcstep, after skipping zeros
+                        skip_srcstep = Signal.like(cur_srcstep)
+                        # value to be added to the current srcstep
+                        src_delta = Signal.like(cur_srcstep)
+                        # add leading zeros to srcstep, if not in zero mode
+                        with m.If(~pred_src_zero):
+                            # priority encoder (count leading zeros)
+                            # append guard bit, in case the mask is all zeros
+                            pri_enc_src = PriorityEncoder(65)
+                            m.submodules.pri_enc_src = pri_enc_src
+                            comb += pri_enc_src.i.eq(Cat(self.srcmask,
+                                                         Const(1, 1)))
+                            comb += src_delta.eq(pri_enc_src.o)
+                        # apply delta to srcstep
+                        comb += skip_srcstep.eq(cur_srcstep + src_delta)
+                        # shift-out all leading zeros from the mask
+                        # plus the leading "one" bit
+                        # TODO count leading zeros and shift-out the zero
+                        #      bits, in the same step, in hardware
+                        sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
+
+                        # same as above, but for dststep
+                        skip_dststep = Signal.like(cur_dststep)
+                        dst_delta = Signal.like(cur_dststep)
+                        with m.If(~pred_dst_zero):
+                            pri_enc_dst = PriorityEncoder(65)
+                            m.submodules.pri_enc_dst = pri_enc_dst
+                            comb += pri_enc_dst.i.eq(Cat(self.dstmask,
+                                                         Const(1, 1)))
+                            comb += dst_delta.eq(pri_enc_dst.o)
+                        comb += skip_dststep.eq(cur_dststep + dst_delta)
+                        sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
+
+                        # TODO: initialize mask[VL]=1 to avoid passing past VL
+                        with m.If((skip_srcstep >= cur_vl) |
+                                  (skip_dststep >= cur_vl)):
+                            # end of VL loop. Update PC and reset src/dst step
+                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
+                            comb += self.state_w_pc.data_i.eq(nia)
+                            comb += new_svstate.srcstep.eq(0)
+                            comb += new_svstate.dststep.eq(0)
                             comb += update_svstate.eq(1)
-                            comb += new_svstate.srcstep.eq(next_srcstep)
-
-                    if not pred_dst_zero:
-                        if (((1<<cur_dststep) & self.dstmask) == 0) and
-                              (cur_dststep != vl):
-                            comb += new_svstate.dststep.eq(next_dststep)
+                            # synchronize with the simulator
+                            comb += self.insn_done.eq(1)
+                            # go back to Issue
+                            m.next = "ISSUE_START"
+                        with m.Else():
+                            # update new src/dst step
+                            comb += new_svstate.srcstep.eq(skip_srcstep)
+                            comb += new_svstate.dststep.eq(skip_dststep)
                             comb += update_svstate.eq(1)
+                            # proceed to Decode
+                            m.next = "DECODE_SV"
 
-                    if update_svstate:
-                        m.next = "DECODE_SV"
-                    """
+            # after src/dst step have been updated, we are ready
+            # to decode the instruction
+            with m.State("DECODE_SV"):
+                # decode the instruction
+                sync += core.e.eq(pdecode2.e)
+                sync += core.state.eq(cur_state)
+                sync += core.raw_insn_i.eq(dec_opcode_i)
+                sync += core.bigendian_i.eq(self.core_bigendian_i)
+                # set RA_OR_ZERO detection in satellite decoders
+                sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
+                m.next = "INSN_EXECUTE"  # move to "execute"
 
+            # handshake with execution FSM, move to "wait" once acknowledged
+            with m.State("INSN_EXECUTE"):
                 comb += exec_insn_valid_i.eq(1) # trigger execute
                 with m.If(exec_insn_ready_o):   # execute acknowledged us
                     m.next = "EXECUTE_WAIT"
@@ -640,7 +755,12 @@ class TestIssuerInternal(Elaboratable):
                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                             comb += self.state_w_pc.data_i.eq(nia)
                             # reset SRCSTEP before returning to Fetch
-                            with m.If(pdecode2.loop_continue):
+                            if self.svp64_en:
+                                with m.If(pdecode2.loop_continue):
+                                    comb += new_svstate.srcstep.eq(0)
+                                    comb += new_svstate.dststep.eq(0)
+                                    comb += update_svstate.eq(1)
+                            else:
                                 comb += new_svstate.srcstep.eq(0)
                                 comb += new_svstate.dststep.eq(0)
                                 comb += update_svstate.eq(1)
@@ -651,10 +771,10 @@ class TestIssuerInternal(Elaboratable):
                             comb += new_svstate.srcstep.eq(next_srcstep)
                             comb += new_svstate.dststep.eq(next_dststep)
                             comb += update_svstate.eq(1)
-                            m.next = "DECODE_SV"
+                            # return to mask skip loop
+                            m.next = "PRED_SKIP"
 
                 with m.Else():
-                    comb += core.core_stopped_i.eq(1)
                     comb += dbg.core_stopped_i.eq(1)
                     # while stopped, allow updating the PC and SVSTATE
                     with m.If(self.pc_i.ok):
@@ -666,17 +786,6 @@ class TestIssuerInternal(Elaboratable):
                         comb += update_svstate.eq(1)
                         sync += sv_changed.eq(1)
 
-            # need to decode the instruction again, after updating SRCSTEP
-            # in the previous state.
-            # mostly a copy of INSN_WAIT, but without the actual wait
-            with m.State("DECODE_SV"):
-                # decode the instruction
-                sync += core.e.eq(pdecode2.e)
-                sync += core.state.eq(cur_state)
-                sync += core.bigendian_i.eq(self.core_bigendian_i)
-                sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
-                m.next = "INSN_EXECUTE"  # move to "execute"
-
         # check if svstate needs updating: if so, write it to State Regfile
         with m.If(update_svstate):
             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
@@ -820,6 +929,10 @@ class TestIssuerInternal(Elaboratable):
         # set up peripherals and core
         core_rst = self.setup_peripherals(m)
 
+        # reset current state if core reset requested
+        with m.If(core_rst):
+            m.d.sync += self.cur_state.eq(0)
+
         # PC and instruction from I-Memory
         comb += self.pc_o.eq(cur_state.pc)
         pc_changed = Signal() # note write to PC
@@ -827,9 +940,11 @@ class TestIssuerInternal(Elaboratable):
 
         # read state either from incoming override or from regfile
         # TODO: really should be doing MSR in the same way
-        pc = state_get(m, self.pc_i, "pc",                  # read PC
+        pc = state_get(m, core_rst, self.pc_i,
+                            "pc",                  # read PC
                             self.state_r_pc, StateRegs.PC)
-        svstate = state_get(m, self.svstate_i, "svstate",   # read SVSTATE
+        svstate = state_get(m, core_rst, self.svstate_i,
+                            "svstate",   # read SVSTATE
                             self.state_r_sv, StateRegs.SVSTATE)
 
         # don't write pc every cycle
@@ -841,7 +956,7 @@ class TestIssuerInternal(Elaboratable):
 
         # address of the next instruction, in the absence of a branch
         # depends on the instruction size
-        nia = Signal(64, reset_less=True)
+        nia = Signal(64)
 
         # connect up debug signals
         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
@@ -912,6 +1027,10 @@ class TestIssuerInternal(Elaboratable):
                          exec_insn_valid_i, exec_insn_ready_o,
                          exec_pc_valid_o, exec_pc_ready_i)
 
+        # whatever was done above, over-ride it if core reset is held
+        with m.If(core_rst):
+            sync += nia.eq(0)
+
         # this bit doesn't have to be in the FSM: connect up to read
         # regfiles on demand from DMI
         self.do_dmi(m, dbg)
@@ -1075,6 +1194,7 @@ class TestIssuer(Elaboratable):
         self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
         if self.pll_en:
             self.pll_18_o = Signal(reset_less=True)
+            self.clk_sel_i = Signal(reset_less=True)
 
     def elaborate(self, platform):
         m = Module()
@@ -1103,6 +1223,9 @@ class TestIssuer(Elaboratable):
             # output 18 mhz PLL test signal
             comb += self.pll_18_o.eq(pll.pll_18_o)
 
+            # input to pll clock selection
+            comb += Cat(pll.sel_a0_i, pll.sel_a1_i).eq(self.clk_sel_i)
+
             # now wire up ResetSignals.  don't mind them being in this domain
             pll_rst = ResetSignal("pllclk")
             comb += pll_rst.eq(ResetSignal())
@@ -1126,9 +1249,9 @@ class TestIssuer(Elaboratable):
         ports.append(ClockSignal())
         ports.append(ResetSignal())
         if self.pll_en:
-            ports.append(self.pll.clk_sel_i)
+            ports.append(self.clk_sel_i)
             ports.append(self.pll_18_o)
-            ports.append(self.pll.pll_lck_o)
+            ports.append(self.pll.pll_ana_o)
         return ports