"""simple core issuer not in any way intended for production use. this runs a FSM that: * reads the Program Counter from StateRegs * reads an instruction from a fixed-size Test Memory * issues it to the Simple Core * waits for it to complete * increments the PC * does it all over again the purpose of this module is to verify the functional correctness of the Function Units in the absolute simplest and clearest possible way, and to at provide something that can be further incrementally improved. """ from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal, ClockDomain, DomainRenamer, Mux, Const, Repl, Cat) from nmigen.cli import rtlil from nmigen.cli import main import sys from nmigen.lib.coding import PriorityEncoder from soc.decoder.power_decoder import create_pdecode from soc.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder from soc.decoder.decode2execute1 import IssuerDecode2ToOperand from soc.decoder.decode2execute1 import Data from soc.experiment.testmem import TestMemory # test only for instructions from soc.regfile.regfiles import StateRegs, FastRegs from soc.simple.core import NonProductionCore from soc.config.test.test_loadstore import TestMemPspec from soc.config.ifetch import ConfigFetchUnit from soc.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR, SVP64PredMode) from soc.debug.dmi import CoreDebug, DMIInterface from soc.debug.jtag import JTAG from soc.config.pinouts import get_pinspecs from soc.config.state import CoreState from soc.interrupts.xics import XICS_ICP, XICS_ICS from soc.bus.simple_gpio import SimpleGPIO from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W from soc.clock.select import ClockSelect from soc.clock.dummypll import DummyPLL from soc.sv.svstate import SVSTATERec from nmutil.util import rising_edge def get_insn(f_instr_o, pc): if f_instr_o.width == 32: return f_instr_o else: # 64-bit: bit 2 of pc decides which word to select return f_instr_o.word_select(pc[2], 32) # gets state input or reads from state regfile def state_get(m, core_rst, state_i, name, regfile, regnum): comb = m.d.comb sync = m.d.sync # read the PC res = Signal(64, reset_less=True, name=name) res_ok_delay = Signal(name="%s_ok_delay" % name) with m.If(~core_rst): sync += res_ok_delay.eq(~state_i.ok) with m.If(state_i.ok): # incoming override (start from pc_i) comb += res.eq(state_i.data) with m.Else(): # otherwise read StateRegs regfile for PC... comb += regfile.ren.eq(1<> dststep) # skip fetching source mask register, when zero with m.If(sall1s): sync += self.srcmask.eq(-1) m.next = "FETCH_PRED_DONE" # fetch source predicate register with m.Else(): comb += int_pred.addr.eq(sregread) comb += int_pred.ren.eq(1) m.next = "INT_SRC_READ" with m.State("INT_SRC_READ"): # store source mask inv = Repl(sinvert, 64) new_srcmask = Signal(64) with m.If(sunary): # set selected mask bit for 1<> srcstep) m.next = "FETCH_PRED_DONE" with m.State("FETCH_PRED_DONE"): comb += pred_mask_valid_o.eq(1) with m.If(pred_mask_ready_i): m.next = "FETCH_PRED_IDLE" def issue_fsm(self, m, core, pc_changed, sv_changed, nia, dbg, core_rst, is_svp64_mode, fetch_pc_ready_o, fetch_pc_valid_i, fetch_insn_valid_o, fetch_insn_ready_i, pred_insn_valid_i, pred_insn_ready_o, pred_mask_valid_o, pred_mask_ready_i, exec_insn_valid_i, exec_insn_ready_o, exec_pc_valid_o, exec_pc_ready_i): """issue FSM decode / issue FSM. this interacts with the "fetch" FSM through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid (outgoing). also interacts with the "execute" FSM through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid (incoming). SVP64 RM prefixes have already been set up by the "fetch" phase, so execute is fairly straightforward. """ comb = m.d.comb sync = m.d.sync pdecode2 = self.pdecode2 cur_state = self.cur_state # temporaries dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode # for updating svstate (things like srcstep etc.) update_svstate = Signal() # set this (below) if updating new_svstate = SVSTATERec("new_svstate") comb += new_svstate.eq(cur_state.svstate) # precalculate srcstep+1 and dststep+1 cur_srcstep = cur_state.svstate.srcstep cur_dststep = cur_state.svstate.dststep next_srcstep = Signal.like(cur_srcstep) next_dststep = Signal.like(cur_dststep) comb += next_srcstep.eq(cur_state.svstate.srcstep+1) comb += next_dststep.eq(cur_state.svstate.dststep+1) with m.FSM(name="issue_fsm"): # sync with the "fetch" phase which is reading the instruction # at this point, there is no instruction running, that # could inadvertently update the PC. with m.State("ISSUE_START"): # wait on "core stop" release, before next fetch # need to do this here, in case we are in a VL==0 loop with m.If(~dbg.core_stop_o & ~core_rst): comb += fetch_pc_valid_i.eq(1) # tell fetch to start with m.If(fetch_pc_ready_o): # fetch acknowledged us m.next = "INSN_WAIT" with m.Else(): # tell core it's stopped, and acknowledge debug handshake comb += dbg.core_stopped_i.eq(1) # while stopped, allow updating the PC and SVSTATE with m.If(self.pc_i.ok): comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) comb += self.state_w_pc.data_i.eq(self.pc_i.data) sync += pc_changed.eq(1) with m.If(self.svstate_i.ok): comb += new_svstate.eq(self.svstate_i.data) comb += update_svstate.eq(1) sync += sv_changed.eq(1) # wait for an instruction to arrive from Fetch with m.State("INSN_WAIT"): comb += fetch_insn_ready_i.eq(1) with m.If(fetch_insn_valid_o): # loop into ISSUE_START if it's a SVP64 instruction # and VL == 0. this because VL==0 is a for-loop # from 0 to 0 i.e. always, always a NOP. cur_vl = cur_state.svstate.vl with m.If(is_svp64_mode & (cur_vl == 0)): # update the PC before fetching the next instruction # since we are in a VL==0 loop, no instruction was # executed that we could be overwriting comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) comb += self.state_w_pc.data_i.eq(nia) comb += self.insn_done.eq(1) m.next = "ISSUE_START" with m.Else(): if self.svp64_en: m.next = "PRED_START" # start fetching predicate else: m.next = "DECODE_SV" # skip predication with m.State("PRED_START"): comb += pred_insn_valid_i.eq(1) # tell fetch_pred to start with m.If(pred_insn_ready_o): # fetch_pred acknowledged us m.next = "MASK_WAIT" with m.State("MASK_WAIT"): comb += pred_mask_ready_i.eq(1) # ready to receive the masks with m.If(pred_mask_valid_o): # predication masks are ready m.next = "PRED_SKIP" # skip zeros in predicate with m.State("PRED_SKIP"): with m.If(~is_svp64_mode): m.next = "DECODE_SV" # nothing to do with m.Else(): if self.svp64_en: pred_src_zero = pdecode2.rm_dec.pred_sz pred_dst_zero = pdecode2.rm_dec.pred_dz # new srcstep, after skipping zeros skip_srcstep = Signal.like(cur_srcstep) # value to be added to the current srcstep src_delta = Signal.like(cur_srcstep) # add leading zeros to srcstep, if not in zero mode with m.If(~pred_src_zero): # priority encoder (count leading zeros) # append guard bit, in case the mask is all zeros pri_enc_src = PriorityEncoder(65) m.submodules.pri_enc_src = pri_enc_src comb += pri_enc_src.i.eq(Cat(self.srcmask, Const(1, 1))) comb += src_delta.eq(pri_enc_src.o) # apply delta to srcstep comb += skip_srcstep.eq(cur_srcstep + src_delta) # shift-out all leading zeros from the mask # plus the leading "one" bit # TODO count leading zeros and shift-out the zero # bits, in the same step, in hardware sync += self.srcmask.eq(self.srcmask >> (src_delta+1)) # same as above, but for dststep skip_dststep = Signal.like(cur_dststep) dst_delta = Signal.like(cur_dststep) with m.If(~pred_dst_zero): pri_enc_dst = PriorityEncoder(65) m.submodules.pri_enc_dst = pri_enc_dst comb += pri_enc_dst.i.eq(Cat(self.dstmask, Const(1, 1))) comb += dst_delta.eq(pri_enc_dst.o) comb += skip_dststep.eq(cur_dststep + dst_delta) sync += self.dstmask.eq(self.dstmask >> (dst_delta+1)) # TODO: initialize mask[VL]=1 to avoid passing past VL with m.If((skip_srcstep >= cur_vl) | (skip_dststep >= cur_vl)): # end of VL loop. Update PC and reset src/dst step comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) comb += self.state_w_pc.data_i.eq(nia) comb += new_svstate.srcstep.eq(0) comb += new_svstate.dststep.eq(0) comb += update_svstate.eq(1) # synchronize with the simulator comb += self.insn_done.eq(1) # go back to Issue m.next = "ISSUE_START" with m.Else(): # update new src/dst step comb += new_svstate.srcstep.eq(skip_srcstep) comb += new_svstate.dststep.eq(skip_dststep) comb += update_svstate.eq(1) # proceed to Decode m.next = "DECODE_SV" # after src/dst step have been updated, we are ready # to decode the instruction with m.State("DECODE_SV"): # decode the instruction sync += core.e.eq(pdecode2.e) sync += core.state.eq(cur_state) sync += core.raw_insn_i.eq(dec_opcode_i) sync += core.bigendian_i.eq(self.core_bigendian_i) # set RA_OR_ZERO detection in satellite decoders sync += core.sv_a_nz.eq(pdecode2.sv_a_nz) m.next = "INSN_EXECUTE" # move to "execute" # handshake with execution FSM, move to "wait" once acknowledged with m.State("INSN_EXECUTE"): comb += exec_insn_valid_i.eq(1) # trigger execute with m.If(exec_insn_ready_o): # execute acknowledged us m.next = "EXECUTE_WAIT" with m.State("EXECUTE_WAIT"): # wait on "core stop" release, at instruction end # need to do this here, in case we are in a VL>1 loop with m.If(~dbg.core_stop_o & ~core_rst): comb += exec_pc_ready_i.eq(1) with m.If(exec_pc_valid_o): # was this the last loop iteration? is_last = Signal() cur_vl = cur_state.svstate.vl comb += is_last.eq(next_srcstep == cur_vl) # if either PC or SVSTATE were changed by the previous # instruction, go directly back to Fetch, without # updating either PC or SVSTATE with m.If(pc_changed | sv_changed): m.next = "ISSUE_START" # also return to Fetch, when no output was a vector # (regardless of SRCSTEP and VL), or when the last # instruction was really the last one of the VL loop with m.Elif((~pdecode2.loop_continue) | is_last): # before going back to fetch, update the PC state # register with the NIA. # ok here we are not reading the branch unit. # TODO: this just blithely overwrites whatever # pipeline updated the PC comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) comb += self.state_w_pc.data_i.eq(nia) # reset SRCSTEP before returning to Fetch if self.svp64_en: with m.If(pdecode2.loop_continue): comb += new_svstate.srcstep.eq(0) comb += new_svstate.dststep.eq(0) comb += update_svstate.eq(1) else: comb += new_svstate.srcstep.eq(0) comb += new_svstate.dststep.eq(0) comb += update_svstate.eq(1) m.next = "ISSUE_START" # returning to Execute? then, first update SRCSTEP with m.Else(): comb += new_svstate.srcstep.eq(next_srcstep) comb += new_svstate.dststep.eq(next_dststep) comb += update_svstate.eq(1) # return to mask skip loop m.next = "PRED_SKIP" with m.Else(): comb += dbg.core_stopped_i.eq(1) # while stopped, allow updating the PC and SVSTATE with m.If(self.pc_i.ok): comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) comb += self.state_w_pc.data_i.eq(self.pc_i.data) sync += pc_changed.eq(1) with m.If(self.svstate_i.ok): comb += new_svstate.eq(self.svstate_i.data) comb += update_svstate.eq(1) sync += sv_changed.eq(1) # check if svstate needs updating: if so, write it to State Regfile with m.If(update_svstate): comb += self.state_w_sv.wen.eq(1<