"""simple core issuer not in any way intended for production use. this runs a FSM that: * reads the Program Counter from StateRegs * reads an instruction from a fixed-size Test Memory * issues it to the Simple Core * waits for it to complete * increments the PC * does it all over again the purpose of this module is to verify the functional correctness of the Function Units in the absolute simplest and clearest possible way, and to at provide something that can be further incrementally improved. """ from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal, ClockDomain, DomainRenamer, Mux, Const) from nmigen.cli import rtlil from nmigen.cli import main import sys from soc.decoder.power_decoder import create_pdecode from soc.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder from soc.decoder.decode2execute1 import IssuerDecode2ToOperand from soc.decoder.decode2execute1 import Data from soc.experiment.testmem import TestMemory # test only for instructions from soc.regfile.regfiles import StateRegs, FastRegs from soc.simple.core import NonProductionCore from soc.config.test.test_loadstore import TestMemPspec from soc.config.ifetch import ConfigFetchUnit from soc.decoder.power_enums import MicrOp from soc.debug.dmi import CoreDebug, DMIInterface from soc.debug.jtag import JTAG from soc.config.pinouts import get_pinspecs from soc.config.state import CoreState from soc.interrupts.xics import XICS_ICP, XICS_ICS from soc.bus.simple_gpio import SimpleGPIO from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W from soc.clock.select import ClockSelect from soc.clock.dummypll import DummyPLL from soc.sv.svstate import SVSTATERec from nmutil.util import rising_edge def get_insn(f_instr_o, pc): if f_instr_o.width == 32: return f_instr_o else: # 64-bit: bit 2 of pc decides which word to select return f_instr_o.word_select(pc[2], 32) class TestIssuerInternal(Elaboratable): """TestIssuer - reads instructions from TestMemory and issues them efficiency and 
def __init__(self, pspec):
    """Construct the issuer and everything it drives.

    pspec: configuration object.  optional features are requested by
           attributes on it (svp64, debug, sram4x4kblock, xics, gpio);
           an absent attribute means "not requested".  NOTE: when JTAG
           is enabled, pspec is *modified* here (wb_icache_en and
           wb_dcache_en are added) before being handed to the core.
    """

    def _wants(attr, expected=True):
        # attribute present on pspec *and* equal to the expected value
        return getattr(pspec, attr, None) == expected

    # is SVP64 to be enabled?
    self.svp64_en = _wants("svp64")

    # JTAG comes first: when present it adds enable/disable signals to
    # pspec, and those must be in place before the core is created
    self.jtag_en = _wants("debug", 'jtag')
    if not self.jtag_en:
        # no JTAG: SRAM enable is tied to constant 1
        self.wb_sram_en = Const(1)
    else:
        pin_subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                      'pwm', 'sd0', 'sdr'}
        self.jtag = JTAG(get_pinspecs(subset=pin_subset))
        # enable/disable signals for icache and dcache (or the data and
        # instruction wishbone when the caches are not included)
        # https://bugs.libre-soc.org/show_bug.cgi?id=520
        # TODO: do we actually care if these are not domain-synchronised?
        pspec.wb_icache_en = self.jtag.wb_icache_en
        pspec.wb_dcache_en = self.jtag.wb_dcache_en
        self.wb_sram_en = self.jtag.wb_sram_en

    # optional: four 4k SRAM blocks
    self.sram4x4k = _wants("sram4x4kblock")
    if self.sram4x4k:
        self.sram4k = [SPBlock512W64B8W(name="sram4k_%d" % idx,
                                        features={'err'})
                       for idx in range(4)]

    # optional: interrupt controller
    self.xics = _wants("xics")
    if self.xics:
        self.xics_icp = XICS_ICP()
        self.xics_ics = XICS_ICS()
        self.int_level_i = self.xics_ics.int_level_i

    # optional: GPIO peripheral
    self.gpio = _wants("gpio")
    if self.gpio:
        self.simple_gpio = SimpleGPIO()
        self.gpio_o = self.simple_gpio.gpio_o

    # the instruction core itself
    self.core = NonProductionCore(pspec)

    # instruction decoder, fed with the current state
    # (MSR/PC/EINT/SVSTATE)
    decoder = create_pdecode()
    self.cur_state = CoreState("cur")
    self.pdecode2 = PowerDecode2(decoder,
                                 state=self.cur_state,
                                 opkls=IssuerDecode2ToOperand,
                                 svp64_en=self.svp64_en)
    if self.svp64_en:
        # separate decoder for the SVP64 prefix
        self.svp64 = SVP64PrefixDecoder()

    # instruction memory (fetch unit chosen by the config)
    self.imem = ConfigFetchUnit(pspec).fu
    # single-row cache of the instruction read
    self.iline = Signal(64)      # one instruction line
    self.iprev_adr = Signal(64)  # previous address: if different, do read

    # debug (DMI) interface
    self.dbg = CoreDebug()

    # instruction go/monitor
    self.pc_o = Signal(64, reset_less=True)
    self.pc_i = Data(64, "pc_i")            # "ok" means "please change me"
    self.svstate_i = Data(32, "svstate_i")  # likewise
    self.core_bigendian_i = Signal()
    self.busy_o = Signal(reset_less=True)
    self.memerr_o = Signal(reset_less=True)

    regfiles = self.core.regs.rf

    # STATE regfile ports for PC, MSR and SVSTATE
    state_rf = regfiles['state']
    self.state_r_pc = state_rf.r_ports['cia']     # PC read
    self.state_w_pc = state_rf.w_ports['d_wr1']   # PC write
    self.state_r_msr = state_rf.r_ports['msr']    # MSR read
    self.state_r_sv = state_rf.r_ports['sv']      # SVSTATE read
    self.state_w_sv = state_rf.w_ports['sv']      # SVSTATE write

    # regfile read ports used for DMI access
    self.int_r = regfiles['int'].r_ports['dmi']         # INT read
    self.cr_r = regfiles['cr'].r_ports['full_cr_dbg']   # CR read
    self.xer_r = regfiles['xer'].r_ports['full_xer']    # XER read

    # hack: watch the 'nia' write port to see whether branch/trap set
    # the PC
    self.state_nia = state_rf.w_ports['nia']
    self.state_nia.wen.name = 'state_nia_wen'

    # pulsed at instruction end so that the simulator can synchronise
    self.insn_done = Signal()
def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
              fetch_pc_ready_o, fetch_pc_valid_i,
              fetch_insn_valid_o, fetch_insn_ready_i):
    """fetch FSM

    this FSM performs fetch of raw instruction data, partial-decodes
    it 32-bit at a time to detect SVP64 prefixes, and will optionally
    read a 2nd 32-bit quantity if that occurs.

    states: IDLE -> INSN_READ -> (INSN_READ2, only for a prefixed
    instruction when svp64_en is set) -> INSN_READY -> IDLE

    arguments:
    * m                  - module to which the FSM and its comb/sync
                           statements are added
    * core               - not used by this method
    * pc                 - address to fetch from: driven into the
                           instruction memory and latched into cur_state.pc
    * svstate            - latched into cur_state.svstate alongside the pc
    * nia                - written here: cur_state.pc + 4, or + 8 when
                           an SVP64 prefix was detected
    * is_svp64_mode      - written here (only when svp64_en) from the
                           prefix decoder
    * fetch_pc_ready_o   - raised while in IDLE
    * fetch_pc_valid_i   - sampled in IDLE: starts a fetch
    * fetch_insn_valid_o - raised while in INSN_READY
    * fetch_insn_ready_i - sampled in INSN_READY: returns to IDLE
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # cleared when an MSR read is started, set again once the data has
    # been captured: makes sure the MSR is latched only once per fetch
    msr_read = Signal(reset=1)

    with m.FSM(name='fetch_fsm'):

        # waiting (zzz)
        with m.State("IDLE"):
            comb += fetch_pc_ready_o.eq(1)
            with m.If(fetch_pc_valid_i):
                # instruction allowed to go: start by reading the PC
                # capture the PC and also drop it into Insn Memory
                # we have joined a pair of combinatorial memory
                # lookups together.  this is Generally Bad.
                comb += self.imem.a_pc_i.eq(pc)
                comb += self.imem.a_valid_i.eq(1)
                comb += self.imem.f_valid_i.eq(1)
                sync += cur_state.pc.eq(pc)
                sync += cur_state.svstate.eq(svstate)  # and svstate

                # initiate read of MSR. arrives one clock later
                comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                sync += msr_read.eq(0)

                m.next = "INSN_READ"  # move to "wait for bus" phase

        # dummy pause to find out why simulation is not keeping up
        with m.State("INSN_READ"):
            # one cycle later, msr/sv read arrives.  valid only once.
            with m.If(~msr_read):
                sync += msr_read.eq(1)  # yeah don't read it again
                sync += cur_state.msr.eq(self.state_r_msr.data_o)
            with m.If(self.imem.f_busy_o):  # zzz...
                # busy: stay in wait-read
                # NOTE(review): a_pc_i is not re-driven in this branch -
                # presumably the fetch unit holds the address; confirm
                comb += self.imem.a_valid_i.eq(1)
                comb += self.imem.f_valid_i.eq(1)
            with m.Else():
                # not busy: instruction fetched
                insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                # python-level (elaboration-time) choice: the prefix
                # logic is only generated when SVP64 is enabled
                if self.svp64_en:
                    svp64 = self.svp64
                    # decode the SVP64 prefix, if any
                    comb += svp64.raw_opcode_in.eq(insn)
                    comb += svp64.bigendian.eq(self.core_bigendian_i)
                    # pass the decoded prefix (if any) to PowerDecoder2
                    sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                    # remember whether this is a prefixed instruction, so
                    # the FSM can readily loop when VL==0
                    sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                    # calculate the address of the following instruction
                    # (8 bytes on when prefixed, otherwise 4)
                    insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                    sync += nia.eq(cur_state.pc + insn_size)
                    with m.If(~svp64.is_svp64_mode):
                        # with no prefix, store the instruction
                        # and hand it directly to the next FSM
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"
                    with m.Else():
                        # fetch the rest of the instruction from memory
                        # (the word at pc + 4)
                        comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                        comb += self.imem.a_valid_i.eq(1)
                        comb += self.imem.f_valid_i.eq(1)
                        m.next = "INSN_READ2"
                else:
                    # not SVP64 - 32-bit only
                    sync += nia.eq(cur_state.pc + 4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"

        # second read: only entered after an SVP64 prefix was seen
        with m.State("INSN_READ2"):
            with m.If(self.imem.f_busy_o):  # zzz...
                # busy: stay in wait-read
                comb += self.imem.a_valid_i.eq(1)
                comb += self.imem.f_valid_i.eq(1)
            with m.Else():
                # not busy: instruction fetched
                # word selection uses pc + 4, matching the address read
                insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                sync += dec_opcode_i.eq(insn)
                m.next = "INSN_READY"

        with m.State("INSN_READY"):
            # hand over the instruction, to be decoded
            comb += fetch_insn_valid_o.eq(1)
            with m.If(fetch_insn_ready_i):
                m.next = "IDLE"
this interacts with the "fetch" FSM through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid (outgoing). also interacts with the "execute" FSM through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid (incoming). SVP64 RM prefixes have already been set up by the "fetch" phase, so execute is fairly straightforward. """ comb = m.d.comb sync = m.d.sync pdecode2 = self.pdecode2 cur_state = self.cur_state # temporaries dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode # for updating svstate (things like srcstep etc.) update_svstate = Signal() # set this (below) if updating new_svstate = SVSTATERec("new_svstate") comb += new_svstate.eq(cur_state.svstate) with m.FSM(name="issue_fsm"): # go fetch the instruction at the current PC # at this point, there is no instruction running, that # could inadvertently update the PC. with m.State("INSN_FETCH"): # wait on "core stop" release, before next fetch # need to do this here, in case we are in a VL==0 loop with m.If(~dbg.core_stop_o & ~core_rst): comb += fetch_pc_valid_i.eq(1) with m.If(fetch_pc_ready_o): m.next = "INSN_WAIT" with m.Else(): comb += core.core_stopped_i.eq(1) comb += dbg.core_stopped_i.eq(1) # while stopped, allow updating the PC and SVSTATE with m.If(self.pc_i.ok): comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) comb += self.state_w_pc.data_i.eq(self.pc_i.data) sync += pc_changed.eq(1) with m.If(self.svstate_i.ok): comb += new_svstate.eq(self.svstate_i.data) comb += update_svstate.eq(1) sync += sv_changed.eq(1) # decode the instruction when it arrives with m.State("INSN_WAIT"): comb += fetch_insn_ready_i.eq(1) with m.If(fetch_insn_valid_o): # decode the instruction sync += core.e.eq(pdecode2.e) sync += core.state.eq(cur_state) sync += core.raw_insn_i.eq(dec_opcode_i) sync += core.bigendian_i.eq(self.core_bigendian_i) # set RA_OR_ZERO detection in satellite decoders sync += core.sv_a_nz.eq(pdecode2.sv_a_nz) # loop into INSN_FETCH if it's a SVP64 instruction # and VL == 0. 
this because VL==0 is a for-loop # from 0 to 0 i.e. always, always a NOP. cur_vl = cur_state.svstate.vl with m.If(is_svp64_mode & (cur_vl == 0)): # update the PC before fetching the next instruction # since we are in a VL==0 loop, no instruction was # executed that we could be overwriting comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) comb += self.state_w_pc.data_i.eq(nia) comb += self.insn_done.eq(1) m.next = "INSN_FETCH" with m.Else(): m.next = "INSN_EXECUTE" # move to "execute" with m.State("INSN_EXECUTE"): comb += exec_insn_valid_i.eq(1) with m.If(exec_insn_ready_o): m.next = "EXECUTE_WAIT" with m.State("EXECUTE_WAIT"): # wait on "core stop" release, at instruction end # need to do this here, in case we are in a VL>1 loop with m.If(~dbg.core_stop_o & ~core_rst): comb += exec_pc_ready_i.eq(1) with m.If(exec_pc_valid_o): # precalculate srcstep+1 next_srcstep = Signal.like(cur_state.svstate.srcstep) next_dststep = Signal.like(cur_state.svstate.dststep) comb += next_srcstep.eq(cur_state.svstate.srcstep+1) comb += next_dststep.eq(cur_state.svstate.dststep+1) # was this the last loop iteration? is_last = Signal() cur_vl = cur_state.svstate.vl comb += is_last.eq(next_srcstep == cur_vl) # if either PC or SVSTATE were changed by the previous # instruction, go directly back to Fetch, without # updating either PC or SVSTATE with m.If(pc_changed | sv_changed): m.next = "INSN_FETCH" # also return to Fetch, when no output was a vector # (regardless of SRCSTEP and VL), or when the last # instruction was really the last one of the VL loop with m.Elif((~pdecode2.loop_continue) | is_last): # before going back to fetch, update the PC state # register with the NIA. # ok here we are not reading the branch unit. 
# TODO: this just blithely overwrites whatever # pipeline updated the PC comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) comb += self.state_w_pc.data_i.eq(nia) # reset SRCSTEP before returning to Fetch with m.If(pdecode2.loop_continue): comb += new_svstate.srcstep.eq(0) comb += new_svstate.dststep.eq(0) comb += update_svstate.eq(1) m.next = "INSN_FETCH" # returning to Execute? then, first update SRCSTEP with m.Else(): comb += new_svstate.srcstep.eq(next_srcstep) comb += new_svstate.dststep.eq(next_dststep) comb += update_svstate.eq(1) m.next = "DECODE_SV" with m.Else(): comb += core.core_stopped_i.eq(1) comb += dbg.core_stopped_i.eq(1) # while stopped, allow updating the PC and SVSTATE with m.If(self.pc_i.ok): comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) comb += self.state_w_pc.data_i.eq(self.pc_i.data) sync += pc_changed.eq(1) with m.If(self.svstate_i.ok): comb += new_svstate.eq(self.svstate_i.data) comb += update_svstate.eq(1) sync += sv_changed.eq(1) # need to decode the instruction again, after updating SRCSTEP # in the previous state. # mostly a copy of INSN_WAIT, but without the actual wait with m.State("DECODE_SV"): # decode the instruction sync += core.e.eq(pdecode2.e) sync += core.state.eq(cur_state) sync += core.bigendian_i.eq(self.core_bigendian_i) sync += core.sv_a_nz.eq(pdecode2.sv_a_nz) m.next = "INSN_EXECUTE" # move to "execute" # check if svstate needs updating: if so, write it to State Regfile with m.If(update_svstate): comb += self.state_w_sv.wen.eq(1<