not in any way intended for production use. this runs an FSM that:
-* reads the Program Counter from FastRegs
+* reads the Program Counter from StateRegs
* reads an instruction from a fixed-size Test Memory
* issues it to the Simple Core
* waits for it to complete
from nmigen.cli import main
import sys
+from soc.decoder.power_decoder import create_pdecode
+from soc.decoder.power_decoder2 import PowerDecode2
from soc.decoder.decode2execute1 import Data
from soc.experiment.testmem import TestMemory # test only for instructions
-from soc.regfile.regfiles import FastRegs
+from soc.regfile.regfiles import StateRegs
from soc.simple.core import NonProductionCore
from soc.config.test.test_loadstore import TestMemPspec
from soc.config.ifetch import ConfigFetchUnit
from soc.debug.dmi import CoreDebug, DMIInterface
from soc.config.state import CoreState
+from nmutil.util import rising_edge
+
class TestIssuer(Elaboratable):
"""TestIssuer - reads instructions from TestMemory and issues them
# main instruction core
self.core = core = NonProductionCore(pspec)
+ # instruction decoder
+ pdecode = create_pdecode()
+ self.pdecode2 = PowerDecode2(pdecode) # decoder
+
# Test Instruction memory
self.imem = ConfigFetchUnit(pspec).fu
# one-row cache of instruction read
self.memerr_o = Signal(reset_less=True)
# STATE regfile read/write ports for PC and MSR
- self.fast_r_pc = self.core.regs.rf['fast'].r_ports['cia'] # PC rd
- self.fast_w_pc = self.core.regs.rf['fast'].w_ports['d_wr1'] # PC wr
- self.fast_r_msr = self.core.regs.rf['fast'].r_ports['msr'] # MSR rd
+ staterf = self.core.regs.rf['state']
+ self.state_r_pc = staterf.r_ports['cia'] # PC rd
+ self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
+ self.state_r_msr = staterf.r_ports['msr'] # MSR rd
# DMI interface access
- self.int_r = self.core.regs.rf['int'].r_ports['dmi'] # INT read
+ intrf = self.core.regs.rf['int']
+ crrf = self.core.regs.rf['cr']
+ self.int_r = intrf.r_ports['dmi'] # INT read
+ self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
# hack method of keeping an eye on whether branch/trap set the PC
- self.fast_nia = self.core.regs.rf['fast'].w_ports['nia']
- self.fast_nia.wen.name = 'fast_nia_wen'
+ self.state_nia = self.core.regs.rf['state'].w_ports['nia']
+ self.state_nia.wen.name = 'state_nia_wen'
def elaborate(self, platform):
m = Module()
m.submodules.imem = imem = self.imem
m.submodules.dbg = dbg = self.dbg
+ # instruction decoder
+ pdecode = create_pdecode()
+ m.submodules.dec2 = pdecode2 = self.pdecode2
+
# convenience
- dmi = dbg.dmi
- d_reg = dbg.dbg_gpr
+ dmi, d_reg, d_cr = dbg.dmi, dbg.dbg_gpr, dbg.dbg_cr
+ intrf = self.core.regs.rf['int']
# clock delay power-on reset
cd_por = ClockDomain(reset_less=True)
core_sync = ClockDomain("coresync")
m.domains += cd_por, cd_sync, core_sync
- delay = Signal(range(4), reset=1)
+ delay = Signal(range(4), reset=3)
with m.If(delay != 0):
m.d.por += delay.eq(delay - 1)
comb += cd_por.clk.eq(ClockSignal())
comb += core_sync.clk.eq(ClockSignal())
- # XXX TODO: power-on reset delay (later)
- #comb += core.core_reset_i.eq(delay != 0 | dbg.core_rst_o)
+ # power-on reset delay
+ comb += core.core_reset_i.eq(delay != 0 | dbg.core_rst_o)
# busy/halted signals from core
comb += self.busy_o.eq(core.busy_o)
- comb += core.bigendian_i.eq(self.core_bigendian_i)
+ comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
# current state (MSR/PC at the moment)
cur_state = CoreState("cur")
# temporary hack: says "go" immediately for both address gen and ST
l0 = core.l0
ldst = core.fus.fus['ldst0']
+ st_go_edge = rising_edge(m, ldst.st.rel_o)
m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
- m.d.comb += ldst.st.go_i.eq(ldst.st.rel_o) # link store-go direct to rel
+ m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
# PC and instruction from I-Memory
- current_insn = Signal(32) # current fetched instruction (note sync)
pc_changed = Signal() # note write to PC
comb += self.pc_o.eq(cur_state.pc)
ilatch = Signal(32)
- # MSR (temp and latched)
- msr = Signal(64, reset_less=True)
-
# next instruction (+4 on current)
nia = Signal(64, reset_less=True)
comb += nia.eq(cur_state.pc + 4)
+ # read the PC
+ pc = Signal(64, reset_less=True)
+ pc_ok_delay = Signal()
+ sync += pc_ok_delay.eq(~self.pc_i.ok)
+ with m.If(self.pc_i.ok):
+ # incoming override (start from pc_i)
+ comb += pc.eq(self.pc_i.data)
+ with m.Else():
+ # otherwise read StateRegs regfile for PC...
+ comb += self.state_r_pc.ren.eq(1<<StateRegs.PC)
+ # ... but on a 1-clock delay
+ with m.If(pc_ok_delay):
+ comb += pc.eq(self.state_r_pc.data_o)
+
+ # don't write pc every cycle
+ comb += self.state_w_pc.wen.eq(0)
+ comb += self.state_w_pc.data_i.eq(0)
+
+ # don't read msr every cycle
+ comb += self.state_r_msr.ren.eq(0)
+
# connect up debug signals
# TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
- comb += core.core_stopped_i.eq(dbg.core_stop_o)
- comb += core.core_reset_i.eq(dbg.core_rst_o)
comb += dbg.terminate_i.eq(core.core_terminate_o)
- comb += dbg.state.pc.eq(nia)
+ comb += dbg.state.pc.eq(pc)
+ #comb += dbg.state.pc.eq(cur_state.pc)
comb += dbg.state.msr.eq(cur_state.msr)
# temporaries
- core_busy_o = core.busy_o # core is busy
- core_ivalid_i = core.ivalid_i # instruction is valid
- core_issue_i = core.issue_i # instruction is issued
- core_be_i = core.bigendian_i # bigendian mode
- core_opcode_i = core.raw_opcode_i # raw opcode
+ core_busy_o = core.busy_o # core is busy
+ core_ivalid_i = core.ivalid_i # instruction is valid
+ core_issue_i = core.issue_i # instruction is issued
+ dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
- insn_type = core.pdecode2.e.do.insn_type
- insn_state = core.pdecode2.state
+ insn_type = core.e.do.insn_type
+ dec_state = pdecode2.state
# actually use a nmigen FSM for the first time (w00t)
# this FSM is perhaps unusual in that it detects conditions
# waiting (zzz)
with m.State("IDLE"):
sync += pc_changed.eq(0)
- with m.If(~dbg.core_stop_o):
+ sync += core.e.eq(0)
+ with m.If(~dbg.core_stop_o & ~core.core_reset_i):
# instruction allowed to go: start by reading the PC
- pc = Signal(64, reset_less=True)
- with m.If(self.pc_i.ok):
- # incoming override (start from pc_i)
- comb += pc.eq(self.pc_i.data)
- with m.Else():
- # otherwise read FastRegs regfile for PC
- comb += self.fast_r_pc.ren.eq(1<<FastRegs.PC)
- comb += pc.eq(self.fast_r_pc.data_o)
# capture the PC and also drop it into Insn Memory
# we have joined a pair of combinatorial memory
# lookups together. this is Generally Bad.
comb += self.imem.a_valid_i.eq(1)
comb += self.imem.f_valid_i.eq(1)
sync += cur_state.pc.eq(pc)
+
+ # initiate read of MSR
+ comb += self.state_r_msr.ren.eq(1<<StateRegs.MSR)
+
m.next = "INSN_READ" # move to "wait for bus" phase
+ with m.Else():
+ comb += core.core_stopped_i.eq(1)
+ comb += dbg.core_stopped_i.eq(1)
- # waiting for instruction bus (stays there until not busy)
+ # waiting for instruction bus (stays there until not busy)
with m.State("INSN_READ"):
+ # one cycle later, msr read arrives
+ sync += cur_state.msr.eq(self.state_r_msr.data_o)
with m.If(self.imem.f_busy_o): # zzz...
# busy: stay in wait-read
comb += self.imem.a_valid_i.eq(1)
insn = f_instr_o
else:
insn = f_instr_o.word_select(cur_state.pc[2], 32)
- comb += current_insn.eq(insn)
- comb += core_ivalid_i.eq(1) # instruction is valid
- comb += core_issue_i.eq(1) # and issued
- comb += core_opcode_i.eq(current_insn) # actual opcode
- sync += ilatch.eq(current_insn) # latch current insn
+ comb += dec_opcode_i.eq(insn) # actual opcode
+ comb += dec_state.eq(cur_state)
+ sync += core.e.eq(pdecode2.e)
+ sync += ilatch.eq(insn) # latch current insn
+ # also drop PC and MSR into decode "state"
+ m.next = "INSN_START" # move to "start"
- # read MSR, latch it, and put it in decode "state"
- comb += self.fast_r_msr.ren.eq(1<<FastRegs.MSR)
- comb += msr.eq(self.fast_r_msr.data_o)
- comb += insn_state.msr.eq(msr)
- sync += cur_state.msr.eq(msr) # latch current MSR
+ # decoded instruction has been latched: flag it valid and issue it
+ with m.State("INSN_START"):
+ comb += core_ivalid_i.eq(1) # instruction is valid
+ comb += core_issue_i.eq(1) # and issued
- # also drop PC into decode "state"
- comb += insn_state.pc.eq(cur_state.pc)
- m.next = "INSN_ACTIVE" # move to "wait completion"
+ m.next = "INSN_ACTIVE" # move to "wait completion"
# instruction started: must wait till it finishes
with m.State("INSN_ACTIVE"):
with m.If(insn_type != MicrOp.OP_NOP):
comb += core_ivalid_i.eq(1) # instruction is valid
- comb += core_opcode_i.eq(ilatch) # actual opcode
- comb += insn_state.eq(cur_state) # and MSR and PC
- with m.If(self.fast_nia.wen):
+ with m.If(self.state_nia.wen):
sync += pc_changed.eq(1)
with m.If(~core_busy_o): # instruction done!
# ok here we are not reading the branch unit. TODO
# this just blithely overwrites whatever pipeline
# updated the PC
with m.If(~pc_changed):
- comb += self.fast_w_pc.wen.eq(1<<FastRegs.PC)
- comb += self.fast_w_pc.data_i.eq(nia)
+ comb += self.state_w_pc.wen.eq(1<<StateRegs.PC)
+ comb += self.state_w_pc.data_i.eq(nia)
+ sync += core.e.eq(0)
m.next = "IDLE" # back to idle
# this bit doesn't have to be in the FSM: connect up to read
# regfiles on demand from DMI
-
with m.If(d_reg.req): # request for regfile access being made
# TODO: error-check this
# XXX should this be combinatorial? sync better?
- comb += self.int_r.ren.eq(1<<d_reg.addr)
+ if intrf.unary:
+ comb += self.int_r.ren.eq(1<<d_reg.addr)
+ else:
+ comb += self.int_r.addr.eq(d_reg.addr)
+ comb += self.int_r.ren.eq(1)
+ d_reg_delay = Signal()
+ sync += d_reg_delay.eq(d_reg.req)
+ with m.If(d_reg_delay):
+ # data arrives one clock later
comb += d_reg.data.eq(self.int_r.data_o)
comb += d_reg.ack.eq(1)
+ # sigh same thing for CR debug
+ with m.If(d_cr.req): # request for regfile access being made
+ comb += self.cr_r.ren.eq(0b11111111) # enable all
+ d_cr_delay = Signal()
+ sync += d_cr_delay.eq(d_cr.req)
+ with m.If(d_cr_delay):
+ # data arrives one clock later
+ comb += d_cr.data.eq(self.cr_r.data_o)
+ comb += d_cr.ack.eq(1)
+
return m
def __iter__(self):
] + \
list(self.dbg.dmi.ports()) + \
list(self.imem.ibus.fields.values()) + \
- list(self.core.l0.cmpi.lsmem.lsi.dbus.fields.values())
+ list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())
def ports(self):
return list(self)
if __name__ == '__main__':
units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
'spr': 1,
+ 'div': 1,
'mul': 1,
- 'shiftrot': 1}
+ 'shiftrot': 1
+ }
pspec = TestMemPspec(ldst_ifacetype='bare_wb',
imem_ifacetype='bare_wb',
addr_wid=48,