Merge branch 'master' of git.libre-soc.org:soc

[soc.git] / src / soc / simple / issuer.py
diff --git a/src/soc/simple/issuer.py b/src/soc/simple/issuer.py

index 9feafaf1f9151f66a0d403df81e675ba254fe1eb..d93e3782b599dc4b17c98bbdaee0b3198e016f88 100644 (file)
--- a/src/soc/simple/issuer.py
+++ b/src/soc/simple/issuer.py
@@ -15,8 +15,11 @@ way, and to at provide something that can be further incrementally
  improved.
  """
  
-from nmigen import Elaboratable, Module, Signal
+from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
+                    ClockDomain, DomainRenamer)
  from nmigen.cli import rtlil
+from nmigen.cli import main
+import sys
  
  from soc.decoder.decode2execute1 import Data
  from soc.experiment.testmem import TestMemory # test only for instructions
@@ -24,6 +27,11 @@ from soc.regfile.regfiles import FastRegs
  from soc.simple.core import NonProductionCore
  from soc.config.test.test_loadstore import TestMemPspec
  from soc.config.ifetch import ConfigFetchUnit
+from soc.decoder.power_enums import MicrOp
+from soc.debug.dmi import CoreDebug, DMIInterface
+from soc.config.state import CoreState
+
+from nmutil.util import rising_edge
  
  
  class TestIssuer(Elaboratable):
@@ -41,16 +49,24 @@ class TestIssuer(Elaboratable):
          self.iline = Signal(64) # one instruction line
          self.iprev_adr = Signal(64) # previous address: if different, do read
  
+        # DMI interface
+        self.dbg = CoreDebug()
+
          # instruction go/monitor
-        self.go_insn_i = Signal(reset_less=True)
          self.pc_o = Signal(64, reset_less=True)
-        self.pc_i = Data(64, "pc") # set "ok" to indicate "please change me"
-        self.busy_o = core.busy_o
+        self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
+        self.core_bigendian_i = Signal()
+        self.busy_o = Signal(reset_less=True)
          self.memerr_o = Signal(reset_less=True)
  
-        # FAST regfile read /write ports
-        self.fast_rd1 = self.core.regs.rf['fast'].r_ports['d_rd1']
-        self.fast_wr1 = self.core.regs.rf['fast'].w_ports['d_wr1']
+        # FAST regfile read/write ports for PC and MSR
+        self.fast_r_pc = self.core.regs.rf['fast'].r_ports['cia'] # PC rd
+        self.fast_w_pc = self.core.regs.rf['fast'].w_ports['d_wr1'] # PC wr
+        self.fast_r_msr = self.core.regs.rf['fast'].r_ports['msr'] # MSR rd
+
+        # DMI interface access
+        self.int_r = self.core.regs.rf['int'].r_ports['dmi'] # INT read
+
          # hack method of keeping an eye on whether branch/trap set the PC
          self.fast_nia = self.core.regs.rf['fast'].w_ports['nia']
          self.fast_nia.wen.name = 'fast_nia_wen'
@@ -59,25 +75,69 @@ class TestIssuer(Elaboratable):
          m = Module()
          comb, sync = m.d.comb, m.d.sync
  
-        m.submodules.core = core = self.core
+        m.submodules.core = core = DomainRenamer("coresync")(self.core)
          m.submodules.imem = imem = self.imem
+        m.submodules.dbg = dbg = self.dbg
+
+        # convenience
+        dmi = dbg.dmi
+        d_reg = dbg.dbg_gpr
+
+        # clock delay power-on reset
+        cd_por  = ClockDomain(reset_less=True)
+        cd_sync = ClockDomain()
+        core_sync = ClockDomain("coresync")
+        m.domains += cd_por, cd_sync, core_sync
+
+        delay = Signal(range(4), reset=1)
+        with m.If(delay != 0):
+            m.d.por += delay.eq(delay - 1)
+        comb += cd_por.clk.eq(ClockSignal())
+        comb += core_sync.clk.eq(ClockSignal())
+        # XXX TODO: power-on reset delay (later)
+        #comb += core.core_reset_i.eq((delay != 0) | dbg.core_rst_o)
+
+        # busy/halted signals from core
+        comb += self.busy_o.eq(core.busy_o)
+        comb += core.bigendian_i.eq(self.core_bigendian_i)
+
+        # current state (MSR/PC at the moment)
+        cur_state = CoreState("cur")
  
          # temporary hack: says "go" immediately for both address gen and ST
          l0 = core.l0
          ldst = core.fus.fus['ldst0']
-        m.d.comb += ldst.ad.go.eq(ldst.ad.rel) # link addr-go direct to rel
-        m.d.comb += ldst.st.go.eq(ldst.st.rel) # link store-go direct to rel
+        st_go_edge = rising_edge(m, ldst.st.rel_o)
+        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
+        m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
  
          # PC and instruction from I-Memory
          current_insn = Signal(32) # current fetched instruction (note sync)
-        current_pc = Signal(64) # current PC (note it is reset/sync)
          pc_changed = Signal() # note write to PC
-        comb += self.pc_o.eq(current_pc)
+        comb += self.pc_o.eq(cur_state.pc)
          ilatch = Signal(32)
  
          # next instruction (+4 on current)
          nia = Signal(64, reset_less=True)
-        comb += nia.eq(current_pc + 4)
+        comb += nia.eq(cur_state.pc + 4)
+
+        # read the PC
+        pc = Signal(64, reset_less=True)
+        with m.If(self.pc_i.ok):
+            # incoming override (start from pc_i)
+            comb += pc.eq(self.pc_i.data)
+        with m.Else():
+            # otherwise read FastRegs regfile for PC
+            comb += self.fast_r_pc.ren.eq(1<<FastRegs.PC)
+            comb += pc.eq(self.fast_r_pc.data_o)
+
+        # connect up debug signals
+        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
+        comb += core.core_stopped_i.eq(dbg.core_stop_o)
+        comb += core.core_reset_i.eq(dbg.core_rst_o)
+        comb += dbg.terminate_i.eq(core.core_terminate_o)
+        comb += dbg.state.pc.eq(pc)
+        comb += dbg.state.msr.eq(cur_state.msr)
  
          # temporaries
          core_busy_o = core.busy_o         # core is busy
@@ -86,29 +146,36 @@ class TestIssuer(Elaboratable):
          core_be_i = core.bigendian_i      # bigendian mode
          core_opcode_i = core.raw_opcode_i # raw opcode
  
+        insn_type = core.pdecode2.e.do.insn_type
+        insn_state = core.pdecode2.state
+
+        # don't read msr every cycle
+        sync += self.fast_r_msr.ren.eq(0)
+
          # actually use a nmigen FSM for the first time (w00t)
+        # this FSM is perhaps unusual in that it detects conditions
+        # then "holds" information, combinatorially, for the core
+        # (as opposed to using sync - which would be on a clock's delay)
+        # this includes the actual opcode, valid flags and so on.
          with m.FSM() as fsm:
  
              # waiting (zzz)
              with m.State("IDLE"):
                  sync += pc_changed.eq(0)
-                with m.If(self.go_insn_i):
+                with m.If(~dbg.core_stop_o):
                      # instruction allowed to go: start by reading the PC
-                    pc = Signal(64, reset_less=True)
-                    with m.If(self.pc_i.ok):
-                        # incoming override (start from pc_i)
-                        comb += pc.eq(self.pc_i.data)
-                    with m.Else():
-                        # otherwise read FastRegs regfile for PC
-                        comb += self.fast_rd1.ren.eq(1<<FastRegs.PC)
-                        comb += pc.eq(self.fast_rd1.data_o)
                      # capture the PC and also drop it into Insn Memory
                      # we have joined a pair of combinatorial memory
                      # lookups together.  this is Generally Bad.
                      comb += self.imem.a_pc_i.eq(pc)
                      comb += self.imem.a_valid_i.eq(1)
                      comb += self.imem.f_valid_i.eq(1)
-                    sync += current_pc.eq(pc)
+                    sync += cur_state.pc.eq(pc)
+
+                    # read MSR, latch it, and put it in decode "state"
+                    sync += self.fast_r_msr.ren.eq(1<<FastRegs.MSR)
+                    sync += cur_state.msr.eq(self.fast_r_msr.data_o)
+
                      m.next = "INSN_READ" # move to "wait for bus" phase
  
              # waiting for instruction bus (stays there until not busy)
@@ -119,42 +186,74 @@ class TestIssuer(Elaboratable):
                      comb += self.imem.f_valid_i.eq(1)
                  with m.Else():
                      # not busy: instruction fetched
-                    insn = self.imem.f_instr_o.word_select(current_pc[2], 32)
+                    f_instr_o = self.imem.f_instr_o
+                    if f_instr_o.width == 32:
+                        insn = f_instr_o
+                    else:
+                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                      comb += current_insn.eq(insn)
-                    comb += core_ivalid_i.eq(1) # say instruction is valid
-                    comb += core_issue_i.eq(1)  # and issued (ivalid redundant)
-                    comb += core_be_i.eq(0)     # little-endian mode
+                    comb += core_ivalid_i.eq(1) # instruction is valid
+                    comb += core_issue_i.eq(1)  # and issued
                      comb += core_opcode_i.eq(current_insn) # actual opcode
-                    sync += ilatch.eq(current_insn)
-                    m.next = "INSN_ACTIVE" # move to "wait for completion" phase
+                    sync += ilatch.eq(current_insn) # latch current insn
+
+                    # also drop PC and MSR into decode "state"
+                    comb += insn_state.eq(cur_state)
+
+                    m.next = "INSN_ACTIVE" # move to "wait completion"
  
              # instruction started: must wait till it finishes
              with m.State("INSN_ACTIVE"):
-                comb += core_ivalid_i.eq(1) # say instruction is valid
+                with m.If(insn_type != MicrOp.OP_NOP):
+                    comb += core_ivalid_i.eq(1) # instruction is valid
                  comb += core_opcode_i.eq(ilatch) # actual opcode
-                #sync += core_issue_i.eq(0) # issue raises for only one cycle
+                comb += insn_state.eq(cur_state)     # and MSR and PC
                  with m.If(self.fast_nia.wen):
                      sync += pc_changed.eq(1)
                  with m.If(~core_busy_o): # instruction done!
-                    #sync += core_ivalid_i.eq(0) # say instruction is invalid
-                    #sync += core_opcode_i.eq(0) # clear out (no good reason)
                      # ok here we are not reading the branch unit.  TODO
-                    # this just blithely overwrites whatever pipeline updated
-                    # the PC
+                    # this just blithely overwrites whatever pipeline
+                    # updated the PC
                      with m.If(~pc_changed):
-                        comb += self.fast_wr1.wen.eq(1<<FastRegs.PC)
-                        comb += self.fast_wr1.data_i.eq(nia)
+                        comb += self.fast_w_pc.wen.eq(1<<FastRegs.PC)
+                        comb += self.fast_w_pc.data_i.eq(nia)
                      m.next = "IDLE" # back to idle
  
+        # this bit doesn't have to be in the FSM: connect up to read
+        # regfiles on demand from DMI
+
+        with m.If(d_reg.req): # request for regfile access being made
+            # TODO: error-check this
+            # XXX should this be combinatorial?  sync better?
+            comb += self.int_r.ren.eq(1<<d_reg.addr)
+            comb += d_reg.data.eq(self.int_r.data_o)
+            comb += d_reg.ack.eq(1)
+
          return m
  
      def __iter__(self):
          yield from self.pc_i.ports()
          yield self.pc_o
-        yield self.go_insn_i
          yield self.memerr_o
          yield from self.core.ports()
          yield from self.imem.ports()
+        yield self.core_bigendian_i
+        yield self.busy_o
+
+    def ports(self):
+        return list(self)
+
+    def external_ports(self):
+        return self.pc_i.ports() + [self.pc_o,
+                                    self.memerr_o,
+                                    self.core_bigendian_i,
+                                    ClockSignal(),
+                                    ResetSignal(),
+                                    self.busy_o,
+                                    ] + \
+                list(self.dbg.dmi.ports()) + \
+                list(self.imem.ibus.fields.values()) + \
+                list(self.core.l0.cmpi.lsmem.lsi.dbus.fields.values())
  
      def ports(self):
          return list(self)
@@ -163,6 +262,8 @@ class TestIssuer(Elaboratable):
  if __name__ == '__main__':
      units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
               'spr': 1,
+             'div': 1,
+             'mul': 1,
               'shiftrot': 1}
      pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                           imem_ifacetype='bare_wb',
@@ -171,7 +272,9 @@ if __name__ == '__main__':
                           reg_wid=64,
                           units=units)
      dut = TestIssuer(pspec)
-    vl = rtlil.convert(dut, ports=dut.ports(), name="test_issuer")
-    with open("test_issuer.il", "w") as f:
-        f.write(vl)
+    vl = main(dut, ports=dut.ports(), name="test_issuer")
  
+    if len(sys.argv) == 1:
+        vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
+        with open("test_issuer.il", "w") as f:
+            f.write(vl)