change over to DMI debug start/stop interface
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 3 Aug 2020 17:02:52 +0000 (18:02 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 3 Aug 2020 17:03:04 +0000 (18:03 +0100)
src/soc/simple/core.py
src/soc/simple/issuer.py
src/soc/simple/test/test_core.py
src/soc/simple/test/test_issuer.py
src/soc/simulator/test_sim.py

index 075ca557380af20de503960376306482b5aa78fe..3b195d240c925ab9cdebc1278a0a6eb43a9fc071 100644 (file)
@@ -19,7 +19,7 @@ and consequently it is safer to wait for the Function Unit to complete
 before allowing a new instruction to proceed.
 """
 
-from nmigen import Elaboratable, Module, Signal
+from nmigen import Elaboratable, Module, Signal, ResetSignal
 from nmigen.cli import rtlil
 
 from nmutil.picker import PriorityPicker
@@ -84,9 +84,9 @@ class NonProductionCore(Elaboratable):
         self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in
 
         # start/stop and terminated signalling
-        self.core_start_i = Signal(reset_less=True)
-        self.core_stop_i = Signal(reset_less=True)
-        self.core_terminated_o = Signal(reset=0)  # indicates stopped
+        self.core_stopped_i = Signal(reset_less=True)
+        self.core_reset_i = Signal()
+        self.core_terminate_o = Signal(reset=0)  # indicates stopped
 
     def elaborate(self, platform):
         m = Module()
@@ -98,24 +98,17 @@ class NonProductionCore(Elaboratable):
         regs = self.regs
         fus = self.fus.fus
 
-        # core start/stopped state
-        core_stopped = Signal(reset=0) # begins in running state
-
-        # start/stop signalling
-        with m.If(self.core_start_i):
-            m.d.sync += core_stopped.eq(0)
-        with m.If(self.core_stop_i):
-            m.d.sync += core_stopped.eq(1)
-        m.d.comb += self.core_terminated_o.eq(core_stopped)
-
         # connect up Function Units, then read/write ports
-        fu_bitdict = self.connect_instruction(m, core_stopped)
+        fu_bitdict = self.connect_instruction(m)
         self.connect_rdports(m, fu_bitdict)
         self.connect_wrports(m, fu_bitdict)
 
+        # connect up reset
+        m.d.comb += ResetSignal().eq(self.core_reset_i)
+
         return m
 
-    def connect_instruction(self, m, core_stopped):
+    def connect_instruction(self, m):
         """connect_instruction
 
         uses decoded (from PowerOp) function unit information from CSV files
@@ -138,7 +131,7 @@ class NonProductionCore(Elaboratable):
             fu_bitdict[funame] = fu_enable[i]
         # only run when allowed and when instruction is valid
         can_run = Signal(reset_less=True)
-        comb += can_run.eq(self.ivalid_i & ~core_stopped)
+        comb += can_run.eq(self.ivalid_i & ~self.core_stopped_i)
 
         # enable the required Function Unit based on the opcode decode
         # note: this *only* works correctly for simple core when one and
@@ -159,7 +152,7 @@ class NonProductionCore(Elaboratable):
             with m.Switch(dec2.e.do.insn_type):
                 # check for ATTN: halt if true
                 with m.Case(MicrOp.OP_ATTN):
-                    m.d.sync += core_stopped.eq(1)
+                    m.d.sync += self.core_terminate_o.eq(1)
 
                 with m.Case(MicrOp.OP_NOP):
                     sync += counter.eq(2)
index a273fd1656d2c393116d5c4485ac0c2c7b7591ca..e625b8dd9f2ab0f806846b86a82d6fef74355df1 100644 (file)
@@ -15,7 +15,8 @@ way, and to at provide something that can be further incrementally
 improved.
 """
 
-from nmigen import Elaboratable, Module, Signal
+from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
+                    ClockDomain, DomainRenamer)
 from nmigen.cli import rtlil
 from nmigen.cli import main
 import sys
@@ -27,6 +28,7 @@ from soc.simple.core import NonProductionCore
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.config.ifetch import ConfigFetchUnit
 from soc.decoder.power_enums import MicrOp
+from soc.debug.dmi import CoreDebug, DMIInterface
 
 
 class TestIssuer(Elaboratable):
@@ -44,15 +46,15 @@ class TestIssuer(Elaboratable):
         self.iline = Signal(64) # one instruction line
         self.iprev_adr = Signal(64) # previous address: if different, do read
 
+        # DMI interface
+        self.dbg = CoreDebug()
+        self.dmi = self.dbg.dmi
+
         # instruction go/monitor
-        self.go_insn_i = Signal()
         self.pc_o = Signal(64, reset_less=True)
         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
-        self.core_start_i = Signal()
-        self.core_stop_i = Signal()
         self.core_bigendian_i = Signal()
         self.busy_o = Signal(reset_less=True)
-        self.halted_o = Signal(reset_less=True)
         self.memerr_o = Signal(reset_less=True)
 
         # FAST regfile read /write ports for PC and MSR
@@ -68,14 +70,27 @@ class TestIssuer(Elaboratable):
         m = Module()
         comb, sync = m.d.comb, m.d.sync
 
-        m.submodules.core = core = self.core
+        m.submodules.core = core = DomainRenamer("coresync")(self.core)
         m.submodules.imem = imem = self.imem
+        m.submodules.dbg = dbg = self.dbg
+
+        # clock delay power-on reset
+        cd_por  = ClockDomain(reset_less=True)
+        cd_sync = ClockDomain()
+        core_sync = ClockDomain("coresync")
+        m.domains += cd_por, cd_sync, core_sync
+
+        delay = Signal(range(4), reset=1)
+        with m.If(delay != 0):
+            m.d.por += delay.eq(delay - 1)
+        comb += cd_por.clk.eq(ClockSignal())
+        comb += core_sync.clk.eq(ClockSignal())
+        # XXX TODO: power-on reset delay (later)
+        #comb += core.core_reset_i.eq((delay != 0) | dbg.core_rst_o)
+        comb += core.core_reset_i.eq(dbg.core_rst_o)
 
         # busy/halted signals from core
         comb += self.busy_o.eq(core.busy_o)
-        comb += self.halted_o.eq(core.core_terminated_o)
-        comb += core.core_start_i.eq(self.core_start_i)
-        comb += core.core_stop_i.eq(self.core_stop_i)
         comb += core.bigendian_i.eq(self.core_bigendian_i)
 
         # temporary hack: says "go" immediately for both address gen and ST
@@ -99,6 +114,12 @@ class TestIssuer(Elaboratable):
         nia = Signal(64, reset_less=True)
         comb += nia.eq(cur_pc + 4)
 
+        # connect up debug signals
+        comb += core.core_stopped_i.eq(dbg.core_stop_o)
+        # TODO comb += core.reset_i.eq(dbg.core_rst_o)
+        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
+        comb += dbg.terminate_i.eq(core.core_terminate_o)
+
         # temporaries
         core_busy_o = core.busy_o         # core is busy
         core_ivalid_i = core.ivalid_i     # instruction is valid
@@ -110,121 +131,106 @@ class TestIssuer(Elaboratable):
         insn_msr = core.pdecode2.msr
         insn_cia = core.pdecode2.cia
 
-        # only run if not in halted state
-        with m.If(~core.core_terminated_o):
-
-            # actually use a nmigen FSM for the first time (w00t)
-            # this FSM is perhaps unusual in that it detects conditions
-            # then "holds" information, combinatorially, for the core
-            # (as opposed to using sync - which would be on a clock's delay)
-            # this includes the actual opcode, valid flags and so on.
-            with m.FSM() as fsm:
-
-                # waiting (zzz)
-                with m.State("IDLE"):
-                    sync += pc_changed.eq(0)
-                    with m.If(self.go_insn_i):
-                        # instruction allowed to go: start by reading the PC
-                        pc = Signal(64, reset_less=True)
-                        with m.If(self.pc_i.ok):
-                            # incoming override (start from pc_i)
-                            comb += pc.eq(self.pc_i.data)
-                        with m.Else():
-                            # otherwise read FastRegs regfile for PC
-                            comb += self.fast_r_pc.ren.eq(1<<FastRegs.PC)
-                            comb += pc.eq(self.fast_r_pc.data_o)
-                        # capture the PC and also drop it into Insn Memory
-                        # we have joined a pair of combinatorial memory
-                        # lookups together.  this is Generally Bad.
-                        comb += self.imem.a_pc_i.eq(pc)
-                        comb += self.imem.a_valid_i.eq(1)
-                        comb += self.imem.f_valid_i.eq(1)
-                        sync += cur_pc.eq(pc)
-                        m.next = "INSN_READ" # move to "wait for bus" phase
-
-                # waiting for instruction bus (stays there until not busy)
-                with m.State("INSN_READ"):
-                    with m.If(self.imem.f_busy_o): # zzz...
-                        # busy: stay in wait-read
-                        comb += self.imem.a_valid_i.eq(1)
-                        comb += self.imem.f_valid_i.eq(1)
+        # actually use a nmigen FSM for the first time (w00t)
+        # this FSM is perhaps unusual in that it detects conditions
+        # then "holds" information, combinatorially, for the core
+        # (as opposed to using sync - which would be on a clock's delay)
+        # this includes the actual opcode, valid flags and so on.
+        with m.FSM() as fsm:
+
+            # waiting (zzz)
+            with m.State("IDLE"):
+                sync += pc_changed.eq(0)
+                with m.If(~dbg.core_stop_o):
+                    # instruction allowed to go: start by reading the PC
+                    pc = Signal(64, reset_less=True)
+                    with m.If(self.pc_i.ok):
+                        # incoming override (start from pc_i)
+                        comb += pc.eq(self.pc_i.data)
                     with m.Else():
-                        # not busy: instruction fetched
-                        f_instr_o = self.imem.f_instr_o
-                        if f_instr_o.width == 32:
-                            insn = f_instr_o
-                        else:
-                            insn = f_instr_o.word_select(cur_pc[2], 32)
-                        comb += current_insn.eq(insn)
-                        comb += core_ivalid_i.eq(1) # instruction is valid
-                        comb += core_issue_i.eq(1)  # and issued 
-                        comb += core_opcode_i.eq(current_insn) # actual opcode
-                        sync += ilatch.eq(current_insn) # latch current insn
-
-                        # read MSR, latch it, and put it in decode "state"
-                        comb += self.fast_r_msr.ren.eq(1<<FastRegs.MSR)
-                        comb += msr.eq(self.fast_r_msr.data_o)
-                        comb += insn_msr.eq(msr)
-                        sync += cur_msr.eq(msr) # latch current MSR
-
-                        # also drop PC into decode "state"
-                        comb += insn_cia.eq(cur_pc)
-
-                        m.next = "INSN_ACTIVE" # move to "wait completion" 
-
-                # instruction started: must wait till it finishes
-                with m.State("INSN_ACTIVE"):
-                    with m.If(core.core_terminated_o):
-                        m.next = "IDLE" # back to idle, immediately (OP_ATTN)
-                    with m.Else():
-                        with m.If(insn_type != MicrOp.OP_NOP):
-                            comb += core_ivalid_i.eq(1) # instruction is valid
-                        comb += core_opcode_i.eq(ilatch) # actual opcode
-                        comb += insn_msr.eq(cur_msr)     # and MSR
-                        comb += insn_cia.eq(cur_pc)     # and PC
-                        with m.If(self.fast_nia.wen):
-                            sync += pc_changed.eq(1)
-                        with m.If(~core_busy_o): # instruction done!
-                            # ok here we are not reading the branch unit.  TODO
-                            # this just blithely overwrites whatever pipeline
-                            # updated the PC
-                            with m.If(~pc_changed):
-                                comb += self.fast_w_pc.wen.eq(1<<FastRegs.PC)
-                                comb += self.fast_w_pc.data_i.eq(nia)
-                            m.next = "IDLE" # back to idle
+                        # otherwise read FastRegs regfile for PC
+                        comb += self.fast_r_pc.ren.eq(1<<FastRegs.PC)
+                        comb += pc.eq(self.fast_r_pc.data_o)
+                    # capture the PC and also drop it into Insn Memory
+                    # we have joined a pair of combinatorial memory
+                    # lookups together.  this is Generally Bad.
+                    comb += self.imem.a_pc_i.eq(pc)
+                    comb += self.imem.a_valid_i.eq(1)
+                    comb += self.imem.f_valid_i.eq(1)
+                    sync += cur_pc.eq(pc)
+                    m.next = "INSN_READ" # move to "wait for bus" phase
+
+            # waiting for instruction bus (stays there until not busy)
+            with m.State("INSN_READ"):
+                with m.If(self.imem.f_busy_o): # zzz...
+                    # busy: stay in wait-read
+                    comb += self.imem.a_valid_i.eq(1)
+                    comb += self.imem.f_valid_i.eq(1)
+                with m.Else():
+                    # not busy: instruction fetched
+                    f_instr_o = self.imem.f_instr_o
+                    if f_instr_o.width == 32:
+                        insn = f_instr_o
+                    else:
+                        insn = f_instr_o.word_select(cur_pc[2], 32)
+                    comb += current_insn.eq(insn)
+                    comb += core_ivalid_i.eq(1) # instruction is valid
+                    comb += core_issue_i.eq(1)  # and issued
+                    comb += core_opcode_i.eq(current_insn) # actual opcode
+                    sync += ilatch.eq(current_insn) # latch current insn
+
+                    # read MSR, latch it, and put it in decode "state"
+                    comb += self.fast_r_msr.ren.eq(1<<FastRegs.MSR)
+                    comb += msr.eq(self.fast_r_msr.data_o)
+                    comb += insn_msr.eq(msr)
+                    sync += cur_msr.eq(msr) # latch current MSR
+
+                    # also drop PC into decode "state"
+                    comb += insn_cia.eq(cur_pc)
+
+                    m.next = "INSN_ACTIVE" # move to "wait completion"
+
+            # instruction started: must wait till it finishes
+            with m.State("INSN_ACTIVE"):
+                with m.If(insn_type != MicrOp.OP_NOP):
+                    comb += core_ivalid_i.eq(1) # instruction is valid
+                comb += core_opcode_i.eq(ilatch) # actual opcode
+                comb += insn_msr.eq(cur_msr)     # and MSR
+                comb += insn_cia.eq(cur_pc)     # and PC
+                with m.If(self.fast_nia.wen):
+                    sync += pc_changed.eq(1)
+                with m.If(~core_busy_o): # instruction done!
+                    # ok here we are not reading the branch unit.  TODO
+                    # this just blithely overwrites whatever pipeline
+                    # updated the PC
+                    with m.If(~pc_changed):
+                        comb += self.fast_w_pc.wen.eq(1<<FastRegs.PC)
+                        comb += self.fast_w_pc.data_i.eq(nia)
+                    m.next = "IDLE" # back to idle
 
         return m
 
     def __iter__(self):
         yield from self.pc_i.ports()
         yield self.pc_o
-        yield self.go_insn_i
         yield self.memerr_o
         yield from self.core.ports()
         yield from self.imem.ports()
-        yield self.core_start_i
-        yield self.core_stop_i
         yield self.core_bigendian_i
         yield self.busy_o
-        yield self.halted_o
 
     def ports(self):
         return list(self)
 
     def external_ports(self):
         return self.pc_i.ports() + [self.pc_o,
-                                    self.go_insn_i,
                                     self.memerr_o,
-                                    self.core_start_i,
-                                    self.core_stop_i,
-                                    self.core_bigendian_i,
                                     self.busy_o,
-                                    self.halted_o,
                                     ] + \
+                list(self.dbg.dmi.ports()) + \
                 list(self.imem.ibus.fields.values()) + \
                 list(self.core.l0.cmpi.lsmem.lsi.dbus.fields.values())
 
-
     def ports(self):
         return list(self)
 
index b2c219da2c1825498553c357ea1d75d85c31bdd0..984a3db48c5fa8bab2f8bbed5a71d207400d2600 100644 (file)
@@ -170,11 +170,11 @@ def check_regs(dut, sim, core, test, code):
 def wait_for_busy_hi(cu):
     while True:
         busy_o = yield cu.busy_o
-        terminated_o = yield cu.core_terminated_o
-        if busy_o or terminated_o:
-            print("busy/terminated:", busy_o, terminated_o)
+        terminate_o = yield cu.core_terminate_o
+        if busy_o:
+            print("busy/terminate:", busy_o, terminate_o)
             break
-        print("!busy", busy_o, terminated_o)
+        print("!busy", busy_o, terminate_o)
         yield
 
 
@@ -188,9 +188,9 @@ def set_issue(core, dec2, sim):
 def wait_for_busy_clear(cu):
     while True:
         busy_o = yield cu.busy_o
-        terminated_o = yield cu.core_terminated_o
-        if not busy_o or terminated_o:
-            print("busy/terminated:", busy_o, terminated_o)
+        terminate_o = yield cu.core_terminate_o
+        if not busy_o:
+            print("busy/terminate:", busy_o, terminate_o)
             break
         print("busy",)
         yield
index b8276002067350c30e43f49ac3d56fd2b050ccbc..b1df27dccae250e2b7def5ac59c65ef241831cf6 100644 (file)
@@ -23,6 +23,7 @@ from soc.simple.test.test_core import (setup_regs, check_regs,
                                        wait_for_busy_hi)
 from soc.fu.compunits.test.test_compunit import (setup_test_memory,
                                                  check_sim_memory)
+from soc.debug.dmi import DBGCore, DBGCtrl, DBGStat
 
 # test with ALU data and Logical data
 #from soc.fu.alu.test.test_pipe_caller import ALUTestCase
@@ -86,6 +87,22 @@ def setup_i_memory(imem, startaddr, instructions):
         startaddr = startaddr & mask
 
 
+def set_dmi(dmi, addr, data):
+    yield dmi.req_i.eq(1)
+    yield dmi.addr_i.eq(addr)
+    yield dmi.din.eq(data)
+    yield dmi.we_i.eq(1)
+    while True:
+        ack = yield dmi.ack_o
+        if ack:
+            break
+        yield
+    yield dmi.req_i.eq(0)
+    yield dmi.addr_i.eq(0)
+    yield dmi.din.eq(0)
+    yield dmi.we_i.eq(0)
+
+
 class TestRunner(FHDLTestCase):
     def __init__(self, tst_data):
         super().__init__("run_all")
@@ -94,7 +111,6 @@ class TestRunner(FHDLTestCase):
     def run_all(self):
         m = Module()
         comb = m.d.comb
-        go_insn_i = Signal()
         pc_i = Signal(32)
 
         pspec = TestMemPspec(ldst_ifacetype='test_bare_wb',
@@ -106,11 +122,11 @@ class TestRunner(FHDLTestCase):
         m.submodules.issuer = issuer = TestIssuer(pspec)
         imem = issuer.imem._get_memory()
         core = issuer.core
+        dmi = issuer.dbg.dmi
         pdecode2 = core.pdecode2
         l0 = core.l0
 
         comb += issuer.pc_i.data.eq(pc_i)
-        comb += issuer.go_insn_i.eq(go_insn_i)
 
         # nmigen Simulation
         sim = Simulator(m)
@@ -118,15 +134,25 @@ class TestRunner(FHDLTestCase):
 
         def process():
 
+            # start in stopped
+            yield from set_dmi(dmi, DBGCore.CTRL, 1<<DBGCtrl.STOP)
+            yield
+            yield
+
             for test in self.test_data:
 
-                # get core going
+                # pull a reset
+                yield from set_dmi(dmi, DBGCore.CTRL, 1<<DBGCtrl.RESET)
+
+                # set up bigendian (TODO: don't do this, use MSR)
                 yield issuer.core_bigendian_i.eq(bigendian)
-                yield issuer.core_start_i.eq(1)
-                yield
-                yield issuer.core_start_i.eq(0)
                 yield Settle()
 
+                yield
+                yield
+                yield
+                yield
+
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
@@ -146,6 +172,7 @@ class TestRunner(FHDLTestCase):
                           bigendian=bigendian)
 
                 pc = 0  # start address
+                counter = 0 # test to pause/start
 
                 yield from setup_i_memory(imem, pc, instructions)
                 yield from setup_test_memory(l0, sim)
@@ -163,18 +190,21 @@ class TestRunner(FHDLTestCase):
                     print("instruction: 0x{:X}".format(ins & 0xffffffff))
                     print(index, code)
 
-                    # start the instruction
-                    yield go_insn_i.eq(1)
-                    yield
-                    yield issuer.pc_i.ok.eq(0)  # don't change PC from now on
-                    yield go_insn_i.eq(0)      # and don't issue a new insn
-                    yield Settle()
+                    if counter == 0:
+                        # start the core
+                        yield
+                        yield from set_dmi(dmi, DBGCore.CTRL, 1<<DBGCtrl.START)
+                        yield issuer.pc_i.ok.eq(0)  # no change PC after this
+                        yield
+                        yield
+
+                    counter = counter + 1
 
                     # wait until executed
                     yield from wait_for_busy_hi(core)
                     yield from wait_for_busy_clear(core)
 
-                    terminated = yield issuer.halted_o
+                    terminated = yield issuer.dbg.terminated_o
                     print("terminated", terminated)
 
                     print("sim", code)
@@ -190,7 +220,7 @@ class TestRunner(FHDLTestCase):
                     # Memory check
                     yield from check_sim_memory(self, l0, sim, code)
 
-                    terminated = yield issuer.halted_o
+                    terminated = yield issuer.dbg.terminated_o
                     if terminated:
                         break
 
index f2477a1f9b9df809331c06c1a912bae3c40002ef..0b85ca74019f1d44381c9487b54b4ec35aac46fc 100644 (file)
@@ -49,6 +49,7 @@ class GeneralTestCases(FHDLTestCase):
         super().__init__(name)
         self.test_name = name
 
+    @unittest.skip("disable")
     def test_0_litex_bios_r1(self):
         """litex bios IMM64 macro test
         """
@@ -165,7 +166,7 @@ class GeneralTestCases(FHDLTestCase):
         with Program(lst, bigendian) as program:
             self.run_tst_program(program, [1, 2, 3, 4, 5])
 
-    @unittest.skip("disable")
+    #@unittest.skip("disable")
     def test_add_with_carry(self):
         lst = ["addi 1, 0, 5",
                "neg 1, 1",
@@ -230,6 +231,7 @@ class GeneralTestCases(FHDLTestCase):
         with Program(lst, bigendian) as program:
             self.run_tst_program(program, [1, 2, 3, 4], initial_mem)
 
+    @unittest.skip("disable")
     def test_nop(self):
         lst = ["addi 1, 0, 0x1004",
                "ori 0,0,0", # "preferred" form of nop