Add and enable async Wishbone bridges async
author Raptor Engineering Development Team <support@raptorengineering.com>
Thu, 14 Apr 2022 00:55:16 +0000 (19:55 -0500)
committer Raptor Engineering Development Team <support@raptorengineering.com>
Thu, 14 Apr 2022 00:55:16 +0000 (19:55 -0500)
Fix simulation with DDR3+SPI

runsimsoc2.sh
simsoc.ys
src/ecp5_crg.py
src/ls2.py

index 66080cb6cd1d4d3e856b0f6bca9168e40d2f4fc8..c4b4288074d8bdb11706db1d60c1924f1ff59525 100755 (executable)
@@ -20,7 +20,7 @@ iverilog -Wall -g2012 -s simsoctb -o simsoc \
     ${LIB_DIR}/PUR.v ${LIB_DIR}/GSR.v \
        ${LIB_DIR}/FD1S3AX.v ${LIB_DIR}/SGSR.v ${LIB_DIR}/ODDRX2F.v \
     ${LIB_DIR}/ODDRX2DQA.v ${LIB_DIR}/DELAYF.v ${LIB_DIR}/BB.v \
-    ${LIB_DIR}/OB.v ${LIB_DIR}/IB.v \
+    ${LIB_DIR}/DELAYG.v ${LIB_DIR}/OB.v ${LIB_DIR}/IB.v \
        ${LIB_DIR}/DQSBUFM.v ${LIB_DIR}/UDFDL5_UDP_X.v \
     ${LIB_DIR}/TSHX2DQSA.v ${LIB_DIR}/TSHX2DQA.v \
     ${LIB_DIR}/ODDRX2DQSB.v ${LIB_DIR}/IDDRX2DQA.v \
index e5e237af95b5040c202fd23ea58c9e2bc3d8b280..72b0f6be4eb71ab2b9d9f45914cd4afaf3c332ef 100644 (file)
--- a/simsoc.ys
+++ b/simsoc.ys
@@ -18,6 +18,7 @@ read_verilog  ../uart16550/rtl/verilog/uart_tfifo.v
 read_verilog  ../uart16550/rtl/verilog/uart_wb.v
 read_verilog  ../tercel-qspi/tercel/phy.v 
 read_verilog  ../tercel-qspi/tercel/wishbone_spi_master.v
+read_verilog  ../verilog-wishbone/rtl/wb_async_reg.v
 # errors in the ethmac rtl, comment out for now
 #read_verilog  ../ethmac/rtl/verilog/eth_clockgen.v
 #read_verilog  ../ethmac/rtl/verilog/eth_cop.v
index 5c975d66c1dacaed906e13e96cb4781ceb64593a..b0e50cb459d9d27eeb14076e7915266869b723aa 100644 (file)
@@ -2,6 +2,7 @@
 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
 # Copyright (c) 2018-2020 Florent Kermarrec <florent@enjoy-digital.fr>
 # Copyright (c) 2019 Michael Betz <michibetz@gmail.com>
+# Copyright (C) 2022 Raptor Engineering, LLC <support@raptorengineering.com>
 #
 # Based on code from LambaConcept, from the gram example which is BSD-2-License
 # https://github.com/jeanthom/gram/tree/master/examples
@@ -169,8 +170,9 @@ class PLL(Elaboratable):
 
 
 class ECP5CRG(Elaboratable):
-    def __init__(self, sys_clk_freq=100e6, pod_bits=25):
+    def __init__(self, sys_clk_freq=100e6, memory_clk_freq=100e6, pod_bits=25):
         self.sys_clk_freq = sys_clk_freq
+        self.memory_clk_freq = memory_clk_freq
         self.pod_bits = pod_bits
 
     def elaborate(self, platform):
@@ -214,44 +216,51 @@ class ECP5CRG(Elaboratable):
             m.d.rawclk += podcnt.eq(podcnt-1)
         m.d.rawclk += pod_done.eq(podcnt == 0)
 
-        # Generating sync2x (200Mhz) and init (25Mhz) from extclk
-        cd_sync2x = ClockDomain("sync2x", local=False)
-        cd_sync2x_unbuf = ClockDomain("sync2x_unbuf",
+        # Generating memory2x (200Mhz) and init (25Mhz) from extclk
+        cd_memory2x = ClockDomain("memory2x", local=False)
+        cd_memory2x_unbuf = ClockDomain("memory2x_unbuf",
                                       local=False, reset_less=True)
+        cd_memory = ClockDomain("memory", local=False)
         cd_init = ClockDomain("init", local=False)
         cd_sync = ClockDomain("sync", local=False)
         cd_dramsync = ClockDomain("dramsync", local=False)
+        cd_dramsync2x = ClockDomain("dramsync2x", local=False)
 
         # create PLL clocks
         pll.set_clkin_freq(platform.default_clk_frequency)
-        pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq)
+        pll.create_clkout(ClockSignal("memory2x_unbuf"), 2*self.memory_clk_freq)
+        pll.create_clkout(ClockSignal("sync"), self.sys_clk_freq)
         pll.create_clkout(ClockSignal("init"), 25e6)
         m.submodules += Instance("ECLKSYNCB",
-                i_ECLKI = ClockSignal("sync2x_unbuf"),
+                i_ECLKI = ClockSignal("memory2x_unbuf"),
                 i_STOP  = 0,
-                o_ECLKO = ClockSignal("sync2x"))
-        m.domains += cd_sync2x_unbuf
-        m.domains += cd_sync2x
+                o_ECLKO = ClockSignal("memory2x"))
+        m.domains += cd_memory
+        m.domains += cd_memory2x_unbuf
+        m.domains += cd_memory2x
         m.domains += cd_init
         m.domains += cd_sync
         m.domains += cd_dramsync
+        m.domains += cd_dramsync2x
         reset_ok = Signal(reset_less=True)
         m.d.comb += reset_ok.eq(~pll.locked|~pod_done)
         m.d.comb += ResetSignal("init").eq(reset_ok)
         m.d.comb += ResetSignal("sync").eq(reset_ok)
+        m.d.comb += ResetSignal("memory").eq(reset_ok)
         m.d.comb += ResetSignal("dramsync").eq(reset_ok)
 
-        # # Generating sync (100Mhz) from sync2x
+        # # Generating memory (100Mhz) from memory2x
 
         m.submodules += Instance("CLKDIVF",
             p_DIV="2.0",
             i_ALIGNWD=0,
-            i_CLKI=ClockSignal("sync2x"),
+            i_CLKI=ClockSignal("memory2x"),
             i_RST=0,
-            o_CDIVX=ClockSignal("sync"))
+            o_CDIVX=ClockSignal("memory"))
 
-        # temporarily set dram sync clock exactly equal to main sync
-        m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync"))
+        # temporarily set dram sync clock exactly equal to main memory clock
+        m.d.comb += ClockSignal("dramsync").eq(ClockSignal("memory"))
+        m.d.comb += ClockSignal("dramsync2x").eq(ClockSignal("memory2x"))
 
         return m
 
index 708673b65a526e7fb6850c21526b794cff7a4c88..0bb54b47e2553211a8ca69a96749dbd3b54170dc 100644 (file)
@@ -33,6 +33,7 @@ from soc.bus.tercel import Tercel # SPI XIP master
 from soc.bus.opencores_ethmac import EthMAC # OpenCores 10/100 Ethernet MAC
 from soc.bus.external_core import ExternalCore # external libresoc/microwatt
 from soc.bus.wb_downconvert import WishboneDownConvert
+from soc.bus.wb_async import WBAsyncBridge
 from soc.bus.syscon import MicrowattSYSCON
 
 # DDR3
@@ -245,6 +246,7 @@ class DDR3SoC(SoC, Elaboratable):
                  hyperram_addr=None,
                  hyperram_pins=None,
                  clk_freq=50e6,
+                 memory_clk_freq=50e6,
                  add_cpu=True):
 
         # wishbone routing is as follows:
@@ -258,11 +260,13 @@ class DDR3SoC(SoC, Elaboratable):
         #          |
         #      64to32DownCvt
         #          |
-        #       arbiter------------------------------------------+
-        #          |                                             |
-        #   +---decoder----+--------+---------+-------+--------+ |
-        #   |      |       |        |         |       |        | |
-        #  uart  XICS    CSRs     DRAM     XIP SPI HyperRAM   EthMAC
+        #       arbiter--------------------------------------------------------+
+        #          |                                                           |
+        #   +---decoder----+--------+-----------------+-------------+--------+ |
+        #   |      |       |        |                 |             |        | |
+        #   |      |       |  WBAsyncBridge     WBAsyncBridge       |        | |
+        #   |      |       |        |                 |             |        | |
+        #  uart  XICS    CSRs     DRAM            XIP SPI       HyperRAM   EthMAC
 
         # set up wishbone bus arbiter and decoder. arbiter routes,
         # decoder maps local-relative addressed satellites to global addresses
@@ -282,7 +286,7 @@ class DDR3SoC(SoC, Elaboratable):
         if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s']:
             if fpga in ['isim']:
                 pod_bits = 6
-            self.crg = ECP5CRG(clk_freq, pod_bits)
+            self.crg = ECP5CRG(clk_freq, memory_clk_freq, pod_bits)
         if fpga in ['arty_a7']:
             self.crg = ArtyA7CRG(clk_freq)
 
@@ -320,7 +324,9 @@ class DDR3SoC(SoC, Elaboratable):
         # offset executable ELF payload at 6 megabyte offset (2<<20)
         spi_offset = 2<<20 if (spi_0_pins is not None) else None
         dram_offset = ddr_addr if (ddr_pins is not None) else None
-        self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq,
+        self.syscon = MicrowattSYSCON(core_clk_freq=clk_freq,
+                                      mem_clk_freq=memory_clk_freq,
+                                      sys_clk_freq=clk_freq,
                                       has_uart=(uart_pins is not None),
                                       spi_offset=spi_offset,
                                       dram_addr=dram_offset)
@@ -367,42 +373,77 @@ class DDR3SoC(SoC, Elaboratable):
 
         # DRAM Module
         if ddr_pins is not None: # or fpga == 'sim':
-            ddrmodule = dram_cls(clk_freq, "1:2") # match DDR3 ASIC P/N
+            ddrmodule = dram_cls(memory_clk_freq, "1:2") # match DDR3 ASIC P/N
 
             #drs = lambda x: x
             drs = DomainRenamer("dramsync")
 
             if fpga == 'sim':
                 self.ddrphy = FakePHY(module=ddrmodule,
-                                      settings=sim_ddr3_settings(clk_freq),
+                                      settings=sim_ddr3_settings(memory_clk_freq),
                                       verbosity=SDRAM_VERBOSE_DBG,
-                                      clk_freq=clk_freq)
+                                      clk_freq=memory_clk_freq)
             else:
-                self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=clk_freq))
+                self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=memory_clk_freq))
             self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr)
 
             dramcore = gramCore(phy=self.ddrphy,
                                 geom_settings=ddrmodule.geom_settings,
                                 timing_settings=ddrmodule.timing_settings,
-                                clk_freq=clk_freq)
+                                clk_freq=memory_clk_freq)
             if fpga == 'sim':
                 self.dramcore = dramcore
             else:
                 self.dramcore = drs(dramcore)
-            self._decoder.add(self.dramcore.bus, addr=dramcore_addr)
 
-            # map the DRAM onto Wishbone, XXX use stall but set classic below
-            # XXX WHEN ADDING ASYNCBRIDGE IT IS THE **BRIDGE** THAT MUST
-            # XXX HAVE THE STALL SIGNAL, AND THE **BRIDGE** THAT MUST HAVE
-            # XXX stall=stb&~ack APPLIED
-            drambone = gramWishbone(dramcore, features={'stall'})
+            # Set up Wishbone asynchronous bridge
+            self.dramcore_async_bus = wishbone.Interface(
+                                             addr_width=self.dramcore.bus.addr_width,
+                                             data_width=self.dramcore.bus.data_width,
+                                             granularity=self.dramcore.bus.granularity,
+                                             features={'stall'})
+            self.dramcore_async_bus.memory_map = self.dramcore.bus.memory_map
+
+            self.dramcore_async_br = WBAsyncBridge(
+                                         master_bus=self.dramcore_async_bus,
+                                         slave_bus=self.dramcore.bus,
+                                         master_clock_domain=None,
+                                         slave_clock_domain="dramsync",
+                                         address_width=self.dramcore.bus.addr_width,
+                                         data_width=self.dramcore.bus.data_width,
+                                         granularity=self.dramcore.bus.granularity,
+                                         master_features={'stall'})
+
+            # Add wishbone decoder
+            self._decoder.add(self.dramcore_async_bus, addr=dramcore_addr)
+
+            drambone = gramWishbone(dramcore)
             if fpga == 'sim':
                 self.drambone = drambone
             else:
                 self.drambone = drs(drambone)
+
+            # Set up Wishbone asynchronous bridge
+            self.drambone_async_bus = wishbone.Interface(
+                                             addr_width=self.drambone.bus.addr_width,
+                                             data_width=self.drambone.bus.data_width,
+                                             granularity=self.drambone.bus.granularity,
+                                             features={'stall'})
+            self.drambone_async_bus.memory_map = self.drambone.bus.memory_map
+
+            self.drambone_async_br = WBAsyncBridge(
+                                         master_bus=self.drambone_async_bus,
+                                         slave_bus=self.drambone.bus,
+                                         master_clock_domain=None,
+                                         slave_clock_domain="dramsync",
+                                         address_width=self.drambone.bus.addr_width,
+                                         data_width=self.drambone.bus.data_width,
+                                         granularity=self.drambone.bus.granularity,
+                                         master_features={'stall'})
+
             # XXX ADD THE ASYNCBRIDGE NOT THE DRAMBONE.BUS, THEN
             # XXX ADD DRAMBONE.BUS TO ASYNCBRIDGE
-            self._decoder.add(self.drambone.bus, addr=ddr_addr)
+            self._decoder.add(self.drambone_async_bus, addr=ddr_addr)
 
         # additional SRAM at address if DRAM is not also at 0x0
         # (TODO, check Flash, and HyperRAM as well)
@@ -431,20 +472,66 @@ class DDR3SoC(SoC, Elaboratable):
                         'isim']:
                 spi0_is_lattice_ecp5_clk = True
 
+            if fpga in ['versa_ecp5', 'versa_ecp5_85']:
+                # XXX
+                # Versa boards cannot handle higher SPI clock speeds
+                # (PCB / trace routing problem?)
+                # Set Tercel clock to base CPU clock to compensate
+                # for now...
+                drs = DomainRenamer("sync")
+            else:
+                drs = DomainRenamer("memory")
+
             # Tercel contains two independent Wishbone regions, a
             # configuration region and the direct API access region,
             # Set the SPI 0 access region to 16MB, as the FPGA
             # bitstream Flash device is unlikely to be larger than this.
             # The main SPI Flash (SPI 1) should be set to at
             # least 28 bits (256MB) to allow the use of large 4BA devices.
-            self.spi0 = Tercel(data_width=32, spi_region_addr_width=24,
+            self.spi0 = drs(Tercel(data_width=32, spi_region_addr_width=24,
                                adr_offset=spi0_addr,
-                               features={'stall'},
-                               clk_freq=clk_freq,
+                               clk_freq=memory_clk_freq,
                                pins=spi_0_pins,
-                               lattice_ecp5_usrmclk=spi0_is_lattice_ecp5_clk)
-            self._decoder.add(self.spi0.bus, addr=spi0_addr)
-            self._decoder.add(self.spi0.cfg_bus, addr=spi0_cfg_addr)
+                               lattice_ecp5_usrmclk=spi0_is_lattice_ecp5_clk))
+
+
+            # Set up Wishbone asynchronous bridges
+            self.spi0_async_bus = wishbone.Interface(
+                                             addr_width=self.spi0.bus.addr_width,
+                                             data_width=self.spi0.bus.data_width,
+                                             granularity=self.spi0.bus.granularity,
+                                             features={'stall'})
+            self.spi0_async_cfg_bus = wishbone.Interface(
+                                             addr_width=self.spi0.cfg_bus.addr_width,
+                                             data_width=self.spi0.cfg_bus.data_width,
+                                             granularity=self.spi0.cfg_bus.granularity,
+                                             features={'stall'})
+            self.spi0_async_bus.memory_map = self.spi0.bus.memory_map
+            self.spi0_async_cfg_bus.memory_map = self.spi0.cfg_bus.memory_map
+
+            self.spi0_async_br = WBAsyncBridge(
+                                         master_bus=self.spi0_async_bus,
+                                         slave_bus=self.spi0.bus,
+                                         master_clock_domain=None,
+                                         slave_clock_domain="memory",
+                                         address_width=self.spi0.bus.addr_width,
+                                         data_width=self.spi0.bus.data_width,
+                                         granularity=self.spi0.bus.granularity,
+                                         master_features={'stall'})
+
+            self.spi0_async_cfg_br = WBAsyncBridge(
+                                         master_bus=self.spi0_async_cfg_bus,
+                                         slave_bus=self.spi0.cfg_bus,
+                                         master_clock_domain=None,
+                                         slave_clock_domain="memory",
+                                         address_width=self.spi0.cfg_bus.addr_width,
+                                         data_width=self.spi0.cfg_bus.data_width,
+                                         granularity=self.spi0.cfg_bus.granularity,
+                                         master_features={'stall'})
+
+            # Add wishbone decoders
+            self._decoder.add(self.spi0_async_bus, addr=spi0_addr)
+            self._decoder.add(self.spi0_async_cfg_bus, addr=spi0_cfg_addr)
 
         # Ethernet MAC
         if ethmac_0_pins is not None and fpga in ['versa_ecp5',
@@ -470,6 +557,7 @@ class DDR3SoC(SoC, Elaboratable):
         self.memory_map = self._decoder.bus.memory_map
 
         self.clk_freq = clk_freq
+        self.memory_clk_freq = memory_clk_freq
         self.fpga = fpga
 
     def elaborate(self, platform):
@@ -523,10 +611,22 @@ class DDR3SoC(SoC, Elaboratable):
             m.submodules.ddrphy = self.ddrphy
             m.submodules.dramcore = self.dramcore
             m.submodules.drambone = drambone = self.drambone
-            # grrr, same problem with drambone: not WB4-pipe compliant
-            # XXX TAKE THIS OUT, REPLACE WITH ASYNCBRIDGE HAVING
-            # XXX asyncbridge.bus.stall.eq(asyncbridge.bus.cyc & ...)
-            comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack)
+
+            # add async wishbone bridges
+            m.submodules.dramcore_async_br = self.dramcore_async_br
+            m.submodules.drambone_async_br = self.drambone_async_br
+
+            # add wb async bridge verilog source. assumes a directory structure where
+            # verilog-wishbone has been checked out in a common subdirectory with:
+            # git clone https://github.com/alexforencich/verilog-wishbone.git
+            # git checkout d1fa24a0
+            verilog_wishbone = "../../verilog-wishbone/rtl"
+            pth = os.path.split(__file__)[0]
+            pth = os.path.join(pth, verilog_wishbone)
+            fname = os.path.abspath(pth)
+            print (fname)
+            self.dramcore_async_br.add_verilog_source(fname, platform)
+            self.drambone_async_br.add_verilog_source(fname, platform)
 
         # add hyperram module
         if hasattr(self, "hyperram"):
@@ -566,9 +666,10 @@ class DDR3SoC(SoC, Elaboratable):
         if hasattr(self, "spi0"):
             # add spi submodule
             m.submodules.spi0 = spi = self.spi0
-            # gonna drive me nuts, this.
-            comb += spi.bus.stall.eq(spi.bus.cyc & ~spi.bus.ack)
-            comb += spi.cfg_bus.stall.eq(spi.cfg_bus.cyc & ~spi.cfg_bus.ack)
+
+            # add async wishbone bridges
+            m.submodules.spi0_async_br = self.spi0_async_br
+            m.submodules.spi0_async_cfg_br = self.spi0_async_cfg_br
 
             # add Tercel verilog source. assumes a directory structure where
             # microwatt has been checked out in a common subdirectory with:
@@ -581,6 +682,18 @@ class DDR3SoC(SoC, Elaboratable):
             print (fname)
             self.spi0.add_verilog_source(fname, platform)
 
+            # add wb async bridge verilog source. assumes a directory structure where
+            # verilog-wishbone has been checked out in a common subdirectory with:
+            # git clone https://github.com/alexforencich/verilog-wishbone.git
+            # git checkout d1fa24a0
+            verilog_wishbone = "../../verilog-wishbone/rtl"
+            pth = os.path.split(__file__)[0]
+            pth = os.path.join(pth, verilog_wishbone)
+            fname = os.path.abspath(pth)
+            print (fname)
+            self.spi0_async_br.add_verilog_source(fname, platform)
+            self.spi0_async_cfg_br.add_verilog_source(fname, platform)
+
         if hasattr(self, "eth0"):
             # add ethernet submodule
             m.submodules.eth0 = ethmac = self.eth0
@@ -664,20 +777,27 @@ def build_platform(fpga, firmware):
 
     # set clock frequency
     clk_freq = 70e6
+    memory_clk_freq = clk_freq
     if fpga == 'sim':
         clk_freq = 100e6
+        memory_clk_freq = clk_freq
     if fpga == 'isim':
         clk_freq = 55e6 # below 50 mhz, stops DRAM being enabled
+        memory_clk_freq = clk_freq
     if fpga == 'versa_ecp5':
         clk_freq = 55e6 # crank right down to test hyperram
+        memory_clk_freq = clk_freq
     if fpga == 'versa_ecp5_85':
         # 50MHz works.  100MHz works.  55MHz does NOT work.
         # Stick with multiples of 50MHz...
         clk_freq = 50e6
+        memory_clk_freq = 100e6
     if fpga == 'arty_a7':
         clk_freq = 50e6
+        memory_clk_freq = clk_freq
     if fpga == 'ulx3s':
         clk_freq = 40.0e6
+        memory_clk_freq = clk_freq
 
     # select a firmware address
     fw_addr = None
@@ -837,6 +957,7 @@ def build_platform(fpga, firmware):
                   hyperram_pins=hyperram_pins,
                   firmware=firmware,
                   clk_freq=clk_freq,
+                  memory_clk_freq=memory_clk_freq,
                   add_cpu=True)
 
     if toolchain == 'Trellis':