Separate nest and core clocks
author Raptor Engineering Development Team <support@raptorengineering.com>
Mon, 11 Apr 2022 19:33:25 +0000 (14:33 -0500)
committer Raptor Engineering Development Team <support@raptorengineering.com>
Mon, 11 Apr 2022 19:33:25 +0000 (14:33 -0500)
Add async bridges between CPU and Wishbone
downconverters

Switch Raptor Versa 85 board to 100MHz
nest and 50MHz core

Verified to function on Raptor Versa 85
board

simsoc.ys
src/ecp5_crg.py
src/ls2.py

index e5e237af95b5040c202fd23ea58c9e2bc3d8b280..72b0f6be4eb71ab2b9d9f45914cd4afaf3c332ef 100644 (file)
--- a/simsoc.ys
+++ b/simsoc.ys
@@ -18,6 +18,7 @@ read_verilog  ../uart16550/rtl/verilog/uart_tfifo.v
 read_verilog  ../uart16550/rtl/verilog/uart_wb.v
 read_verilog  ../tercel-qspi/tercel/phy.v 
 read_verilog  ../tercel-qspi/tercel/wishbone_spi_master.v
+read_verilog  ../verilog-wishbone/rtl/wb_async_reg.v
 # errors in the ethmac rtl, comment out for now
 #read_verilog  ../ethmac/rtl/verilog/eth_clockgen.v
 #read_verilog  ../ethmac/rtl/verilog/eth_cop.v
index b1bd03ce416778f4b49148023d93a2c22f5f879a..36c8f1dd4f5ecde53a30f8b949862abfdb828117 100644 (file)
@@ -2,6 +2,7 @@
 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
 # Copyright (c) 2018-2020 Florent Kermarrec <florent@enjoy-digital.fr>
 # Copyright (c) 2019 Michael Betz <michibetz@gmail.com>
+# Copyright (c) 2022 Raptor Engineering, LLC <support@raptorengineering.com>
 #
 # Based on code from LambaConcept, from the gram example which is BSD-2-License
 # https://github.com/jeanthom/gram/tree/master/examples
@@ -169,8 +170,9 @@ class PLL(Elaboratable):
 
 
 class ECP5CRG(Elaboratable):
-    def __init__(self, sys_clk_freq=100e6, pod_bits=25):
+    def __init__(self, sys_clk_freq=100e6, core_clk_freq=100e6, pod_bits=25):
         self.sys_clk_freq = sys_clk_freq
+        self.core_clk_freq = core_clk_freq
         self.pod_bits = pod_bits
 
         # DDR clock control signals
@@ -219,17 +221,24 @@ class ECP5CRG(Elaboratable):
         m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~pod_done|~reset)
 
         # Generating sync2x (200Mhz) and init (25Mhz) from extclk
+        # sync is our "nest" frequency, cpu is the core frequency
+        # On ASIC, core >= nest; on FPGA, core <= nest...
         cd_sync2x = ClockDomain("sync2x", local=False)
         cd_sync2x_unbuf = ClockDomain("sync2x_unbuf",
                                       local=False, reset_less=True)
         cd_init = ClockDomain("init", local=False)
         cd_sync = ClockDomain("sync", local=False)
+        cd_cpu = ClockDomain("cpu", local=False)
         cd_dramsync = ClockDomain("dramsync", local=False)
 
         # create PLL clocks
         pll.set_clkin_freq(platform.default_clk_frequency)
         pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq)
         pll.create_clkout(ClockSignal("init"), 25e6)
+        if self.sys_clk_freq == self.core_clk_freq:
+            m.d.comb += ClockSignal("cpu").eq(ClockSignal("sync"))
+        else:
+            pll.create_clkout(ClockSignal("cpu"), self.core_clk_freq)
         m.submodules += Instance("ECLKSYNCB",
                 i_ECLKI = ClockSignal("sync2x_unbuf"),
                 i_STOP  = self.ddr_clk_stop,
@@ -238,11 +247,13 @@ class ECP5CRG(Elaboratable):
         m.domains += cd_sync2x
         m.domains += cd_init
         m.domains += cd_sync
+        m.domains += cd_cpu
         m.domains += cd_dramsync
         reset_ok = Signal(reset_less=True)
         m.d.comb += reset_ok.eq(~pll.locked|~pod_done)
         m.d.comb += ResetSignal("init").eq(reset_ok)
         m.d.comb += ResetSignal("sync").eq(reset_ok)
+        m.d.comb += ResetSignal("cpu").eq(reset_ok)
         m.d.comb += ResetSignal("dramsync").eq(reset_ok|self.ddr_clk_reset)
 
         # # Generating sync (100Mhz) from sync2x
index 00c185300d311ce93e72267789e4e8f34938c962..b05cf2e489520fe19a42426609a4b2b8eedc7c27 100644 (file)
@@ -28,6 +28,7 @@ from lambdasoc.periph.sram import SRAMPeripheral
 from lambdasoc.periph.timer import TimerPeripheral
 from lambdasoc.periph import Peripheral
 from lambdasoc.soc.base import SoC
+from soc.bus.wb_async import WBAsyncBridge
 from soc.bus.uart_16550 import UART16550 # opencores 16550 uart
 from soc.bus.tercel import Tercel # SPI XIP master
 from soc.bus.opencores_ethmac import EthMAC # OpenCores 10/100 Ethernet MAC
@@ -264,7 +265,8 @@ class DDR3SoC(SoC, Elaboratable):
                  eth0_cfg_addr, eth0_irqno,
                  hyperram_addr=None,
                  hyperram_pins=None,
-                 clk_freq=50e6,
+                 nest_freq=50e6,
+                 core_freq=50e6,
                  add_cpu=True):
 
         # wishbone routing is as follows:
@@ -276,6 +278,8 @@ class DDR3SoC(SoC, Elaboratable):
         #       |     |
         #       +--+--+
         #          |
+        #      WBAsyncBridge
+        #          |
         #      64to32DownCvt
         #          |
         #       arbiter------------------------------------------+
@@ -302,19 +306,46 @@ class DDR3SoC(SoC, Elaboratable):
         if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s']:
             if fpga in ['isim']:
                 pod_bits = 6
-            self.crg = ECP5CRG(clk_freq, pod_bits)
+            self.crg = ECP5CRG(sys_clk_freq=nest_freq, core_clk_freq=core_freq,
+                                         pod_bits=pod_bits)
         if fpga in ['arty_a7']:
-            self.crg = ArtyA7CRG(clk_freq)
+            self.crg = ArtyA7CRG(core_freq)
 
         # set up CPU, with 64-to-32-bit downconverters
         if add_cpu:
-            self.cpu = ExternalCore(name="ext_core")
+            drs = DomainRenamer("cpu")
+
+            self.cpu = drs(ExternalCore(name="ext_core"))
+            if nest_freq == core_freq:
+                # No Wishbone bridge required
+                self.asbrdbus = self.cpu.dbus
+                self.asbribus = self.cpu.ibus
+            else:
+                # Asynchronous Wishbone bridge required
+                asbrdbus = wishbone.Interface(addr_width=32, data_width=64,
+                                             granularity=8, features={'stall'})
+                asbribus = wishbone.Interface(addr_width=32, data_width=64,
+                                             granularity=8, features={'stall'})
+                self.dbusasyncbr = WBAsyncBridge(master_bus=self.cpu.dbus,
+                                             slave_bus=asbrdbus,
+                                             master_clock_domain="cpu",
+                                             slave_clock_domain=None,
+                                             address_width=32, data_width=64,
+                                             granularity=8, features={'stall'})
+                self.ibusasyncbr = WBAsyncBridge(master_bus=self.cpu.ibus,
+                                             slave_bus=asbribus,
+                                             master_clock_domain="cpu",
+                                             slave_clock_domain=None,
+                                             address_width=32, data_width=64,
+                                             granularity=8, features={'stall'})
+                self.asbrdbus = asbrdbus
+                self.asbribus = asbribus
             cvtdbus = wishbone.Interface(addr_width=30, data_width=32,
                                          granularity=8, features={'stall'})
             cvtibus = wishbone.Interface(addr_width=30, data_width=32,
                                          granularity=8, features={'stall'})
-            self.dbusdowncvt = WB64to32Convert(self.cpu.dbus, cvtdbus)
-            self.ibusdowncvt = WB64to32Convert(self.cpu.ibus, cvtibus)
+            self.dbusdowncvt = WB64to32Convert(self.asbrdbus, cvtdbus)
+            self.ibusdowncvt = WB64to32Convert(self.asbribus, cvtibus)
             self._arbiter.add(cvtibus) # I-Cache Master
             self._arbiter.add(cvtdbus) # D-Cache Master. TODO JTAG master
             self.cvtibus = cvtibus
@@ -340,7 +371,8 @@ class DDR3SoC(SoC, Elaboratable):
         # offset executable ELF payload at 1 megabyte offset (1<<20)
         spi_offset = 1<<20 if (spi_0_pins is not None) else None
         dram_offset = ddr_addr if (ddr_pins is not None) else None
-        self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq,
+        self.syscon = MicrowattSYSCON(sys_clk_freq=nest_freq,
+                                      core_clk_freq=core_freq,
                                       has_uart=(uart_pins is not None),
                                       spi_offset=spi_offset,
                                       dram_addr=dram_offset)
@@ -387,24 +419,24 @@ class DDR3SoC(SoC, Elaboratable):
 
         # DRAM Module
         if ddr_pins is not None or fpga == 'sim':
-            ddrmodule = dram_cls(clk_freq, "1:2") # match DDR3 ASIC P/N
+            ddrmodule = dram_cls(nest_freq, "1:2") # match DDR3 ASIC P/N
 
             #drs = lambda x: x
             drs = DomainRenamer("dramsync")
 
             if fpga == 'sim':
                 self.ddrphy = FakePHY(module=ddrmodule,
-                                      settings=sim_ddr3_settings(clk_freq),
+                                      settings=sim_ddr3_settings(nest_freq),
                                       verbosity=SDRAM_VERBOSE_DBG,
-                                      clk_freq=clk_freq)
+                                      clk_freq=nest_freq)
             else:
-                self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=clk_freq))
+                self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=nest_freq))
             self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr)
 
             dramcore = gramCore(phy=self.ddrphy,
                                 geom_settings=ddrmodule.geom_settings,
                                 timing_settings=ddrmodule.timing_settings,
-                                clk_freq=clk_freq)
+                                clk_freq=nest_freq)
             if fpga == 'sim':
                 self.dramcore = dramcore
             else:
@@ -454,7 +486,7 @@ class DDR3SoC(SoC, Elaboratable):
             self.spi0 = Tercel(data_width=32, spi_region_addr_width=24,
                                adr_offset=spi0_addr,
                                features={'stall'},
-                               clk_freq=clk_freq,
+                               clk_freq=nest_freq,
                                pins=spi_0_pins,
                                lattice_ecp5_usrmclk=spi0_is_lattice_ecp5_clk)
             self._decoder.add(self.spi0.bus, addr=spi0_addr)
@@ -483,7 +515,7 @@ class DDR3SoC(SoC, Elaboratable):
 
         self.memory_map = self._decoder.bus.memory_map
 
-        self.clk_freq = clk_freq
+        self.clk_freq = core_freq
         self.fpga = fpga
 
     def elaborate(self, platform):
@@ -522,6 +554,8 @@ class DDR3SoC(SoC, Elaboratable):
         if hasattr(self, "cpu"):
             m.submodules.intc = self.intc
             m.submodules.extcore = self.cpu
+            m.submodules.dbusasyncbr = self.dbusasyncbr
+            m.submodules.ibusasyncbr = self.ibusasyncbr
             m.submodules.dbuscvt = self.dbusdowncvt
             m.submodules.ibuscvt = self.ibusdowncvt
             # create stall sigs, assume wishbone classic
@@ -529,6 +563,18 @@ class DDR3SoC(SoC, Elaboratable):
             #comb += ibus.stall.eq(ibus.stb & ~ibus.ack)
             #comb += dbus.stall.eq(dbus.stb & ~dbus.ack)
 
+            # add wb async bridge verilog source. assumes a directory structure where
+            # verilog-wishbone has been checked out in a common subdirectory with:
+            # git clone https://github.com/alexforencich/verilog-wishbone.git
+            # git checkout d1fa24a0
+            verilog_wishbone = "../../verilog-wishbone/rtl"
+            pth = os.path.split(__file__)[0]
+            pth = os.path.join(pth, verilog_wishbone)
+            fname = os.path.abspath(pth)
+            print (fname)
+            self.dbusasyncbr.add_verilog_source(fname, platform)
+            self.ibusasyncbr.add_verilog_source(fname, platform)
+
         m.submodules.arbiter = self._arbiter
         m.submodules.decoder = self._decoder
         if hasattr(self, "ddrphy"):
@@ -677,21 +723,31 @@ def build_platform(fpga, firmware):
     print ("platform", fpga, firmware, platform)
 
     # set clock frequency
-    clk_freq = 70e6
+    core_freq = 70e6
+    nest_freq = core_freq
     if fpga == 'sim':
-        clk_freq = 100e6
+        core_freq = 100e6
+        nest_freq = core_freq
     if fpga == 'isim':
-        clk_freq = 55e6 # below 50 mhz, stops DRAM being enabled
+        core_freq = 55e6 # below 50 mhz, stops DRAM being enabled
+        nest_freq = core_freq
     if fpga == 'versa_ecp5':
-        clk_freq = 55e6 # crank right down to test hyperram
+        core_freq = 55e6 # crank right down to test hyperram
+        nest_freq = core_freq
     if fpga == 'versa_ecp5_85':
+        core_freq = 50e6
+        # DDR3 system interface is clocked at the nest frequency.
         # 50MHz works.  100MHz works.  55MHz does NOT work.
         # Stick with multiples of 50MHz...
-        clk_freq = 50e6
+        # Note the actual DDR3 clock is 2x the nest, as the name
+        # implies...
+        nest_freq = 100e6
     if fpga == 'arty_a7':
-        clk_freq = 50e6
+        core_freq = 50e6
+        nest_freq = core_freq
     if fpga == 'ulx3s':
-        clk_freq = 40.0e6
+        core_freq = 40.0e6
+        nest_freq = core_freq
 
     # select a firmware address
     fw_addr = None
@@ -708,7 +764,7 @@ def build_platform(fpga, firmware):
 
     # get DDR resource pins, disable if clock frequency is below 50 mhz for now
     ddr_pins = None
-    if (clk_freq >= 50e6 and platform is not None and
+    if (nest_freq >= 50e6 and platform is not None and
         fpga in ['versa_ecp5', 'versa_ecp5_85', 'arty_a7', 'isim']):
         ddr_pins = platform.request("ddr3", 0,
                                     dir={"dq":"-", "dqs":"-"},
@@ -850,7 +906,8 @@ def build_platform(fpga, firmware):
                   ethmac_0_pins=ethmac_0_pins,
                   hyperram_pins=hyperram_pins,
                   firmware=firmware,
-                  clk_freq=clk_freq,
+                  core_freq=core_freq,
+                  nest_freq=nest_freq,
                   add_cpu=True)
 
     if toolchain == 'Trellis':