From 2d7021ba09c3e08e1780bf73c26deeaccf689221 Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Mon, 11 Apr 2022 14:33:25 -0500 Subject: [PATCH] Separate nest and core clocks Add async bridges between CPU and Wishbone downconverters Switch Raptor Versa 85 board to 100MHz nest and 50MHz core Verified to function on Raptor Versa 85 board --- simsoc.ys | 1 + src/ecp5_crg.py | 13 +++++- src/ls2.py | 103 +++++++++++++++++++++++++++++++++++++----------- 3 files changed, 93 insertions(+), 24 deletions(-) diff --git a/simsoc.ys b/simsoc.ys index e5e237a..72b0f6b 100644 --- a/simsoc.ys +++ b/simsoc.ys @@ -18,6 +18,7 @@ read_verilog ../uart16550/rtl/verilog/uart_tfifo.v read_verilog ../uart16550/rtl/verilog/uart_wb.v read_verilog ../tercel-qspi/tercel/phy.v read_verilog ../tercel-qspi/tercel/wishbone_spi_master.v +read_verilog ../verilog-wishbone/rtl/wb_async_reg.v # errors in the ethmac rtl, comment out for now #read_verilog ../ethmac/rtl/verilog/eth_clockgen.v #read_verilog ../ethmac/rtl/verilog/eth_cop.v diff --git a/src/ecp5_crg.py b/src/ecp5_crg.py index b1bd03c..36c8f1d 100644 --- a/src/ecp5_crg.py +++ b/src/ecp5_crg.py @@ -2,6 +2,7 @@ # Copyright (c) 2021 Luke Kenneth Casson Leighton # Copyright (c) 2018-2020 Florent Kermarrec # Copyright (c) 2019 Michael Betz +# Copyright (c) 2022 Raptor Engineering, LLC # # Based on code from LambaConcept, from the gram example which is BSD-2-License # https://github.com/jeanthom/gram/tree/master/examples @@ -169,8 +170,9 @@ class PLL(Elaboratable): class ECP5CRG(Elaboratable): - def __init__(self, sys_clk_freq=100e6, pod_bits=25): + def __init__(self, sys_clk_freq=100e6, core_clk_freq=100e6, pod_bits=25): self.sys_clk_freq = sys_clk_freq + self.core_clk_freq = core_clk_freq self.pod_bits = pod_bits # DDR clock control signals @@ -219,17 +221,24 @@ class ECP5CRG(Elaboratable): m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~pod_done|~reset) # Generating sync2x (200Mhz) and init (25Mhz) from extclk + 
# sync is our "nest" frequency, cpu is the core frequency + # On ASIC, core >= nest; on FPGA, core <= nest... cd_sync2x = ClockDomain("sync2x", local=False) cd_sync2x_unbuf = ClockDomain("sync2x_unbuf", local=False, reset_less=True) cd_init = ClockDomain("init", local=False) cd_sync = ClockDomain("sync", local=False) + cd_cpu = ClockDomain("cpu", local=False) cd_dramsync = ClockDomain("dramsync", local=False) # create PLL clocks pll.set_clkin_freq(platform.default_clk_frequency) pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq) pll.create_clkout(ClockSignal("init"), 25e6) + if self.sys_clk_freq == self.core_clk_freq: + m.d.comb += ClockSignal("cpu").eq(ClockSignal("sync")) + else: + pll.create_clkout(ClockSignal("cpu"), self.core_clk_freq) m.submodules += Instance("ECLKSYNCB", i_ECLKI = ClockSignal("sync2x_unbuf"), i_STOP = self.ddr_clk_stop, @@ -238,11 +247,13 @@ class ECP5CRG(Elaboratable): m.domains += cd_sync2x m.domains += cd_init m.domains += cd_sync + m.domains += cd_cpu m.domains += cd_dramsync reset_ok = Signal(reset_less=True) m.d.comb += reset_ok.eq(~pll.locked|~pod_done) m.d.comb += ResetSignal("init").eq(reset_ok) m.d.comb += ResetSignal("sync").eq(reset_ok) + m.d.comb += ResetSignal("cpu").eq(reset_ok) m.d.comb += ResetSignal("dramsync").eq(reset_ok|self.ddr_clk_reset) # # Generating sync (100Mhz) from sync2x diff --git a/src/ls2.py b/src/ls2.py index 00c1853..b05cf2e 100644 --- a/src/ls2.py +++ b/src/ls2.py @@ -28,6 +28,7 @@ from lambdasoc.periph.sram import SRAMPeripheral from lambdasoc.periph.timer import TimerPeripheral from lambdasoc.periph import Peripheral from lambdasoc.soc.base import SoC +from soc.bus.wb_async import WBAsyncBridge from soc.bus.uart_16550 import UART16550 # opencores 16550 uart from soc.bus.tercel import Tercel # SPI XIP master from soc.bus.opencores_ethmac import EthMAC # OpenCores 10/100 Ethernet MAC @@ -264,7 +265,8 @@ class DDR3SoC(SoC, Elaboratable): eth0_cfg_addr, eth0_irqno, hyperram_addr=None, 
hyperram_pins=None, - clk_freq=50e6, + nest_freq=50e6, + core_freq=50e6, add_cpu=True): # wishbone routing is as follows: @@ -276,6 +278,8 @@ class DDR3SoC(SoC, Elaboratable): # | | # +--+--+ # | + # WBAsyncBridge + # | # 64to32DownCvt # | # arbiter------------------------------------------+ @@ -302,19 +306,46 @@ class DDR3SoC(SoC, Elaboratable): if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s']: if fpga in ['isim']: pod_bits = 6 - self.crg = ECP5CRG(clk_freq, pod_bits) + self.crg = ECP5CRG(sys_clk_freq=nest_freq, core_clk_freq=core_freq, + pod_bits=pod_bits) if fpga in ['arty_a7']: - self.crg = ArtyA7CRG(clk_freq) + self.crg = ArtyA7CRG(core_freq) # set up CPU, with 64-to-32-bit downconverters if add_cpu: - self.cpu = ExternalCore(name="ext_core") + drs = DomainRenamer("cpu") + + self.cpu = drs(ExternalCore(name="ext_core")) + if nest_freq == core_freq: + # No Wishbone bridge required + self.asbrdbus = self.cpu.dbus + self.asbribus = self.cpu.ibus + else: + # Asynchronous Wishbone bridge required + asbrdbus = wishbone.Interface(addr_width=32, data_width=64, + granularity=8, features={'stall'}) + asbribus = wishbone.Interface(addr_width=32, data_width=64, + granularity=8, features={'stall'}) + self.dbusasyncbr = WBAsyncBridge(master_bus=self.cpu.dbus, + slave_bus=asbrdbus, + master_clock_domain="cpu", + slave_clock_domain=None, + address_width=32, data_width=64, + granularity=8, features={'stall'}) + self.ibusasyncbr = WBAsyncBridge(master_bus=self.cpu.ibus, + slave_bus=asbribus, + master_clock_domain="cpu", + slave_clock_domain=None, + address_width=32, data_width=64, + granularity=8, features={'stall'}) + self.asbrdbus = asbrdbus + self.asbribus = asbribus cvtdbus = wishbone.Interface(addr_width=30, data_width=32, granularity=8, features={'stall'}) cvtibus = wishbone.Interface(addr_width=30, data_width=32, granularity=8, features={'stall'}) - self.dbusdowncvt = WB64to32Convert(self.cpu.dbus, cvtdbus) - self.ibusdowncvt = WB64to32Convert(self.cpu.ibus, 
cvtibus) + self.dbusdowncvt = WB64to32Convert(self.asbrdbus, cvtdbus) + self.ibusdowncvt = WB64to32Convert(self.asbribus, cvtibus) self._arbiter.add(cvtibus) # I-Cache Master self._arbiter.add(cvtdbus) # D-Cache Master. TODO JTAG master self.cvtibus = cvtibus @@ -340,7 +371,8 @@ class DDR3SoC(SoC, Elaboratable): # offset executable ELF payload at 1 megabyte offset (1<<20) spi_offset = 1<<20 if (spi_0_pins is not None) else None dram_offset = ddr_addr if (ddr_pins is not None) else None - self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq, + self.syscon = MicrowattSYSCON(sys_clk_freq=nest_freq, + core_clk_freq=core_freq, has_uart=(uart_pins is not None), spi_offset=spi_offset, dram_addr=dram_offset) @@ -387,24 +419,24 @@ class DDR3SoC(SoC, Elaboratable): # DRAM Module if ddr_pins is not None or fpga == 'sim': - ddrmodule = dram_cls(clk_freq, "1:2") # match DDR3 ASIC P/N + ddrmodule = dram_cls(nest_freq, "1:2") # match DDR3 ASIC P/N #drs = lambda x: x drs = DomainRenamer("dramsync") if fpga == 'sim': self.ddrphy = FakePHY(module=ddrmodule, - settings=sim_ddr3_settings(clk_freq), + settings=sim_ddr3_settings(nest_freq), verbosity=SDRAM_VERBOSE_DBG, - clk_freq=clk_freq) + clk_freq=nest_freq) else: - self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=clk_freq)) + self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=nest_freq)) self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) dramcore = gramCore(phy=self.ddrphy, geom_settings=ddrmodule.geom_settings, timing_settings=ddrmodule.timing_settings, - clk_freq=clk_freq) + clk_freq=nest_freq) if fpga == 'sim': self.dramcore = dramcore else: @@ -454,7 +486,7 @@ class DDR3SoC(SoC, Elaboratable): self.spi0 = Tercel(data_width=32, spi_region_addr_width=24, adr_offset=spi0_addr, features={'stall'}, - clk_freq=clk_freq, + clk_freq=nest_freq, pins=spi_0_pins, lattice_ecp5_usrmclk=spi0_is_lattice_ecp5_clk) self._decoder.add(self.spi0.bus, addr=spi0_addr) @@ -483,7 +515,7 @@ class DDR3SoC(SoC, Elaboratable): self.memory_map = 
self._decoder.bus.memory_map - self.clk_freq = clk_freq + self.clk_freq = core_freq self.fpga = fpga def elaborate(self, platform): @@ -522,6 +554,8 @@ class DDR3SoC(SoC, Elaboratable): if hasattr(self, "cpu"): m.submodules.intc = self.intc m.submodules.extcore = self.cpu + m.submodules.dbusasyncbr = self.dbusasyncbr + m.submodules.ibusasyncbr = self.ibusasyncbr m.submodules.dbuscvt = self.dbusdowncvt m.submodules.ibuscvt = self.ibusdowncvt # create stall sigs, assume wishbone classic @@ -529,6 +563,18 @@ class DDR3SoC(SoC, Elaboratable): #comb += ibus.stall.eq(ibus.stb & ~ibus.ack) #comb += dbus.stall.eq(dbus.stb & ~dbus.ack) + # add wb async bridge verilog source. assumes a directory structure where + # verilog-wishbone has been checked out in a common subdirectory with: + # git clone https://github.com/alexforencich/verilog-wishbone.git + # git checkout d1fa24a0 + verilog_wishbone = "../../verilog-wishbone/rtl" + pth = os.path.split(__file__)[0] + pth = os.path.join(pth, verilog_wishbone) + fname = os.path.abspath(pth) + print (fname) + self.dbusasyncbr.add_verilog_source(fname, platform) + self.ibusasyncbr.add_verilog_source(fname, platform) + m.submodules.arbiter = self._arbiter m.submodules.decoder = self._decoder if hasattr(self, "ddrphy"): @@ -677,21 +723,31 @@ def build_platform(fpga, firmware): print ("platform", fpga, firmware, platform) # set clock frequency - clk_freq = 70e6 + core_freq = 70e6 + nest_freq = core_freq if fpga == 'sim': - clk_freq = 100e6 + core_freq = 100e6 + nest_freq = core_freq if fpga == 'isim': - clk_freq = 55e6 # below 50 mhz, stops DRAM being enabled + core_freq = 55e6 # below 50 mhz, stops DRAM being enabled + nest_freq = core_freq if fpga == 'versa_ecp5': - clk_freq = 55e6 # crank right down to test hyperram + core_freq = 55e6 # crank right down to test hyperram + nest_freq = core_freq if fpga == 'versa_ecp5_85': + core_freq = 50e6 + # DDR3 system interface is clocked at the nest frequency. # 50MHz works. 100MHz works. 
55MHz does NOT work. # Stick with multiples of 50MHz... - clk_freq = 50e6 + # Note the actual DDR3 clock is 2x the nest, as the name + # implies... + nest_freq = 100e6 if fpga == 'arty_a7': - clk_freq = 50e6 + core_freq = 50e6 + nest_freq = core_freq if fpga == 'ulx3s': - clk_freq = 40.0e6 + core_freq = 40.0e6 + nest_freq = core_freq # select a firmware address fw_addr = None @@ -708,7 +764,7 @@ def build_platform(fpga, firmware): # get DDR resource pins, disable if clock frequency is below 50 mhz for now ddr_pins = None - if (clk_freq >= 50e6 and platform is not None and + if (nest_freq >= 50e6 and platform is not None and fpga in ['versa_ecp5', 'versa_ecp5_85', 'arty_a7', 'isim']): ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"}, @@ -850,7 +906,8 @@ def build_platform(fpga, firmware): ethmac_0_pins=ethmac_0_pins, hyperram_pins=hyperram_pins, firmware=firmware, - clk_freq=clk_freq, + core_freq=core_freq, + nest_freq=nest_freq, add_cpu=True) if toolchain == 'Trellis': -- 2.30.2