From b586cc83eea22270d7914e7315565118c18fbf75 Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Wed, 13 Apr 2022 19:55:16 -0500 Subject: [PATCH] Add and enable async Wishbone bridges Fix simulation with DDR3+SPI --- runsimsoc2.sh | 2 +- simsoc.ys | 1 + src/ecp5_crg.py | 37 ++++++---- src/ls2.py | 185 +++++++++++++++++++++++++++++++++++++++--------- 4 files changed, 178 insertions(+), 47 deletions(-) diff --git a/runsimsoc2.sh b/runsimsoc2.sh index 66080cb..c4b4288 100755 --- a/runsimsoc2.sh +++ b/runsimsoc2.sh @@ -20,7 +20,7 @@ iverilog -Wall -g2012 -s simsoctb -o simsoc \ ${LIB_DIR}/PUR.v ${LIB_DIR}/GSR.v \ ${LIB_DIR}/FD1S3AX.v ${LIB_DIR}/SGSR.v ${LIB_DIR}/ODDRX2F.v \ ${LIB_DIR}/ODDRX2DQA.v ${LIB_DIR}/DELAYF.v ${LIB_DIR}/BB.v \ - ${LIB_DIR}/OB.v ${LIB_DIR}/IB.v \ + ${LIB_DIR}/DELAYG.v ${LIB_DIR}/OB.v ${LIB_DIR}/IB.v \ ${LIB_DIR}/DQSBUFM.v ${LIB_DIR}/UDFDL5_UDP_X.v \ ${LIB_DIR}/TSHX2DQSA.v ${LIB_DIR}/TSHX2DQA.v \ ${LIB_DIR}/ODDRX2DQSB.v ${LIB_DIR}/IDDRX2DQA.v \ diff --git a/simsoc.ys b/simsoc.ys index e5e237a..72b0f6b 100644 --- a/simsoc.ys +++ b/simsoc.ys @@ -18,6 +18,7 @@ read_verilog ../uart16550/rtl/verilog/uart_tfifo.v read_verilog ../uart16550/rtl/verilog/uart_wb.v read_verilog ../tercel-qspi/tercel/phy.v read_verilog ../tercel-qspi/tercel/wishbone_spi_master.v +read_verilog ../verilog-wishbone/rtl/wb_async_reg.v # errors in the ethmac rtl, comment out for now #read_verilog ../ethmac/rtl/verilog/eth_clockgen.v #read_verilog ../ethmac/rtl/verilog/eth_cop.v diff --git a/src/ecp5_crg.py b/src/ecp5_crg.py index 5c975d6..b0e50cb 100644 --- a/src/ecp5_crg.py +++ b/src/ecp5_crg.py @@ -2,6 +2,7 @@ # Copyright (c) 2021 Luke Kenneth Casson Leighton # Copyright (c) 2018-2020 Florent Kermarrec # Copyright (c) 2019 Michael Betz +# Copyright (C) 2022 Raptor Engineering, LLC # # Based on code from LambaConcept, from the gram example which is BSD-2-License # https://github.com/jeanthom/gram/tree/master/examples @@ -169,8 +170,9 @@ class 
PLL(Elaboratable): class ECP5CRG(Elaboratable): - def __init__(self, sys_clk_freq=100e6, pod_bits=25): + def __init__(self, sys_clk_freq=100e6, memory_clk_freq=100e6, pod_bits=25): self.sys_clk_freq = sys_clk_freq + self.memory_clk_freq = memory_clk_freq self.pod_bits = pod_bits def elaborate(self, platform): @@ -214,44 +216,51 @@ class ECP5CRG(Elaboratable): m.d.rawclk += podcnt.eq(podcnt-1) m.d.rawclk += pod_done.eq(podcnt == 0) - # Generating sync2x (200Mhz) and init (25Mhz) from extclk - cd_sync2x = ClockDomain("sync2x", local=False) - cd_sync2x_unbuf = ClockDomain("sync2x_unbuf", + # Generating memory2x (200Mhz) and init (25Mhz) from extclk + cd_memory2x = ClockDomain("memory2x", local=False) + cd_memory2x_unbuf = ClockDomain("memory2x_unbuf", local=False, reset_less=True) + cd_memory = ClockDomain("memory", local=False) cd_init = ClockDomain("init", local=False) cd_sync = ClockDomain("sync", local=False) cd_dramsync = ClockDomain("dramsync", local=False) + cd_dramsync2x = ClockDomain("dramsync2x", local=False) # create PLL clocks pll.set_clkin_freq(platform.default_clk_frequency) - pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq) + pll.create_clkout(ClockSignal("memory2x_unbuf"), 2*self.memory_clk_freq) + pll.create_clkout(ClockSignal("sync"), self.sys_clk_freq) pll.create_clkout(ClockSignal("init"), 25e6) m.submodules += Instance("ECLKSYNCB", - i_ECLKI = ClockSignal("sync2x_unbuf"), + i_ECLKI = ClockSignal("memory2x_unbuf"), i_STOP = 0, - o_ECLKO = ClockSignal("sync2x")) - m.domains += cd_sync2x_unbuf - m.domains += cd_sync2x + o_ECLKO = ClockSignal("memory2x")) + m.domains += cd_memory + m.domains += cd_memory2x_unbuf + m.domains += cd_memory2x m.domains += cd_init m.domains += cd_sync m.domains += cd_dramsync + m.domains += cd_dramsync2x reset_ok = Signal(reset_less=True) m.d.comb += reset_ok.eq(~pll.locked|~pod_done) m.d.comb += ResetSignal("init").eq(reset_ok) m.d.comb += ResetSignal("sync").eq(reset_ok) + m.d.comb += 
ResetSignal("memory").eq(reset_ok) m.d.comb += ResetSignal("dramsync").eq(reset_ok) - # # Generating sync (100Mhz) from sync2x + # # Generating memory (100Mhz) from memory2x m.submodules += Instance("CLKDIVF", p_DIV="2.0", i_ALIGNWD=0, - i_CLKI=ClockSignal("sync2x"), + i_CLKI=ClockSignal("memory2x"), i_RST=0, - o_CDIVX=ClockSignal("sync")) + o_CDIVX=ClockSignal("memory")) - # temporarily set dram sync clock exactly equal to main sync - m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync")) + # temporarily set dram sync clock exactly equal to main memory + m.d.comb += ClockSignal("dramsync").eq(ClockSignal("memory")) + m.d.comb += ClockSignal("dramsync2x").eq(ClockSignal("memory2x")) return m diff --git a/src/ls2.py b/src/ls2.py index 708673b..0bb54b4 100644 --- a/src/ls2.py +++ b/src/ls2.py @@ -33,6 +33,7 @@ from soc.bus.tercel import Tercel # SPI XIP master from soc.bus.opencores_ethmac import EthMAC # OpenCores 10/100 Ethernet MAC from soc.bus.external_core import ExternalCore # external libresoc/microwatt from soc.bus.wb_downconvert import WishboneDownConvert +from soc.bus.wb_async import WBAsyncBridge from soc.bus.syscon import MicrowattSYSCON # DDR3 @@ -245,6 +246,7 @@ class DDR3SoC(SoC, Elaboratable): hyperram_addr=None, hyperram_pins=None, clk_freq=50e6, + memory_clk_freq=50e6, add_cpu=True): # wishbone routing is as follows: @@ -258,11 +260,13 @@ class DDR3SoC(SoC, Elaboratable): # | # 64to32DownCvt # | - # arbiter------------------------------------------+ - # | | - # +---decoder----+--------+---------+-------+--------+ | - # | | | | | | | | - # uart XICS CSRs DRAM XIP SPI HyperRAM EthMAC + # arbiter--------------------------------------------------------+ + # | | + # +---decoder----+--------+-----------------+-------------+--------+ | + # | | | | | | | | + # | | | WBAsyncBridge WBAsyncBridge | | | + # | | | | | | | | + # uart XICS CSRs DRAM XIP SPI HyperRAM EthMAC # set up wishbone bus arbiter and decoder. 
arbiter routes, # decoder maps local-relative addressed satellites to global addresses @@ -282,7 +286,7 @@ class DDR3SoC(SoC, Elaboratable): if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s']: if fpga in ['isim']: pod_bits = 6 - self.crg = ECP5CRG(clk_freq, pod_bits) + self.crg = ECP5CRG(clk_freq, memory_clk_freq, pod_bits) if fpga in ['arty_a7']: self.crg = ArtyA7CRG(clk_freq) @@ -320,7 +324,9 @@ class DDR3SoC(SoC, Elaboratable): # offset executable ELF payload at 6 megabyte offset (2<<20) spi_offset = 2<<20 if (spi_0_pins is not None) else None dram_offset = ddr_addr if (ddr_pins is not None) else None - self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq, + self.syscon = MicrowattSYSCON(core_clk_freq=clk_freq, + mem_clk_freq=memory_clk_freq, + sys_clk_freq=clk_freq, has_uart=(uart_pins is not None), spi_offset=spi_offset, dram_addr=dram_offset) @@ -367,42 +373,77 @@ class DDR3SoC(SoC, Elaboratable): # DRAM Module if ddr_pins is not None: # or fpga == 'sim': - ddrmodule = dram_cls(clk_freq, "1:2") # match DDR3 ASIC P/N + ddrmodule = dram_cls(memory_clk_freq, "1:2") # match DDR3 ASIC P/N #drs = lambda x: x drs = DomainRenamer("dramsync") if fpga == 'sim': self.ddrphy = FakePHY(module=ddrmodule, - settings=sim_ddr3_settings(clk_freq), + settings=sim_ddr3_settings(memory_clk_freq), verbosity=SDRAM_VERBOSE_DBG, - clk_freq=clk_freq) + clk_freq=memory_clk_freq) else: - self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=clk_freq)) + self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=memory_clk_freq)) self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) dramcore = gramCore(phy=self.ddrphy, geom_settings=ddrmodule.geom_settings, timing_settings=ddrmodule.timing_settings, - clk_freq=clk_freq) + clk_freq=memory_clk_freq) if fpga == 'sim': self.dramcore = dramcore else: self.dramcore = drs(dramcore) - self._decoder.add(self.dramcore.bus, addr=dramcore_addr) - # map the DRAM onto Wishbone, XXX use stall but set classic below - # XXX WHEN ADDING ASYNCBRIDGE IT IS 
THE **BRIDGE** THAT MUST - # XXX HAVE THE STALL SIGNAL, AND THE **BRIDGE** THAT MUST HAVE - # XXX stall=stb&~ack APPLIED - drambone = gramWishbone(dramcore, features={'stall'}) + # Set up Wishbone asynchronous bridge + self.dramcore_async_bus = wishbone.Interface( + addr_width=self.dramcore.bus.addr_width, + data_width=self.dramcore.bus.data_width, + granularity=self.dramcore.bus.granularity, + features={'stall'}) + self.dramcore_async_bus.memory_map = self.dramcore.bus.memory_map + + self.dramcore_async_br = WBAsyncBridge( + master_bus=self.dramcore_async_bus, + slave_bus=self.dramcore.bus, + master_clock_domain=None, + slave_clock_domain="dramsync", + address_width=self.dramcore.bus.addr_width, + data_width=self.dramcore.bus.data_width, + granularity=self.dramcore.bus.granularity, + master_features={'stall'}) + + # Add wishbone decoder + self._decoder.add(self.dramcore_async_bus, addr=dramcore_addr) + + drambone = gramWishbone(dramcore) if fpga == 'sim': self.drambone = drambone else: self.drambone = drs(drambone) + + # Set up Wishbone asynchronous bridge + self.drambone_async_bus = wishbone.Interface( + addr_width=self.drambone.bus.addr_width, + data_width=self.drambone.bus.data_width, + granularity=self.drambone.bus.granularity, + features={'stall'}) + self.drambone_async_bus.memory_map = self.drambone.bus.memory_map + + self.drambone_async_br = WBAsyncBridge( + master_bus=self.drambone_async_bus, + slave_bus=self.drambone.bus, + master_clock_domain=None, + slave_clock_domain="dramsync", + address_width=self.drambone.bus.addr_width, + data_width=self.drambone.bus.data_width, + granularity=self.drambone.bus.granularity, + master_features={'stall'}) + # XXX ADD THE ASYNCBRIDGE NOT THE DRAMBONE.BUS, THEN # XXX ADD DRAMBONE.BUS TO ASYNCBRIDGE - self._decoder.add(self.drambone.bus, addr=ddr_addr) + self._decoder.add(self.drambone_async_bus, addr=ddr_addr) # additional SRAM at address if DRAM is not also at 0x0 # (TODO, check Flash, and HyperRAM as well) @@ -431,20 
+472,66 @@ class DDR3SoC(SoC, Elaboratable): 'isim']: spi0_is_lattice_ecp5_clk = True + if fpga in ['versa_ecp5', 'versa_ecp5_85']: + # XXX + # Versa boards cannot handle higher SPI clock speeds + # (PCB / trace routing problem?) + # Set Tercel clock to base CPU clock to compensate + # for now... + drs = DomainRenamer("sync") + else: + drs = DomainRenamer("memory") + # Tercel contains two independent Wishbone regions, a # configuration region and the direct API access region, # Set the SPI 0 access region to 16MB, as the FPGA # bitstream Flash device is unlikely to be larger than this. # The main SPI Flash (SPI 1) should be set to at # least 28 bits (256MB) to allow the use of large 4BA devices. - self.spi0 = Tercel(data_width=32, spi_region_addr_width=24, + self.spi0 = drs(Tercel(data_width=32, spi_region_addr_width=24, adr_offset=spi0_addr, - features={'stall'}, - clk_freq=clk_freq, + clk_freq=memory_clk_freq, pins=spi_0_pins, - lattice_ecp5_usrmclk=spi0_is_lattice_ecp5_clk) - self._decoder.add(self.spi0.bus, addr=spi0_addr) - self._decoder.add(self.spi0.cfg_bus, addr=spi0_cfg_addr) + lattice_ecp5_usrmclk=spi0_is_lattice_ecp5_clk)) + + + # Set up Wishbone asynchronous bridges + self.spi0_async_bus = wishbone.Interface( + addr_width=self.spi0.bus.addr_width, + data_width=self.spi0.bus.data_width, + granularity=self.spi0.bus.granularity, + features={'stall'}) + self.spi0_async_cfg_bus = wishbone.Interface( + addr_width=self.spi0.cfg_bus.addr_width, + data_width=self.spi0.cfg_bus.data_width, + granularity=self.spi0.cfg_bus.granularity, + features={'stall'}) + self.spi0_async_bus.memory_map = self.spi0.bus.memory_map + self.spi0_async_cfg_bus.memory_map = self.spi0.cfg_bus.memory_map + + self.spi0_async_br = WBAsyncBridge( + master_bus=self.spi0_async_bus, + slave_bus=self.spi0.bus, + master_clock_domain=None, + slave_clock_domain="memory", + address_width=self.spi0.bus.addr_width, + data_width=self.spi0.bus.data_width, + granularity=self.spi0.bus.granularity, + 
master_features={'stall'}) + + self.spi0_async_cfg_br = WBAsyncBridge( + master_bus=self.spi0_async_cfg_bus, + slave_bus=self.spi0.cfg_bus, + master_clock_domain=None, + slave_clock_domain="memory", + address_width=self.spi0.cfg_bus.addr_width, + data_width=self.spi0.cfg_bus.data_width, + granularity=self.spi0.cfg_bus.granularity, + master_features={'stall'}) + + # Add wishbone decoders + self._decoder.add(self.spi0_async_bus, addr=spi0_addr) + self._decoder.add(self.spi0_async_cfg_bus, addr=spi0_cfg_addr) # Ethernet MAC if ethmac_0_pins is not None and fpga in ['versa_ecp5', @@ -470,6 +557,7 @@ class DDR3SoC(SoC, Elaboratable): self.memory_map = self._decoder.bus.memory_map self.clk_freq = clk_freq + self.memory_clk_freq = memory_clk_freq self.fpga = fpga def elaborate(self, platform): @@ -523,10 +611,22 @@ class DDR3SoC(SoC, Elaboratable): m.submodules.ddrphy = self.ddrphy m.submodules.dramcore = self.dramcore m.submodules.drambone = drambone = self.drambone - # grrr, same problem with drambone: not WB4-pipe compliant - # XXX TAKE THIS OUT, REPLACE WITH ASYNCBRIDGE HAVING - # XXX asyncbridge.bus.stall.eq(asyncbridge.bus.cyc & ...) - comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack) + + # add async wishbone bridges + m.submodules.dramcore_async_br = self.dramcore_async_br + m.submodules.drambone_async_br = self.drambone_async_br + + # add wb async bridge verilog source. 
assumes a directory structure where + # verilog-wishbone has been checked out in a common subdirectory with: + # git clone https://github.com/alexforencich/verilog-wishbone.git + # git checkout d1fa24a0 + verilog_wishbone = "../../verilog-wishbone/rtl" + pth = os.path.split(__file__)[0] + pth = os.path.join(pth, verilog_wishbone) + fname = os.path.abspath(pth) + print (fname) + self.dramcore_async_br.add_verilog_source(fname, platform) + self.drambone_async_br.add_verilog_source(fname, platform) # add hyperram module if hasattr(self, "hyperram"): @@ -566,9 +666,10 @@ class DDR3SoC(SoC, Elaboratable): if hasattr(self, "spi0"): # add spi submodule m.submodules.spi0 = spi = self.spi0 - # gonna drive me nuts, this. - comb += spi.bus.stall.eq(spi.bus.cyc & ~spi.bus.ack) - comb += spi.cfg_bus.stall.eq(spi.cfg_bus.cyc & ~spi.cfg_bus.ack) + + # add async wishbone bridges + m.submodules.spi0_async_br = self.spi0_async_br + m.submodules.spi0_async_cfg_br = self.spi0_async_cfg_br # add Tercel verilog source. assumes a directory structure where # microwatt has been checked out in a common subdirectory with: @@ -581,6 +682,18 @@ class DDR3SoC(SoC, Elaboratable): print (fname) self.spi0.add_verilog_source(fname, platform) + # add wb async bridge verilog source.
assumes a directory structure where + # verilog-wishbone has been checked out in a common subdirectory with: + # git clone https://github.com/alexforencich/verilog-wishbone.git + # git checkout d1fa24a0 + verilog_wishbone = "../../verilog-wishbone/rtl" + pth = os.path.split(__file__)[0] + pth = os.path.join(pth, verilog_wishbone) + fname = os.path.abspath(pth) + print (fname) + self.spi0_async_br.add_verilog_source(fname, platform) + self.spi0_async_cfg_br.add_verilog_source(fname, platform) + if hasattr(self, "eth0"): # add ethernet submodule m.submodules.eth0 = ethmac = self.eth0 @@ -664,20 +777,27 @@ def build_platform(fpga, firmware): # set clock frequency clk_freq = 70e6 + memory_clk_freq = clk_freq if fpga == 'sim': clk_freq = 100e6 + memory_clk_freq = clk_freq if fpga == 'isim': clk_freq = 55e6 # below 50 mhz, stops DRAM being enabled + memory_clk_freq = clk_freq if fpga == 'versa_ecp5': clk_freq = 55e6 # crank right down to test hyperram + memory_clk_freq = clk_freq if fpga == 'versa_ecp5_85': # 50MHz works. 100MHz works. 55MHz does NOT work. # Stick with multiples of 50MHz... clk_freq = 50e6 + memory_clk_freq = 100e6 if fpga == 'arty_a7': clk_freq = 50e6 + memory_clk_freq = clk_freq if fpga == 'ulx3s': clk_freq = 40.0e6 + memory_clk_freq = clk_freq # select a firmware address fw_addr = None @@ -837,6 +957,7 @@ def build_platform(fpga, firmware): hyperram_pins=hyperram_pins, firmware=firmware, clk_freq=clk_freq, + memory_clk_freq=memory_clk_freq, add_cpu=True) if toolchain == 'Trellis': -- 2.30.2