reduce versa_ecp5 clock freq to 50 mhz, reduce bit-width of XICS addressing
[ls2.git] / src / ls2.py
index e6cc8bf0b9fbccb64600c2bd2092d57718e64a19..50d69e1e5cf337642067fbb82b9a0a22c3c0ec75 100644 (file)
@@ -1,5 +1,6 @@
 # Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+# Copyright (C) 2022 Raptor Engineering, LLC <support@raptorengineering.com>
 #
 # Based on code from LambdaConcept, from the gram example which is BSD-2-License
 # https://github.com/jeanthom/gram/tree/master/examples
@@ -8,40 +9,58 @@
 # under EU Grants 871528 and 957073, under the LGPLv3+ License
 
 from nmigen import (Module, Elaboratable, DomainRenamer, Record,
-                    Signal, Cat, Const)
+                    Signal, Cat, Const, ClockSignal, ResetSignal)
+from nmigen.build.dsl import Attrs
 from nmigen.cli import verilog
 from nmigen.lib.cdc import ResetSynchronizer
 from nmigen_soc import wishbone, memory
 from nmigen_soc.memory import MemoryMap
+from nmigen.utils import log2_int
 
 from nmigen_stdio.serial import AsyncSerial
 
+# HyperRAM
+from nmigen_boards.resources.memory import HyperRAMResource
+from lambdasoc.periph.hyperram import HyperRAM, HyperRAMPads, HyperRAMPHY
+
+from lambdasoc.periph.event import IRQLine
 from lambdasoc.periph.intc import GenericInterruptController
 from lambdasoc.periph.sram import SRAMPeripheral
 from lambdasoc.periph.timer import TimerPeripheral
 from lambdasoc.periph import Peripheral
 from lambdasoc.soc.base import SoC
 from soc.bus.uart_16550 import UART16550 # opencores 16550 uart
+from soc.bus.tercel import Tercel # SPI XIP master
+from soc.bus.opencores_ethmac import EthMAC # OpenCores 10/100 Ethernet MAC
 from soc.bus.external_core import ExternalCore # external libresoc/microwatt
 from soc.bus.wb_downconvert import WishboneDownConvert
 from soc.bus.syscon import MicrowattSYSCON
+from soc.interrupts.xics import XICS_ICP, XICS_ICS
 
+# DDR3
 from gram.common import (PhySettings, get_cl_cw, get_sys_latency,
                         get_sys_phases,)
-from nmigen.utils import log2_int
 from gram.core import gramCore
 from gram.phy.ecp5ddrphy import ECP5DDRPHY
 from gram.phy.fakephy import FakePHY, SDRAM_VERBOSE_STD, SDRAM_VERBOSE_DBG
 from gram.modules import MT41K256M16, MT41K64M16
 from gram.frontend.wishbone import gramWishbone
 
+# SPI / Ethernet MAC
+from nmigen.build import Resource
+from nmigen.build import Subsignal
+from nmigen.build import Pins
+
+# Board (and simulation) platforms
 from nmigen_boards.versa_ecp5 import VersaECP5Platform
+from nmigen_boards.versa_ecp5 import VersaECP5Platform85 # custom board
 from nmigen_boards.ulx3s import ULX3S_85F_Platform
 from nmigen_boards.arty_a7 import ArtyA7_100Platform
 from nmigen_boards.test.blinky import Blinky
-
-from crg import ECPIX5CRG
 from icarusversa import IcarusVersaPlatform
+# Clock-Reset Generator (works for all ECP5 platforms)
+from ecp5_crg import ECP5CRG
+from arty_crg import ArtyA7CRG
 
 import sys
 import os
@@ -96,17 +115,14 @@ class WB64to32Convert(Elaboratable):
         master, slave = self.master, self.slave
 
         has_top = Signal()
+        has_top_r = Signal()
         has_bot = Signal()
 
-        # Do we have a top word and/or a bottom word ?
-        with m.If(master.cyc & master.stb):
-            comb += has_top.eq(master.sel[4:].bool())
-            comb += has_bot.eq(master.sel[:4].bool())
-
         with m.FSM() as fsm:
             with m.State("IDLE"):
-                # Clear ACK in case it was set
+                # Clear ACK (and has_top_r) in case it was set
                 sync += master.ack.eq(0)
+                sync += has_top_r.eq(0)
 
                 # Do we have a cycle ?
                 with m.If(master.cyc & master.stb):
@@ -118,6 +134,12 @@ class WB64to32Convert(Elaboratable):
                     sync += slave.cyc.eq(1)
                     sync += slave.stb.eq(1)
 
+                    # Do we have a top word and/or a bottom word ?
+                    comb += has_top.eq(master.sel[4:].bool())
+                    comb += has_bot.eq(master.sel[:4].bool())
+                    # record the has_top flag for the next FSM state
+                    sync += has_top_r.eq(has_top)
+
                     # Copy write enable to IO out, copy address as well,
                     # LSB is set later based on HI/LO
                     sync += slave.we.eq(master.we)
@@ -145,13 +167,12 @@ class WB64to32Convert(Elaboratable):
                         # Wait for ack on TOP half
                         m.next = "WAIT_ACK_TOP"
 
+
             with m.State("WAIT_ACK_BOT"):
                 # If we aren't stalled by the device, clear stb
                 if hasattr(slave, "stall"):
                     with m.If(~slave.stall):
                         sync += slave.stb.eq(0)
-                else:
-                    sync += slave.stb.eq(0)
 
                 # Handle ack
                 with m.If(slave.ack):
@@ -160,7 +181,7 @@ class WB64to32Convert(Elaboratable):
                         sync += master.dat_r[:32].eq(slave.dat_r)
 
                     # Do we have a "top" part as well ?
-                    with m.If(has_top):
+                    with m.If(has_top_r):
                         # Latch data & sel
                         with m.If(master.we):
                             sync += slave.dat_w.eq(master.dat_w[32:])
@@ -176,6 +197,7 @@ class WB64to32Convert(Elaboratable):
                     with m.Else():
                         # We are done, ack up, clear cyc downstream
                         sync += slave.cyc.eq(0)
+                        sync += slave.stb.eq(0)
 
                         # And ack & unstall upstream
                         sync += master.ack.eq(1)
@@ -190,8 +212,6 @@ class WB64to32Convert(Elaboratable):
                 if hasattr(slave, "stall"):
                     with m.If(~slave.stall):
                         sync += slave.stb.eq(0)
-                else:
-                    sync += slave.stb.eq(0)
 
                 # Handle ack
                 with m.If(slave.ack):
@@ -201,10 +221,11 @@ class WB64to32Convert(Elaboratable):
 
                     # We are done, ack up, clear cyc downstream
                     sync += slave.cyc.eq(0)
+                    sync += slave.stb.eq(0)
 
                     # And ack & unstall upstream
                     sync += master.ack.eq(1)
-                    if hasattr(master , "stall"):
+                    if hasattr(master, "stall"):
                         sync += master.stall.eq(0)
 
                     # Wait for next one
@@ -216,47 +237,98 @@ class WB64to32Convert(Elaboratable):
 class DDR3SoC(SoC, Elaboratable):
     def __init__(self, *,
                  fpga,
-                 dram_cls,
-                 uart_pins, ddr_pins,
-                 ddrphy_addr, dramcore_addr,
-                 ddr_addr, fw_addr=0x0000_0000,
-                 firmware=None,
+                 dram_cls=None,
+                 uart_pins=None, spi_0_pins=None, ethmac_0_pins=None,
+                 ddr_pins=None, ddrphy_addr=None,
+                 dramcore_addr=None, ddr_addr=None,
+                 fw_addr=0x0000_0000, firmware=None,
+                 uart_addr=None, uart_irqno=0,
+                 spi0_addr=None, spi0_cfg_addr=None,
+                 eth0_cfg_addr=None, eth0_irqno=None,
+                 hyperram_addr=None,
+                 hyperram_pins=None,
+                 xics_icp_addr=None, xics_ics_addr=None,
                  clk_freq=50e6,
                  add_cpu=True):
 
+        # wishbone routing is as follows:
+        #
+        #         SoC
+        #       +--+--+
+        #       |     |
+        #      ibus  dbus
+        #       |     |
+        #       +--+--+
+        #          |
+        #      64to32DownCvt
+        #          |
+        #       arbiter------------------------------------------+
+        #          |                                             |
+        #   +---decoder----+--------+---------+-------+--------+ |
+        #   |      |       |        |         |       |        | |
+        #  uart  XICS    CSRs     DRAM     XIP SPI HyperRAM   EthMAC
+
         # set up wishbone bus arbiter and decoder. arbiter routes,
         # decoder maps local-relative addressed satellites to global addresses
         self._arbiter = wishbone.Arbiter(addr_width=30, data_width=32,
                                          granularity=8,
-                                         features={"cti", "bte"})
+                                         features={"cti", "bte", "stall"})
         self._decoder = wishbone.Decoder(addr_width=30, data_width=32,
                                          granularity=8,
-                                         features={"cti", "bte"})
+                                         features={"cti", "bte", "stall"})
 
         # default firmware name
         if firmware is None:
             firmware = "firmware/main.bin"
 
         # set up clock-reset generator
-        self.crg = ECPIX5CRG(clk_freq)
+        pod_bits = 25
+        if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s']:
+            if fpga in ['isim']:
+                pod_bits = 6
+            self.crg = ECP5CRG(clk_freq, dram_clk_freq=None, pod_bits=pod_bits)
+        if fpga in ['arty_a7']:
+            self.crg = ArtyA7CRG(clk_freq)
 
         # set up CPU, with 64-to-32-bit downconverters
         if add_cpu:
             self.cpu = ExternalCore(name="ext_core")
             cvtdbus = wishbone.Interface(addr_width=30, data_width=32,
-                                         granularity=8)
+                                         granularity=8, features={'stall'})
             cvtibus = wishbone.Interface(addr_width=30, data_width=32,
-                                         granularity=8)
+                                         granularity=8, features={'stall'})
             self.dbusdowncvt = WB64to32Convert(self.cpu.dbus, cvtdbus)
             self.ibusdowncvt = WB64to32Convert(self.cpu.ibus, cvtibus)
             self._arbiter.add(cvtibus) # I-Cache Master
             self._arbiter.add(cvtdbus) # D-Cache Master. TODO JTAG master
+            self.cvtibus = cvtibus
+            self.cvtdbus = cvtdbus
 
-            # CPU interrupt controller
+            # CPU interrupt controller, needs stall to be added, also
+            # compat with wishbone.Interface
             self.intc = GenericInterruptController(width=len(self.cpu.irq))
-
-        # SRAM (but actually a ROM, for firmware), at address 0x0
+            self.xics_icp = icp = XICS_ICP()
+            self.xics_ics = ics = XICS_ICS()
+            self.int_level_i = self.xics_ics.int_level_i
+
+            self.pbus = pbus = wishbone.Interface(name="xics_icp_bus",
+                                         addr_width=6, data_width=32,
+                                         granularity=8, features={'stall'})
+            self.sbus = sbus = wishbone.Interface(name="xics_ics_bus",
+                                         addr_width=10, data_width=32,
+                                         granularity=8, features={'stall'})
+            pmap = MemoryMap(addr_width=8, data_width=8, name="icp_map")
+            pbus.memory_map = pmap
+            self._decoder.add(pbus, addr=xics_icp_addr) # ICP addr
+
+            smap = MemoryMap(addr_width=12, data_width=8, name="ics_map")
+            sbus.memory_map = smap
+            self._decoder.add(sbus, addr=xics_ics_addr) # ICS addr
+
+
+        # SRAM (but actually a ROM, for firmware)
         if fw_addr is not None:
+            print ("fw at address %x" % fw_addr)
             sram_width = 32
             self.bootmem = SRAMPeripheral(size=0x8000, data_width=sram_width,
                                       writable=True)
@@ -268,8 +340,13 @@ class DDR3SoC(SoC, Elaboratable):
             self._decoder.add(self.bootmem.bus, addr=fw_addr) # ROM at fw_addr
 
         # System Configuration info
+        # offset executable ELF payload at 2 megabyte offset (2<<20)
+        spi_offset = 2<<20 if (spi_0_pins is not None) else None
+        dram_offset = ddr_addr if (ddr_pins is not None) else None
         self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq,
-                                      has_uart=(uart_pins is not None))
+                                      has_uart=(uart_pins is not None),
+                                      spi_offset=spi_offset,
+                                      dram_addr=dram_offset)
         self._decoder.add(self.syscon.bus, addr=0xc0000000) # at 0xc000_0000
 
         if False:
@@ -280,14 +357,21 @@ class DDR3SoC(SoC, Elaboratable):
         # UART at 0xC000_2000, convert 32-bit bus down to 8-bit in an odd way
         if uart_pins is not None:
             # sigh actual UART in microwatt is 8-bit
-            self.uart = UART16550(data_width=8, pins=uart_pins)
+            self.uart_irq = IRQLine()
+            self.uart = UART16550(data_width=8, pins=uart_pins,
+                                  features={'stall'},
+                                  irq=self.uart_irq)
             # but (see soc.vhdl) 8-bit regs are addressed at 32-bit locations
+            # strictly speaking this is a nmigen-soc "sparse" arrangement
+            # which should be handled by MemoryMap, but needs investigation
             cvtuartbus = wishbone.Interface(addr_width=5, data_width=32,
-                                            granularity=8)
+                                            granularity=8,
+                                            features={'stall'})
             umap = MemoryMap(addr_width=7, data_width=8, name="uart_map")
             cvtuartbus.memory_map = umap
-            self._decoder.add(cvtuartbus, addr=0xc0002000) # 16550 UART addr
+            self._decoder.add(cvtuartbus, addr=uart_addr) # 16550 UART addr
             self.cvtuartbus = cvtuartbus
+            self.intc.add_irq(self.uart.irq, index=uart_irqno)
 
         # SDRAM module using opencores sdr_ctrl
         """
@@ -310,11 +394,24 @@ class DDR3SoC(SoC, Elaboratable):
         """
 
         # DRAM Module
-        if ddr_pins is not None or fpga == 'sim':
+        if ddr_pins is not None: # or fpga == 'sim':
             ddrmodule = dram_cls(clk_freq, "1:2") # match DDR3 ASIC P/N
 
-            drs = lambda x: x
-            #drs = DomainRenamer("dramsync")
+            # remap both the sync domain (wherever it occurs) and
+            # the sync2x domain.  technically this should NOT be done.
+            # it's a bit of a mess.  ok: this should be done only
+            # when dramsync===sync (and dramsync2x===sync2x)
+            drs = DomainRenamer({"sync": "dramsync",
+                                 "sync2x": "dramsync2x"})
+
+            # HOWEVER, when the ASyncBridge is deployed, the two domains
+            # must NOT be renamed, instead this is used:
+            #drs = lambda x: x
+            # and then the ASyncBridge takes care of the two.
+            # but, back in ecp5_crg.py, when ASyncBridge is added,
+            # dram_clk_freq must be passed to ECP5CRG, which will call
+            # ECP5CRG.phase2_domain on your behalf, setting up the
+            # necessary dramsync2x which is needed for the xdr=4 IOpads
 
             if fpga == 'sim':
                 self.ddrphy = FakePHY(module=ddrmodule,
@@ -335,26 +432,98 @@ class DDR3SoC(SoC, Elaboratable):
                 self.dramcore = drs(dramcore)
             self._decoder.add(self.dramcore.bus, addr=dramcore_addr)
 
-            # map the DRAM onto Wishbone
-            drambone = gramWishbone(dramcore)
+            # map the DRAM onto Wishbone, XXX use stall but set classic below
+            # XXX WHEN ADDING ASYNCBRIDGE IT IS THE **BRIDGE** THAT MUST
+            # XXX HAVE THE STALL SIGNAL, AND THE **BRIDGE** THAT MUST HAVE
+            # XXX stall=stb&~ack APPLIED
+            drambone = gramWishbone(dramcore, features={'stall'})
             if fpga == 'sim':
                 self.drambone = drambone
             else:
                 self.drambone = drs(drambone)
+            # XXX ADD THE ASYNCBRIDGE NOT THE DRAMBONE.BUS, THEN
+            # XXX ADD DRAMBONE.BUS TO ASYNCBRIDGE
             self._decoder.add(self.drambone.bus, addr=ddr_addr)
 
+        # additional SRAM at address 0x0 if DRAM is not also at 0x0
+        # (TODO, check Flash, and HyperRAM as well)
+        if (ddr_pins is None or ddr_addr != 0x0) and fw_addr != 0:
+            print ("SRAM 0x8000 at address 0x0")
+            sram_width = 32
+            self.sram = SRAMPeripheral(size=0x8000,
+                                      data_width=sram_width,
+                                      writable=True)
+            self._decoder.add(self.sram.bus, addr=0x0) # RAM at 0x0
+
+        # SPI controller
+        if spi_0_pins is not None and fpga in ['sim',
+                                             'isim',
+                                             'rcs_arctic_tern_bmc_card',
+                                             'versa_ecp5',
+                                             'versa_ecp5_85',
+                                             'arty_a7']:
+            # The Lattice ECP5 devices require special handling on the
+            # dedicated SPI clock line, which is shared with the internal
+            # SPI controller used for FPGA bitstream loading.
+            spi0_is_lattice_ecp5_clk = False
+            if fpga in ['versa_ecp5',
+                        'versa_ecp5_85',
+                        'rcs_arctic_tern_bmc_card',
+                        'isim']:
+                spi0_is_lattice_ecp5_clk = True
+
+            # Tercel contains two independent Wishbone regions, a
+            # configuration region and the direct API access region,
+            # Set the SPI 0 access region to 16MB, as the FPGA
+            # bitstream Flash device is unlikely to be larger than this.
+            # The main SPI Flash (SPI 1) should be set to at
+            # least 28 bits (256MB) to allow the use of large 4BA devices.
+            self.spi0 = Tercel(data_width=32, spi_region_addr_width=24,
+                               adr_offset=spi0_addr,
+                               features={'stall'},
+                               clk_freq=clk_freq,
+                               pins=spi_0_pins,
+                               lattice_ecp5_usrmclk=spi0_is_lattice_ecp5_clk)
+            self._decoder.add(self.spi0.bus, addr=spi0_addr)
+            self._decoder.add(self.spi0.cfg_bus, addr=spi0_cfg_addr)
+
+        # Ethernet MAC
+        if ethmac_0_pins is not None and fpga in ['versa_ecp5',
+                                                  'versa_ecp5_85',
+                                                  'isim']:
+            self.eth_irq = IRQLine()
+            # The OpenCores Ethernet MAC contains two independent Wishbone
+            # interfaces, a slave (configuration) interface and a master (DMA)
+            # interface.
+            self.eth0 = EthMAC(pins=ethmac_0_pins, irq=self.eth_irq)
+            self._arbiter.add(self.eth0.master_bus)
+            self._decoder.add(self.eth0.slave_bus, addr=eth0_cfg_addr)
+            self.intc.add_irq(self.eth0.irq, index=eth0_irqno)
+
+        # HyperRAM modules *plural*. Assumes using a Quad PMOD by Piotr
+        # Esden, sold by 1bitsquared, only doing one CS_N enable at the
+        # moment
+        if hyperram_pins is not None:
+            self.hyperram = HyperRAM(io=hyperram_pins, phy_kls=HyperRAMPHY,
+                                     features={'stall'},
+                                     latency=7) # Winbond W956D8MBYA
+            self._decoder.add(self.hyperram.bus, addr=hyperram_addr)
+
         self.memory_map = self._decoder.bus.memory_map
 
         self.clk_freq = clk_freq
+        self.fpga = fpga
 
     def elaborate(self, platform):
         m = Module()
         comb = m.d.comb
 
         # add the peripherals and clock-reset-generator
-        if platform is not None:
+        if platform is not None and hasattr(self, "crg"):
             m.submodules.sysclk = self.crg
 
+        if hasattr(self, "sram"):
+            m.submodules.sram = self.sram
         if hasattr(self, "bootmem"):
             m.submodules.bootmem = self.bootmem
         m.submodules.syscon = self.syscon
@@ -367,22 +536,27 @@ class DDR3SoC(SoC, Elaboratable):
             comb += self.uart.ri_i.eq(0)
             comb += self.uart.dcd_i.eq(1)
             # sigh connect up the wishbone bus manually to deal with
-            # the mis-match on the data
+            # the mis-match on the data.  nmigen-soc "sparse" MemoryMap
+            # should be able to deal with this. TODO, investigate
             uartbus = self.uart.bus
             comb += uartbus.adr.eq(self.cvtuartbus.adr)
             comb += uartbus.stb.eq(self.cvtuartbus.stb)
             comb += uartbus.cyc.eq(self.cvtuartbus.cyc)
+            comb += uartbus.sel.eq(self.cvtuartbus.sel)
             comb += uartbus.we.eq(self.cvtuartbus.we)
             comb += uartbus.dat_w.eq(self.cvtuartbus.dat_w) # drops 8..31
             comb += self.cvtuartbus.dat_r.eq(uartbus.dat_r) # drops 8..31
             comb += self.cvtuartbus.ack.eq(uartbus.ack)
+            # aaand with the WB4-pipeline-to-WB3-classic mismatch, sigh
+            comb += uartbus.stall.eq(uartbus.cyc & ~uartbus.ack)
+            comb += self.cvtuartbus.stall.eq(uartbus.stall)
         if hasattr(self, "cpu"):
             m.submodules.intc = self.intc
             m.submodules.extcore = self.cpu
             m.submodules.dbuscvt = self.dbusdowncvt
             m.submodules.ibuscvt = self.ibusdowncvt
             # create stall sigs, assume wishbone classic
-            #ibus, dbus = self.cpu.ibus, self.cpu.dbus
+            #ibus, dbus = self.cvtibus, self.cvtdbus
             #comb += ibus.stall.eq(ibus.stb & ~ibus.ack)
             #comb += dbus.stall.eq(dbus.stb & ~dbus.ack)
 
@@ -391,7 +565,21 @@ class DDR3SoC(SoC, Elaboratable):
         if hasattr(self, "ddrphy"):
             m.submodules.ddrphy = self.ddrphy
             m.submodules.dramcore = self.dramcore
-            m.submodules.drambone = self.drambone
+            m.submodules.drambone = drambone = self.drambone
+            # grrr, same problem with drambone: not WB4-pipe compliant
+            # XXX TAKE THIS OUT, REPLACE WITH ASYNCBRIDGE HAVING
+            # XXX asyncbridge.bus.stall.eq(asyncbridge.bus.cyc & ...)
+            comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack)
+
+        # add hyperram module
+        if hasattr(self, "hyperram"):
+            m.submodules.hyperram = hyperram = self.hyperram
+            # grrr, same problem with hyperram: not WB4-pipe compliant
+            comb += hyperram.bus.stall.eq(hyperram.bus.cyc & ~hyperram.bus.ack)
+            # set 3 top CSn lines to zero for now
+            if self.fpga == 'arty_a7':
+                comb += hyperram.phy.rst_n.eq(ResetSignal())
+
         # add blinky lights so we know FPGA is alive
         if platform is not None:
             m.submodules.blinky = Blinky()
@@ -401,15 +589,43 @@ class DDR3SoC(SoC, Elaboratable):
         comb += self._arbiter.bus.connect(self._decoder.bus)
 
         if hasattr(self, "cpu"):
-            # wire up the CPU interrupts
-            comb += self.cpu.irq.eq(self.intc.ip)
+            m.submodules.xics_icp = icp = self.xics_icp
+            m.submodules.xics_ics = ics = self.xics_ics
+            comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
+            comb += self.cpu.irq.eq(icp.core_irq_o) # connect ICP to core
+
+            # wire up the CPU interrupts from the GenericInterrupt
+            comb += self.int_level_i.eq(self.intc.ip)
+
+            # grrr
+            comb += self.pbus.stall.eq(self.pbus.cyc & ~self.pbus.ack)
+            comb += self.sbus.stall.eq(self.sbus.cyc & ~self.sbus.ack)
+
+            # and also wire up make_wb_layout() to wishbone.Interface.
+            # really, XICS_ICS and XICS_ICP both need to be converted
+            # to use wishbone.Interface and this all goes
+            comb += icp.bus.adr.eq(self.pbus.adr)
+            comb += icp.bus.dat_w.eq(self.pbus.dat_w)
+            comb += icp.bus.cyc.eq(self.pbus.cyc)
+            comb += icp.bus.stb.eq(self.pbus.stb)
+            comb += icp.bus.we.eq(self.pbus.we)
+            comb += self.pbus.ack.eq(icp.bus.ack)
+            comb += self.pbus.dat_r.eq(icp.bus.dat_r)
+            comb += ics.bus.adr.eq(self.sbus.adr)
+            comb += ics.bus.dat_w.eq(self.sbus.dat_w)
+            comb += ics.bus.cyc.eq(self.sbus.cyc)
+            comb += ics.bus.stb.eq(self.sbus.stb)
+            comb += ics.bus.we.eq(self.sbus.we)
+            comb += self.sbus.ack.eq(ics.bus.ack)
+            comb += self.sbus.dat_r.eq(ics.bus.dat_r)
 
         if platform is None:
             return m
 
         # add uart16550 verilog source. assumes a directory
         # structure where ls2 has been checked out in a common
-        # subdirectory as https://github.com/freecores/uart16550
+        # subdirectory as:
+        # git clone https://github.com/freecores/uart16550
         opencores_16550 = "../../uart16550/rtl/verilog"
         pth = os.path.split(__file__)[0]
         pth = os.path.join(pth, opencores_16550)
@@ -417,6 +633,39 @@ class DDR3SoC(SoC, Elaboratable):
         print (fname)
         self.uart.add_verilog_source(fname, platform)
 
+        if hasattr(self, "spi0"):
+            # add spi submodule
+            m.submodules.spi0 = spi = self.spi0
+            # gonna drive me nuts, this.
+            comb += spi.bus.stall.eq(spi.bus.cyc & ~spi.bus.ack)
+            comb += spi.cfg_bus.stall.eq(spi.cfg_bus.cyc & ~spi.cfg_bus.ack)
+
+            # add Tercel verilog source. assumes a directory structure where
+            # microwatt has been checked out in a common subdirectory with:
+            # git clone https://git.libre-soc.org/git/microwatt.git tercel-qspi
+            # git checkout 882ace781e4
+            raptor_tercel = "../../tercel-qspi/tercel"
+            pth = os.path.split(__file__)[0]
+            pth = os.path.join(pth, raptor_tercel)
+            fname = os.path.abspath(pth)
+            print (fname)
+            self.spi0.add_verilog_source(fname, platform)
+
+        if hasattr(self, "eth0"):
+            # add ethernet submodule
+            m.submodules.eth0 = ethmac = self.eth0
+
+            # add EthMAC verilog source. assumes a directory
+            # structure where the opencores ethmac has been checked out
+            # in a common subdirectory as:
+            # git clone https://github.com/freecores/ethmac
+            opencores_ethmac = "../../ethmac/rtl/verilog"
+            pth = os.path.split(__file__)[0]
+            pth = os.path.join(pth, opencores_ethmac)
+            fname = os.path.abspath(pth)
+            print (fname)
+            self.eth0.add_verilog_source(fname, platform)
+
         # add the main core
         pth = os.path.split(__file__)[0]
         pth = os.path.join(pth, '../external_core_top.v')
@@ -431,6 +680,8 @@ class DDR3SoC(SoC, Elaboratable):
         # and at the moment that's just UART tx/rx.
         ports = []
         ports += [self.uart.tx_o, self.uart.rx_i]
+        if hasattr(self, "hyperram"):
+            ports += list(self.hyperram.ports())
         if hasattr(self, "ddrphy"):
             if hasattr(self.ddrphy, "pads"): # real PHY
                 ports += list(self.ddrphy.pads.fields.values())
@@ -444,17 +695,14 @@ class DDR3SoC(SoC, Elaboratable):
                 for phase in self.dramcore.dfii._inti.phases:
                     print ("dfi master", phase)
                     ports += list(phase.fields.values())
-        ports += [ClockSignal(), ClockSignal("dramsync"), ResetSignal()]
+        ports += [ClockSignal(), ResetSignal()]
         return ports
 
-if __name__ == "__main__":
+def build_platform(fpga, firmware):
 
-    # create a platform selected from the toolchain. defaults to VERSA_ECP5
-    # only VERSA_ECP5 will work for now because of the DDR3 module
-    fpga = "versa_ecp5"
-    if len(sys.argv) >= 2:
-        fpga = sys.argv[1]
+    # create a platform selected from the toolchain. 
     platform_kls =  {'versa_ecp5': VersaECP5Platform,
+                     'versa_ecp5_85': VersaECP5Platform85,
                      'ulx3s': ULX3S_85F_Platform,
                      'arty_a7': ArtyA7_100Platform,
                      'isim': IcarusVersaPlatform,
@@ -462,12 +710,14 @@ if __name__ == "__main__":
                     }[fpga]
     toolchain = {'arty_a7': "yosys_nextpnr",
                  'versa_ecp5': 'Trellis',
+                 'versa_ecp5_85': 'Trellis',
                  'isim': 'Trellis',
                  'ulx3s': 'Trellis',
                  'sim': None,
                 }.get(fpga, None)
     dram_cls = {'arty_a7': None,
                  'versa_ecp5': MT41K64M16,
+                 'versa_ecp5_85': MT41K64M16,
                  #'versa_ecp5': MT41K256M16,
                  'ulx3s': None,
                  'sim': MT41K256M16,
@@ -475,20 +725,36 @@ if __name__ == "__main__":
                 }.get(fpga, None)
     if platform_kls is not None:
         platform = platform_kls(toolchain=toolchain)
+        if fpga == 'versa_ecp5_85':
+            platform.speed = "7" # HACK. speed grade 7, sigh
     else:
         platform = None
 
+    print ("platform", fpga, firmware, platform)
+
     # set clock frequency
     clk_freq = 70e6
     if fpga == 'sim':
         clk_freq = 100e6
-
-    # select a firmware file
-    firmware = None
+    if fpga == 'isim':
+        clk_freq = 55e6 # below 50 mhz, stops DRAM being enabled
+    if fpga == 'versa_ecp5':
+        clk_freq = 50e6 # crank right down to test hyperram
+    if fpga == 'versa_ecp5_85':
+        # 50MHz works.  100MHz works.  55MHz does NOT work.
+        # Stick with multiples of 50MHz...
+        clk_freq = 50e6
+    if fpga == 'arty_a7':
+        clk_freq = 50e6
+    if fpga == 'ulx3s':
+        clk_freq = 40.0e6
+
+    # select a firmware address
     fw_addr = None
-    if len(sys.argv) >= 3:
-        firmware = sys.argv[2]
-        fw_addr = 0x0000_0000
+    if firmware is not None:
+        fw_addr = 0xff00_0000 # firmware at HI address, now
+
+    print ("fpga", fpga, "firmware", firmware)
 
     # get UART resource pins
     if platform is not None:
@@ -496,27 +762,154 @@ if __name__ == "__main__":
     else:
         uart_pins = Record([('tx', 1), ('rx', 1)], name="uart_0")
 
-    # get DDR resource pins
+    # get DDR resource pins; DDR is disabled below 50 MHz for now
     ddr_pins = None
-    if platform is not None and fpga in ['versa_ecp5', 'arty_a7', 'isim']:
+    if (clk_freq >= 50e6 and platform is not None and
+        fpga in ['versa_ecp5', 'versa_ecp5_85', 'arty_a7', 'isim']):
         ddr_pins = platform.request("ddr3", 0,
                                     dir={"dq":"-", "dqs":"-"},
-                                    xdr={"rst": 4, "clk":4, "a":4,
+                                    xdr={"rst": 1, "clk":4, "a":4,
                                          "ba":4, "clk_en":4,
                                          "odt":4, "ras":4, "cas":4, "we":4,
                                          "cs": 4})
 
+    # Get SPI resource pins
+    spi_0_pins = None
+    if platform is not None and \
+       fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim']:
+        # Override here to get FlashResource out of the way and enable Tercel
+        # direct access to the SPI flash.
+        # each pin needs a separate direction control
+        spi_0_ios = [
+            Resource("spi_0", 0,
+                     Subsignal("dq0",   Pins("W2", dir="io")),
+                     Subsignal("dq1",   Pins("V2", dir="io")),
+                     Subsignal("dq2",   Pins("Y2", dir="io")),
+                     Subsignal("dq3",   Pins("W1", dir="io")),
+                     Subsignal("cs_n", Pins("R2", dir="o")),
+                     Attrs(PULLMODE="NONE", DRIVE="4", IO_TYPE="LVCMOS33"))
+        ]
+        platform.add_resources(spi_0_ios)
+        spi_0_pins = platform.request("spi_0", 0, dir={"cs_n":"o"},
+                                                  xdr={"dq0":1, "dq1": 1,
+                                                       "dq2":1, "dq3": 1,
+                                                       "cs_n":0})
+
+    if platform is not None and \
+       fpga in ['arty_a7']:
+        # each pin needs a separate direction control
+        spi_0_ios = [
+            Resource("spi_0", 0,
+                     Subsignal("dq0",  Pins("K17", dir="io")),
+                     Subsignal("dq1",  Pins("K18", dir="io")),
+                     Subsignal("dq2",  Pins("L14", dir="io")),
+                     Subsignal("dq3",  Pins("M14", dir="io")),
+                     Subsignal("cs_n", Pins("L13", dir="o")),
+                     Subsignal("clk",  Pins("L16", dir="o")),
+                     Attrs(PULLMODE="NONE", DRIVE="4", IO_TYPE="LVCMOS33"))
+        ]
+        platform.add_resources(spi_0_ios)
+        spi_0_pins = platform.request("spi_0", 0)
+
+    print ("spiflash pins", spi_0_pins)
+
+    # Get Ethernet RMII resource pins
+    ethmac_0_pins = None
+    if False and platform is not None and \
+       fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim']:
+        # Mainly on X3 connector, MDIO on X4 due to lack of pins
+        ethmac_0_ios = [
+            Resource("ethmac_0", 0,
+                     Subsignal("mtx_clk",   Pins("B19", dir="i")),
+                     Subsignal("mtxd",      Pins("B12 B9 E6 D6", dir="o")),
+                     Subsignal("mtxen",     Pins("E7", dir="o")),
+                     Subsignal("mtxerr",    Pins("D7", dir="o")),
+                     Subsignal("mrx_clk",   Pins("B11", dir="i")),
+                     Subsignal("mrxd",      Pins("B6 E9 D9 B8", dir="i")),
+                     Subsignal("mrxdv",     Pins("C8", dir="i")),
+                     Subsignal("mrxerr",    Pins("D8", dir="i")),
+                     Subsignal("mcoll",     Pins("E8", dir="i")),
+                     Subsignal("mcrs",      Pins("C7", dir="i")),
+                     Subsignal("mdc",       Pins("B18", dir="o")),
+                     Subsignal("md",        Pins("A18", dir="io")),
+                     Attrs(PULLMODE="NONE", DRIVE="8", SLEWRATE="FAST",
+                           IO_TYPE="LVCMOS33"))
+        ]
+        platform.add_resources(ethmac_0_ios)
+        ethmac_0_pins = platform.request("ethmac_0", 0,
+                                        dir={"mtx_clk":"i", "mtxd":"o",
+                                             "mtxen":"o",
+                                             "mtxerr":"o", "mrx_clk":"i", 
+                                             "mrxd":"i",
+                                             "mrxdv":"i", "mrxerr":"i", 
+                                             "mcoll":"i",
+                                             "mcrs":"i", "mdc":"o", "md":"io"},
+                                        xdr={"mtx_clk": 0, "mtxd": 0, 
+                                             "mtxen": 0,
+                                             "mtxerr": 0, "mrx_clk": 0, 
+                                             "mrxd": 0,
+                                             "mrxdv": 0, "mrxerr": 0, 
+                                             "mcoll": 0,
+                                             "mcrs": 0, "mdc": 0, "md": 0})
+    print ("ethmac pins", ethmac_0_pins)
+
+    # Get HyperRAM pins
+    hyperram_pins = None
+    if platform is None:
+        hyperram_pins = HyperRAMPads()
+    elif fpga in ['isim']:
+        hyperram_ios = HyperRAMResource(0, cs_n="B13",
+                                        dq="E14 C10 B10 E12 D12 A9 D11 D14",
+                                        rwds="C14", rst_n="E13", ck_p="D13",
+                                        attrs=Attrs(IO_TYPE="LVCMOS33"))
+        platform.add_resources(hyperram_ios)
+        hyperram_pins = platform.request("hyperram")
+        print ("isim a7 hyperram", hyperram_ios)
+    # Digilent Arty A7-100t
+    elif platform is not None and fpga in ['arty_a7']:
+        hyperram_ios = HyperRAMResource(0, cs_n="V12 V14 U12 U14",
+                                        dq="D4 D3 F4 F3 G2 H2 D2 E2",
+                                        rwds="U13", rst_n="T13", ck_p="V10",
+                                        # ck_n="V11" - for later (DDR)
+                                        attrs=Attrs(IOSTANDARD="LVCMOS33"))
+        platform.add_resources(hyperram_ios)
+        hyperram_pins = platform.request("hyperram")
+        print ("arty a7 hyperram", hyperram_ios)
+    # VERSA ECP5
+    elif False and platform is not None and fpga in \
+                ['versa_ecp5', 'versa_ecp5_85']:
+        hyperram_ios = HyperRAMResource(0, cs_n="B13",
+                                        dq="E14 C10 B10 E12 D12 A9 D11 D14",
+                                        rwds="C14", rst_n="E13", ck_p="D13",
+                                        attrs=Attrs(IO_TYPE="LVCMOS33"))
+        platform.add_resources(hyperram_ios)
+        hyperram_pins = platform.request("hyperram")
+        print ("versa ecp5 hyperram", hyperram_ios)
+    print ("hyperram pins", hyperram_pins)
+
     # set up the SOC
     soc = DDR3SoC(fpga=fpga, dram_cls=dram_cls,
                   # check microwatt_soc.h for these
-                  ddrphy_addr=0xff000000,   # DRAM_INIT_BASE firmware base
+                  ddrphy_addr=0xfff00000,   # DRAM_INIT_BASE, PHY address
                   dramcore_addr=0xc8000000, # DRAM_CTRL_BASE
-                  ddr_addr=0x40000000,      # DRAM_BASE
+                  ddr_addr=0x00000000,      # DRAM_BASE
+                  spi0_addr=0xf0000000,     # SPI0_BASE
+                  spi0_cfg_addr=0xc0006000, # SPI0_CTRL_BASE
+                  eth0_cfg_addr=0xc000c000, # ETH0_CTRL_BASE (4k)
+                  eth0_irqno=1,             # ETH0_IRQ number (match microwatt)
+                  hyperram_addr=0xa0000000, # HYPERRAM_BASE
                   fw_addr=fw_addr,
                   #fw_addr=None,
                   ddr_pins=ddr_pins,
                   uart_pins=uart_pins,
+                  uart_irqno=0,             # UART_IRQ number (match microwatt)
+                  uart_addr=0xc0002000, # UART0_ADDR
+                  spi_0_pins=spi_0_pins,
+                  ethmac_0_pins=ethmac_0_pins,
+                  hyperram_pins=hyperram_pins,
                   firmware=firmware,
+                  xics_icp_addr=0xc000_4000, # XICS_ICP_BASE
+                  xics_ics_addr=0xc000_5000, # XICS_ICS_BASE
                   clk_freq=clk_freq,
                   add_cpu=True)
 
@@ -539,3 +932,14 @@ if __name__ == "__main__":
         with open("ls2.v", "w") as f:
             f.write(vl)
 
+
+# NOTE: the build process exec()s this file without arguments, which
+# breaks here.  Use arty_a7_ls2.py etc. instead, with no arguments.
+if __name__ == '__main__':
+    fpga = None
+    firmware = None
+    if len(sys.argv) >= 2:
+        fpga = sys.argv[1]
+    if len(sys.argv) >= 3:
+        firmware = sys.argv[2]
+    build_platform(fpga, firmware)