remove unneeded model variable
[ls2.git] / src / ls2.py
index 9482112954167e9371f463638ddcc312d152369c..3aa51e48d9d3f017e707303961ec666b7527e9bd 100644 (file)
@@ -1,5 +1,6 @@
 # Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+# Copyright (C) 2022 Raptor Engineering, LLC <support@raptorengineering.com>
 #
 # Based on code from LambdaConcept, from the gram example which is BSD-2-License
 # https://github.com/jeanthom/gram/tree/master/examples
 # Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER
 # under EU Grants 871528 and 957073, under the LGPLv3+ License
 
-from nmigen import (Module, Elaboratable, DomainRenamer, Record)
+from nmigen import (Module, Elaboratable, DomainRenamer, Record,
+                    Signal, Cat, Const, ClockSignal, ResetSignal)
+from nmigen.build.dsl import Attrs
 from nmigen.cli import verilog
 from nmigen.lib.cdc import ResetSynchronizer
 from nmigen_soc import wishbone, memory
 from nmigen_soc.memory import MemoryMap
+from nmigen.utils import log2_int
 
 from nmigen_stdio.serial import AsyncSerial
 
-from lambdasoc.cpu.minerva import MinervaCPU
+# HyperRAM
+from nmigen_boards.resources.memory import HyperRAMResource
+from lambdasoc.periph.hyperram import HyperRAM, HyperRAMPads, HyperRAMPHY
+
 from lambdasoc.periph.intc import GenericInterruptController
 from lambdasoc.periph.sram import SRAMPeripheral
 from lambdasoc.periph.timer import TimerPeripheral
 from lambdasoc.periph import Peripheral
 from lambdasoc.soc.base import SoC
 from soc.bus.uart_16550 import UART16550 # opencores 16550 uart
+from soc.bus.tercel import Tercel # SPI XIP master
 from soc.bus.external_core import ExternalCore # external libresoc/microwatt
 from soc.bus.wb_downconvert import WishboneDownConvert
+from soc.bus.syscon import MicrowattSYSCON
 
+# DDR3
+from gram.common import (PhySettings, get_cl_cw, get_sys_latency,
+                        get_sys_phases,)
 from gram.core import gramCore
 from gram.phy.ecp5ddrphy import ECP5DDRPHY
-from gram.modules import MT41K256M16
+from gram.phy.fakephy import FakePHY, SDRAM_VERBOSE_STD, SDRAM_VERBOSE_DBG
+from gram.modules import MT41K256M16, MT41K64M16
 from gram.frontend.wishbone import gramWishbone
 
+# Board (and simulation) platforms
 from nmigen_boards.versa_ecp5 import VersaECP5Platform
+from nmigen_boards.versa_ecp5 import VersaECP5Platform85 # custom board
 from nmigen_boards.ulx3s import ULX3S_85F_Platform
 from nmigen_boards.arty_a7 import ArtyA7_100Platform
 from nmigen_boards.test.blinky import Blinky
-
-from crg import ECPIX5CRG
+from icarusversa import IcarusVersaPlatform
+# Clock-Reset Generator (works for all ECP5 platforms)
+from ecp5_crg import ECP5CRG
+from arty_crg import ArtyA7CRG
 
 import sys
 import os
 
+def sim_ddr3_settings(clk_freq=100e6):
+    tck = 2/(2*2*clk_freq)
+    nphases = 2
+    databits = 16
+    nranks = 1
+    addressbits = 14
+    bankbits = 3
+    cl, cwl = get_cl_cw("DDR3", tck)
+    cl_sys_latency = get_sys_latency(nphases, cl)
+    cwl_sys_latency = get_sys_latency(nphases, cwl)
+    rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl)
+    wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl)
+    return PhySettings(
+        phytype="ECP5DDRPHY",
+        memtype="DDR3",
+        databits=databits,
+        dfi_databits=4*databits,
+        nranks=nranks,
+        nphases=nphases,
+        rdphase=rdphase,
+        wrphase=wrphase,
+        rdcmdphase=rdcmdphase,
+        wrcmdphase=wrcmdphase,
+        cl=cl,
+        cwl=cwl,
+        read_latency=2 + cl_sys_latency + 2 + log2_int(4//nphases) + 4,
+        write_latency=cwl_sys_latency
+    )
+
+
+class WB64to32Convert(Elaboratable):
+    """Microwatt IO wishbone slave 64->32 bits converter
+
+    For timing reasons, this adds a one cycle latch on the way both
+    in and out. This relaxes timing and routing pressure on the "main"
+    memory bus by moving all simple IOs to a slower 32-bit bus.
+
+    This implementation is rather dumb at the moment, no stash buffer,
+    so we stall whenever that latch is busy. This can be improved.
+    """
+    def __init__(self, master, slave):
+        self.master = master
+        self.slave = slave
+
+    def elaborate(self, platform):
+        m = Module()
+        comb, sync = m.d.comb, m.d.sync
+        master, slave = self.master, self.slave
+
+        has_top = Signal()
+        has_top_r = Signal()
+        has_bot = Signal()
+
+        with m.FSM() as fsm:
+            with m.State("IDLE"):
+                # Clear ACK (and has_top_r) in case it was set
+                sync += master.ack.eq(0)
+                sync += has_top_r.eq(0)
+
+                # Do we have a cycle ?
+                with m.If(master.cyc & master.stb):
+                    # Stall master until we are done, we aren't (yet) pipelining
+                    # this, it's all slow IOs.
+                    sync += master.stall.eq(1)
+
+                    # Start cycle downstream
+                    sync += slave.cyc.eq(1)
+                    sync += slave.stb.eq(1)
+
+                    # Do we have a top word and/or a bottom word ?
+                    comb += has_top.eq(master.sel[4:].bool())
+                    comb += has_bot.eq(master.sel[:4].bool())
+                    # record the has_top flag for the next FSM state
+                    sync += has_top_r.eq(has_top)
+
+                    # Copy write enable to IO out, copy address as well,
+                    # LSB is set later based on HI/LO
+                    sync += slave.we.eq(master.we)
+                    sync += slave.adr.eq(Cat(0, master.adr))
+
+                    # If we have a bottom word, handle it first, otherwise
+                    # send the top word down. XXX Split the actual mux out
+                    # and only generate a control signal.
+                    with m.If(has_bot):
+                        with m.If(master.we):
+                            sync += slave.dat_w.eq(master.dat_w[:32])
+                        sync += slave.sel.eq(master.sel[:4])
+
+                        # Wait for ack on BOTTOM half
+                        m.next = "WAIT_ACK_BOT"
+
+                    with m.Else():
+                        with m.If(master.we):
+                            sync += slave.dat_w.eq(master.dat_w[32:])
+                        sync += slave.sel.eq(master.sel[4:])
+
+                        # Bump LSB of address
+                        sync += slave.adr[0].eq(1)
+
+                        # Wait for ack on TOP half
+                        m.next = "WAIT_ACK_TOP"
+
+
+            with m.State("WAIT_ACK_BOT"):
+                # If we aren't stalled by the device, clear stb
+                if hasattr(slave, "stall"):
+                    with m.If(~slave.stall):
+                        sync += slave.stb.eq(0)
+
+                # Handle ack
+                with m.If(slave.ack):
+                    # If it's a read, latch the data
+                    with m.If(~slave.we):
+                        sync += master.dat_r[:32].eq(slave.dat_r)
+
+                    # Do we have a "top" part as well ?
+                    with m.If(has_top_r):
+                        # Latch data & sel
+                        with m.If(master.we):
+                            sync += slave.dat_w.eq(master.dat_w[32:])
+                        sync += slave.sel.eq(master.sel[4:])
+
+                        # Bump address and set STB
+                        sync += slave.adr[0].eq(1)
+                        sync += slave.stb.eq(1)
+
+                        # Wait for new ack
+                        m.next = "WAIT_ACK_TOP"
+
+                    with m.Else():
+                        # We are done, ack up, clear cyc downstream
+                        sync += slave.cyc.eq(0)
+                        sync += slave.stb.eq(0)
+
+                        # And ack & unstall upstream
+                        sync += master.ack.eq(1)
+                        if hasattr(master , "stall"):
+                            sync += master.stall.eq(0)
+
+                        # Wait for next one
+                        m.next = "IDLE"
+
+            with m.State("WAIT_ACK_TOP"):
+                # If we aren't stalled by the device, clear stb
+                if hasattr(slave, "stall"):
+                    with m.If(~slave.stall):
+                        sync += slave.stb.eq(0)
+
+                # Handle ack
+                with m.If(slave.ack):
+                    # If it's a read, latch the data
+                    with m.If(~slave.we):
+                        sync += master.dat_r[32:].eq(slave.dat_r)
+
+                    # We are done, ack up, clear cyc downstream
+                    sync += slave.cyc.eq(0)
+                    sync += slave.stb.eq(0)
+
+                    # And ack & unstall upstream
+                    sync += master.ack.eq(1)
+                    if hasattr(master, "stall"):
+                        sync += master.stall.eq(0)
+
+                    # Wait for next one
+                    m.next = "IDLE"
+
+        return m
+
 
 class DDR3SoC(SoC, Elaboratable):
     def __init__(self, *,
-                 uart_pins, ddr_pins,
-                 ddrphy_addr, dramcore_addr,
-                 ddr_addr, fw_addr=0x0000_0000,
+                 fpga,
+                 dram_cls,
+                 uart_pins, spi_0_pins,
+                 ddr_pins, ddrphy_addr, dramcore_addr, ddr_addr,
+                 fw_addr=0x0000_0000,
                  firmware=None,
-                 clk_freq=40e6):
+                 spi0_addr, spi0_cfg_addr,
+                 hyperram_addr=None,
+                 hyperram_pins=None,
+                 clk_freq=50e6,
+                 add_cpu=True):
+
+        # wishbone routing is as follows:
+        #
+        #         SoC
+        #       +--+--+
+        #       |     |
+        #      ibus  dbus
+        #       |     |
+        #       +--+--+
+        #          |
+        #      64to32DownCvt
+        #          |
+        #       arbiter
+        #          |
+        #   +---decoder----+--------+---------+-------+
+        #   |      |       |        |         |       |
+        #  uart  XICS    CSRs     DRAM     XIP SPI HyperRAM
 
         # set up wishbone bus arbiter and decoder. arbiter routes,
         # decoder maps local-relative addressed satellites to global addresses
         self._arbiter = wishbone.Arbiter(addr_width=30, data_width=32,
                                          granularity=8,
-                                         features={"cti", "bte"})
+                                         features={"cti", "bte", "stall"})
         self._decoder = wishbone.Decoder(addr_width=30, data_width=32,
                                          granularity=8,
-                                         features={"cti", "bte"})
+                                         features={"cti", "bte", "stall"})
 
         # default firmware name
         if firmware is None:
             firmware = "firmware/main.bin"
 
         # set up clock-reset generator (CRG)
-        self.crg = ECPIX5CRG()
+        if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s']:
+            self.crg = ECP5CRG(clk_freq)
+        if fpga in ['arty_a7']:
+            self.crg = ArtyA7CRG(clk_freq)
 
         # set up CPU, with 64-to-32-bit downconverters
-        self.cpu = ExternalCore(name="ext_core")
-        cvtdbus = wishbone.Interface(addr_width=30, data_width=32,
-                                     granularity=8)
-        cvtibus = wishbone.Interface(addr_width=30, data_width=32,
-                                     granularity=8)
-        self.dbusdowncvt = WishboneDownConvert(self.cpu.dbus, cvtdbus)
-        self.ibusdowncvt = WishboneDownConvert(self.cpu.ibus, cvtibus)
-        self._arbiter.add(cvtibus) # I-Cache Master
-        self._arbiter.add(cvtdbus) # D-Cache Master. TODO JTAG master
-
-        # CPU interrupt controller
-        self.intc = GenericInterruptController(width=len(self.cpu.irq))
+        if add_cpu:
+            self.cpu = ExternalCore(name="ext_core")
+            cvtdbus = wishbone.Interface(addr_width=30, data_width=32,
+                                         granularity=8, features={'stall'})
+            cvtibus = wishbone.Interface(addr_width=30, data_width=32,
+                                         granularity=8, features={'stall'})
+            self.dbusdowncvt = WB64to32Convert(self.cpu.dbus, cvtdbus)
+            self.ibusdowncvt = WB64to32Convert(self.cpu.ibus, cvtibus)
+            self._arbiter.add(cvtibus) # I-Cache Master
+            self._arbiter.add(cvtdbus) # D-Cache Master. TODO JTAG master
+            self.cvtibus = cvtibus
+            self.cvtdbus = cvtdbus
+
+            # CPU interrupt controller
+            self.intc = GenericInterruptController(width=len(self.cpu.irq))
 
         # SRAM (but actually a ROM, for firmware), at address 0x0
         if fw_addr is not None:
             sram_width = 32
-            self.bootmem = SRAMPeripheral(size=0x10000, data_width=sram_width,
+            self.bootmem = SRAMPeripheral(size=0x8000, data_width=sram_width,
                                       writable=True)
-            with open(firmware, "rb") as f:
-                words = iter(lambda: f.read(sram_width // 8), b'')
-                bios  = [int.from_bytes(w, "little") for w in words]
-            self.bootmem.init = bios
+            if firmware is not None:
+                with open(firmware, "rb") as f:
+                    words = iter(lambda: f.read(sram_width // 8), b'')
+                    bios  = [int.from_bytes(w, "little") for w in words]
+                self.bootmem.init = bios
             self._decoder.add(self.bootmem.bus, addr=fw_addr) # ROM at fw_addr
 
-        # SRAM (read-writeable BRAM)
-        self.ram = SRAMPeripheral(size=4096)
-        self._decoder.add(self.ram.bus, addr=0x8000000) # SRAM at 0x8000_000
+        # System Configuration info
+        self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq,
+                                      has_uart=(uart_pins is not None))
+        self._decoder.add(self.syscon.bus, addr=0xc0000000) # at 0xc000_0000
+
+        if False:
+            # SRAM (read-writeable BRAM)
+            self.ram = SRAMPeripheral(size=4096)
+            self._decoder.add(self.ram.bus, addr=0x8000000) # at 0x0800_0000
 
-        # UART
+        # UART at 0xC000_2000, convert 32-bit bus down to 8-bit in an odd way
         if uart_pins is not None:
-            self.uart = UART16550()
+            # sigh actual UART in microwatt is 8-bit
+            self.uart = UART16550(data_width=8, pins=uart_pins,
+                                  features={'stall'})
+            # but (see soc.vhdl) 8-bit regs are addressed at 32-bit locations
+            cvtuartbus = wishbone.Interface(addr_width=5, data_width=32,
+                                            granularity=8,
+                                            features={'stall'})
             umap = MemoryMap(addr_width=7, data_width=8, name="uart_map")
-            #umap.add_resource(self._mem, name="mem", size=1<<5)
-            self.uart.bus.memory_map = umap
-            self._decoder.add(self.uart.bus, addr=0xc0002000) # 16550 UART addr
+            cvtuartbus.memory_map = umap
+            self._decoder.add(cvtuartbus, addr=0xc0002000) # 16550 UART addr
+            self.cvtuartbus = cvtuartbus
+
+        # SDRAM module using opencores sdr_ctrl
+        """
+        class MT48LC16M16(SDRModule):
+            # geometry
+            nbanks = 4
+            nrows  = 8192
+            ncols  = 512
+            # timings
+            technology_timings = _TechnologyTimings(tREFI=64e6/8192,
+                                                    tWTR=(2, None),
+                                                    tCCD=(1, None),
+                                                    tRRD=(None, 15))
+            speedgrade_timings = {"default": _SpeedgradeTimings(tRP=20,
+                                                    tRCD=20,
+                                                    tWR=15,
+                                                    tRFC=(None, 66),
+                                                    tFAW=None,
+                                                    tRAS=44)}
+        """
 
         # DRAM Module
-        if ddr_pins is not None:
-            self.ddrphy = DomainRenamer("dramsync")(ECP5DDRPHY(ddr_pins,
-                                                    sys_clk_freq=100e6))
-            self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr)
-
-            ddrmodule = MT41K256M16(clk_freq, "1:2") # match DDR3 ASIC P/N
+        if ddr_pins is not None or fpga == 'sim':
+            ddrmodule = dram_cls(clk_freq, "1:2") # match DDR3 ASIC P/N
 
+            #drs = lambda x: x
             drs = DomainRenamer("dramsync")
+
+            if fpga == 'sim':
+                self.ddrphy = FakePHY(module=ddrmodule,
+                                      settings=sim_ddr3_settings(clk_freq),
+                                      verbosity=SDRAM_VERBOSE_DBG,
+                                      clk_freq=clk_freq)
+            else:
+                self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=clk_freq))
+            self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr)
+
             dramcore = gramCore(phy=self.ddrphy,
                                 geom_settings=ddrmodule.geom_settings,
                                 timing_settings=ddrmodule.timing_settings,
                                 clk_freq=clk_freq)
-            self.dramcore = drs(dramcore)
+            if fpga == 'sim':
+                self.dramcore = dramcore
+            else:
+                self.dramcore = drs(dramcore)
             self._decoder.add(self.dramcore.bus, addr=dramcore_addr)
 
-            # map the DRAM onto Wishbone
-            self.drambone = drs(gramWishbone(self.dramcore))
+            # map the DRAM onto Wishbone, XXX use stall but set classic below
+            drambone = gramWishbone(dramcore, features={'stall'})
+            if fpga == 'sim':
+                self.drambone = drambone
+            else:
+                self.drambone = drs(drambone)
             self._decoder.add(self.drambone.bus, addr=ddr_addr)
 
+        # SPI controller
+        if spi_0_pins is not None and fpga in ['sim',
+                                             'rcs_arctic_tern_bmc_card']:
+            # The Lattice ECP5 devices require special handling on the
+            # dedicated SPI clock line, which is shared with the internal
+            # SPI controller used for FPGA bitstream loading.
+            spi0_is_lattice_ecp5_clk = False
+            if platform is not None and fpga in ['versa_ecp5',
+                                                 'rcs_arctic_tern_bmc_card',
+                                                 'isim']:
+                spi0_is_lattice_ecp5_clk = True
+
+            # Tercel contains two independent Wishbone regions, a
+            # configuration region and the direct API access region.
+            # Set the SPI 0 access region to 16MB, as the FPGA
+            # bitstream Flash device is unlikely to be larger than this.
+            # The main SPI Flash (SPI 1) should be set to at
+            # least 28 bits (256MB) to allow the use of large 4BA devices.
+            self.spi0 = Tercel(data_width=32, spi_region_addr_width=24,
+                               clk_freq=clk_freq,
+                               pins=spi_0_pins,
+                               lattice_ecp5_usrmclk=spi0_is_lattice_ecp5_clk)
+            self._decoder.add(self.spi0.bus, addr=spi0_addr)
+            self._decoder.add(self.spi0.cfg_bus, addr=spi0_cfg_addr)
+
+        # HyperRAM modules *plural*. Assumes using a Quad PMOD by Piotr
+        # Esden, sold by 1bitsquared, only doing one CS_N enable at the
+        # moment
+        if hyperram_pins is not None:
+            self.hyperram = HyperRAM(io=hyperram_pins, phy_kls=HyperRAMPHY,
+                                     features={'stall'})
+            self._decoder.add(self.hyperram.bus, addr=hyperram_addr)
+
         self.memory_map = self._decoder.bus.memory_map
 
         self.clk_freq = clk_freq
@@ -131,24 +425,58 @@ class DDR3SoC(SoC, Elaboratable):
         comb = m.d.comb
 
         # add the peripherals and clock-reset-generator
-        if platform is not None:
+        if platform is not None and hasattr(self, "crg"):
             m.submodules.sysclk = self.crg
 
         if hasattr(self, "bootmem"):
             m.submodules.bootmem = self.bootmem
-        m.submodules.ram = self.ram
-        m.submodules.uart = self.uart
-        m.submodules.arbiter = self._arbiter
-        m.submodules.decoder = self._decoder
-        if hasattr(self, "ddrphy"):
-            m.submodules.ddrphy = self.ddrphy
-            m.submodules.dramcore = self.dramcore
-            m.submodules.drambone = self.drambone
+        m.submodules.syscon = self.syscon
+        if hasattr(self, "ram"):
+            m.submodules.ram = self.ram
+        if hasattr(self, "uart"):
+            m.submodules.uart = self.uart
+            comb += self.uart.cts_i.eq(1)
+            comb += self.uart.dsr_i.eq(1)
+            comb += self.uart.ri_i.eq(0)
+            comb += self.uart.dcd_i.eq(1)
+            # sigh connect up the wishbone bus manually to deal with
+            # the mis-match on the data
+            uartbus = self.uart.bus
+            comb += uartbus.adr.eq(self.cvtuartbus.adr)
+            comb += uartbus.stb.eq(self.cvtuartbus.stb)
+            comb += uartbus.cyc.eq(self.cvtuartbus.cyc)
+            comb += uartbus.sel.eq(self.cvtuartbus.sel)
+            comb += uartbus.we.eq(self.cvtuartbus.we)
+            comb += uartbus.dat_w.eq(self.cvtuartbus.dat_w) # drops 8..31
+            comb += self.cvtuartbus.dat_r.eq(uartbus.dat_r) # drops 8..31
+            comb += self.cvtuartbus.ack.eq(uartbus.ack)
+            # aaand with the WB4-pipeline-to-WB3-classic mismatch, sigh
+            comb += uartbus.stall.eq(uartbus.cyc & ~uartbus.ack)
+            comb += self.cvtuartbus.stall.eq(uartbus.stall)
         if hasattr(self, "cpu"):
             m.submodules.intc = self.intc
             m.submodules.extcore = self.cpu
             m.submodules.dbuscvt = self.dbusdowncvt
             m.submodules.ibuscvt = self.ibusdowncvt
+            # create stall sigs, assume wishbone classic
+            #ibus, dbus = self.cvtibus, self.cvtdbus
+            #comb += ibus.stall.eq(ibus.stb & ~ibus.ack)
+            #comb += dbus.stall.eq(dbus.stb & ~dbus.ack)
+
+        m.submodules.arbiter = self._arbiter
+        m.submodules.decoder = self._decoder
+        if hasattr(self, "ddrphy"):
+            m.submodules.ddrphy = self.ddrphy
+            m.submodules.dramcore = self.dramcore
+            m.submodules.drambone = drambone = self.drambone
+            # grrr, same problem with drambone: not WB4-pipe compliant
+            comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack)
+
+        # add hyperram module
+        if hasattr(self, "hyperram"):
+            m.submodules.hyperram = hyperram = self.hyperram
+            # grrr, same problem with hyperram: not WB4-pipe compliant
+            comb += hyperram.bus.stall.eq(hyperram.bus.cyc & ~hyperram.bus.ack)
 
         # add blinky lights so we know FPGA is alive
         if platform is not None:
@@ -162,16 +490,36 @@ class DDR3SoC(SoC, Elaboratable):
             # wire up the CPU interrupts
             comb += self.cpu.irq.eq(self.intc.ip)
 
+        if platform is None:
+            return m
+
         # add uart16550 verilog source. assumes a directory
         # structure where ls2 has been checked out in a common
         # subdirectory as https://github.com/freecores/uart16550
-        if platform is not None:
-            opencores_16550 = "../../uart16550/rtl/verilog"
+        opencores_16550 = "../../uart16550/rtl/verilog"
+        pth = os.path.split(__file__)[0]
+        pth = os.path.join(pth, opencores_16550)
+        fname = os.path.abspath(pth)
+        print (fname)
+        self.uart.add_verilog_source(fname, platform)
+
+        if hasattr(self, "spi0"):
+            # add Tercel verilog source. assumes a directory
+            # structure where ls2 has been checked out in a common
+            # subdirectory as https://git.libre-soc.org/git/microwatt.git
+            raptor_tercel = "../../microwatt/tercel"
             pth = os.path.split(__file__)[0]
-            pth = os.path.join(pth, opencores_16550)
+            pth = os.path.join(pth, raptor_tercel)
             fname = os.path.abspath(pth)
             print (fname)
-            self.uart.add_verilog_source(fname, platform)
+            self.spi0.add_verilog_source(fname, platform)
+
+        # add the main core
+        pth = os.path.split(__file__)[0]
+        pth = os.path.join(pth, '../external_core_top.v')
+        fname = os.path.abspath(pth)
+        with open(fname) as f:
+            platform.add_file(fname, f)
 
         return m
 
@@ -180,63 +528,191 @@ class DDR3SoC(SoC, Elaboratable):
         # and at the moment that's just UART tx/rx.
         ports = []
         ports += [self.uart.tx_o, self.uart.rx_i]
+        if hasattr(self, "hyperram"):
+            ports += list(self.hyperram.ports())
+        if hasattr(self, "ddrphy"):
+            if hasattr(self.ddrphy, "pads"): # real PHY
+                ports += list(self.ddrphy.pads.fields.values())
+            else: # FakePHY, get at the dfii pads, stops deletion of nets
+                for phase in self.dramcore.dfii.master.phases:
+                    print ("dfi master", phase)
+                    ports += list(phase.fields.values())
+                for phase in self.dramcore.dfii.slave.phases:
+                    print ("dfi master", phase)
+                    ports += list(phase.fields.values())
+                for phase in self.dramcore.dfii._inti.phases:
+                    print ("dfi master", phase)
+                    ports += list(phase.fields.values())
+        ports += [ClockSignal(), ResetSignal()]
         return ports
 
-if __name__ == "__main__":
+def build_platform(fpga, firmware):
 
-    # create a platform selected from the toolchain. defaults to VERSA_ECP5
-    # only VERSA_ECP5 will work for now because of the DDR3 module
-    fpga = "versa_ecp5"
-    if len(sys.argv) >= 2:
-        fpga = sys.argv[1]
+    # create a platform selected from the toolchain. 
     platform_kls =  {'versa_ecp5': VersaECP5Platform,
+                     'versa_ecp5_85': VersaECP5Platform85,
                      'ulx3s': ULX3S_85F_Platform,
                      'arty_a7': ArtyA7_100Platform,
+                     'isim': IcarusVersaPlatform,
                      'sim': None,
                     }[fpga]
     toolchain = {'arty_a7': "yosys_nextpnr",
                  'versa_ecp5': 'Trellis',
+                 'versa_ecp5_85': 'Trellis',
+                 'isim': 'Trellis',
                  'ulx3s': 'Trellis',
                  'sim': None,
                 }.get(fpga, None)
+    dram_cls = {'arty_a7': None,
+                 'versa_ecp5': MT41K64M16,
+                 'versa_ecp5_85': MT41K64M16,
+                 #'versa_ecp5': MT41K256M16,
+                 'ulx3s': None,
+                 'sim': MT41K256M16,
+                 'isim': MT41K64M16,
+                }.get(fpga, None)
     if platform_kls is not None:
         platform = platform_kls(toolchain=toolchain)
+        if fpga == 'versa_ecp5_85':
+            platform.speed = "7" # HACK. speed grade 7, sigh
     else:
         platform = None
 
-    # select a firmware file
-    firmware = None
+    print ("platform", fpga, firmware, platform)
+
+    # set clock frequency
+    clk_freq = 70e6
+    if fpga == 'sim':
+        clk_freq = 100e6
+    if fpga == 'isim':
+        clk_freq = 50e6 # not above 50 MHz, stops DRAM being enabled
+    if fpga == 'versa_ecp5':
+        clk_freq = 50e6 # crank right down to test hyperram
+    if fpga == 'versa_ecp5_85':
+        clk_freq = 55e6
+    if fpga == 'arty_a7':
+        clk_freq = 50e6
+    if fpga == 'ulx3s':
+        clk_freq = 40.0e6
+
+    # select a firmware address
     fw_addr = None
-    if len(sys.argv) >= 3:
-        firmware = sys.argv[2]
+    if firmware is not None:
         fw_addr = 0x0000_0000
 
+    print ("fpga", fpga, "firmware", firmware)
+
+    # get UART resource pins
     if platform is not None:
-        # get DDR and UART resource pins
-        ddr_pins = platform.request("ddr3", 0,
-                                    dir={"dq":"-", "dqs":"-"},
-                                    xdr={"clk":4, "a":4, "ba":4, "clk_en":4,
-                                         "odt":4, "ras":4, "cas":4, "we":4})
         uart_pins = platform.request("uart", 0)
     else:
-        ddr_pins = None
         uart_pins = Record([('tx', 1), ('rx', 1)], name="uart_0")
 
+    # get DDR resource pins; disabled unless clock frequency is above 50 MHz
+    ddr_pins = None
+    if (clk_freq > 50e6 and platform is not None and
+        fpga in ['versa_ecp5', 'versa_ecp5_85', 'arty_a7', 'isim']):
+        ddr_pins = platform.request("ddr3", 0,
+                                    dir={"dq":"-", "dqs":"-"},
+                                    xdr={"rst": 4, "clk":4, "a":4,
+                                         "ba":4, "clk_en":4,
+                                         "odt":4, "ras":4, "cas":4, "we":4,
+                                         "cs": 4})
+
+    # Get SPI resource pins
+    spi_0_pins = None
+    if platform is not None and fpga in ['rcs_arctic_tern_bmc_card']:
+        if toolchain == 'Trellis':
+            # The ECP5 series FPGAs handle the SPI clock directly on
+            # the FPGA configuration Flash device
+            spi_0_pins = platform.request("spi_0", 0,
+                                        dir={"dq":"io", "cs_n":"o"},
+                                        xdr={"dq": 1, "cs_n": 1})
+        else:
+            spi_0_pins = platform.request("spi_0", 0,
+                                        dir={"dq":"io", "cs_n":"o", "clk":"o"},
+                                        xdr={"dq": 1, "cs_n": 1, "clk": 0})
+
+    # Get HyperRAM pins
+    hyperram_pins = None
+    if platform is None:
+        hyperram_pins = HyperRAMPads()
+    elif fpga in ['isim']:
+        hyperram_ios = HyperRAMResource(0, cs_n="B11",
+                                        dq="D4 D3 F4 F3 G2 H2 D2 E2",
+                                        rwds="U13", rst_n="T13", ck_p="V10",
+                                        # ck_n="D12" - for later (DDR)
+                                        attrs=Attrs(IOSTANDARD="LVCMOS33"))
+        platform.add_resources(hyperram_ios)
+        hyperram_pins = platform.request("hyperram")
+        print ("isim a7 hyperram", hyperram_ios)
+    # Digilent Arty A7-100t
+    elif platform is not None and fpga in ['arty_a7']:
+        hyperram_ios = HyperRAMResource(0, cs_n="B11",
+                                        dq="D4 D3 F4 F3 G2 H2 D2 E2",
+                                        rwds="U13", rst_n="T13", ck_p="V10",
+                                        # ck_n="D12" - for later (DDR)
+                                        attrs=Attrs(IOSTANDARD="LVCMOS33"))
+        platform.add_resources(hyperram_ios)
+        hyperram_pins = platform.request("hyperram")
+        print ("arty a7 hyperram", hyperram_ios)
+    # VERSA ECP5
+    elif platform is not None and fpga in ['versa_ecp5', 'versa_ecp5_85']:
+        hyperram_ios = HyperRAMResource(0, cs_n="B13",
+                                        dq="E14 C10 B10 E12 D12 A9 D11 D14",
+                                        rwds="C14", rst_n="E13", ck_p="D13",
+                                        attrs=Attrs(IO_TYPE="LVCMOS33"))
+        platform.add_resources(hyperram_ios)
+        hyperram_pins = platform.request("hyperram")
+        print ("versa ecp5 hyperram", hyperram_ios)
+    print ("hyperram pins", hyperram_pins)
+
     # set up the SOC
-    soc = DDR3SoC(ddrphy_addr=0xff000000, # DRAM firmware init base
-                  dramcore_addr=0x80000000,
-                  ddr_addr=0x10000000,
+    soc = DDR3SoC(fpga=fpga, dram_cls=dram_cls,
+                  # check microwatt_soc.h for these
+                  ddrphy_addr=0xff000000,   # DRAM_INIT_BASE firmware base
+                  dramcore_addr=0xc8000000, # DRAM_CTRL_BASE
+                  ddr_addr=0x40000000,      # DRAM_BASE
+                  spi0_addr=0x10000000,     # SPI0_BASE
+                  spi0_cfg_addr=0xc0003000, # SPI0_CTRL_BASE
+                  hyperram_addr=0xa0000000, # HYPERRAM_BASE
                   fw_addr=fw_addr,
+                  #fw_addr=None,
                   ddr_pins=ddr_pins,
                   uart_pins=uart_pins,
-                  firmware=firmware)
+                  spi_0_pins=spi_0_pins,
+                  hyperram_pins=hyperram_pins,
+                  firmware=firmware,
+                  clk_freq=clk_freq,
+                  add_cpu=True)
+
+    if toolchain == 'Trellis':
+        # add -abc9 option to yosys synth_ecp5
+        #os.environ['NMIGEN_synth_opts'] = '-abc9 -nowidelut'
+        #os.environ['NMIGEN_synth_opts'] = '-abc9'
+        os.environ['NMIGEN_synth_opts'] = '-nowidelut'
 
     if platform is not None:
         # build and upload it
-        platform.build(soc, do_program=True)
+        if fpga == 'isim':
+            platform.build(soc, do_program=False,
+                                do_build=True, build_dir="build_simsoc")
+        else:
+            platform.build(soc, do_program=True)
     else:
         # for now, generate verilog
         vl = verilog.convert(soc, ports=soc.ports())
         with open("ls2.v", "w") as f:
             f.write(vl)
 
+
+# urrr this gets exec()d by the build process without arguments
+# which screws up.  use the arty_a7_ls2.py etc. with no arguments
+if __name__ == '__main__':
+    fpga = None
+    firmware = None
+    if len(sys.argv) >= 2:
+        fpga = sys.argv[1]
+    if len(sys.argv) >= 3:
+        firmware = sys.argv[2]
+    build_platform(fpga, firmware)