Merge remote-tracking branch 'origin/ddr3' orangecrab-ddr3
author Tobias Platen <tplaten@posteo.de>
Fri, 17 Feb 2023 17:51:46 +0000 (18:51 +0100)
committer Tobias Platen <tplaten@posteo.de>
Fri, 17 Feb 2023 17:51:46 +0000 (18:51 +0100)
1  2 
coldboot/coldboot.c
runsimsoc2.sh
simsoc.ys
src/ecp5_crg.py
src/ls2.py

index 65444ee2963a082631fa0cc5b04b6df0b9d2dd8e,0f42a827f96383fdc27f0f85c4f827b148b060fa..477f74ac803b2cc095e30fdaab0b840f98fcc772
  
  #include "elf64.h"
  
++//#ifdef ICARUS_DEBUG
++//HACK
++#define DEBUG2 0xfff00018
++#define DEBUG3 0xfff0001C
++#define DEBUG4 0xfff00020
++//#endif
++
 +#define ORANGECRAB_MODE_REGISTERS 0x0320, 0x0002, 0x0200, 0x0000
 +
 +static inline void mtspr(int sprnum, unsigned long val)
 +{
 +    __asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val));
 +}
 +
  static inline uint32_t read32(const void *addr)
  {
        return *(volatile uint32_t *)addr;
@@@ -94,17 -60,14 +101,21 @@@ void isr(void) 
  
  }
  
 +extern void crank_up_qspi_level1(void);
 +extern int host_spi_flash_init(void);
 +
++#if 0
  static bool fl_read(void *dst, uint32_t offset, uint32_t size)
  {
      uint8_t *d = dst;
      memcpy(d, (void *)(unsigned long)(SPI_FLASH_BASE + offset), size);
      return true;
  }
++#endif
  
 -static unsigned long copy_flash(unsigned int offset)
++#if 0
++//requires working dram
 +static unsigned long copy_flash(unsigned int offset, unsigned int dst_offs)
  {
      Elf64_Ehdr ehdr;
      Elf64_Phdr ph;
@@@ -170,6 -123,6 +181,7 @@@ dump
  
      return -1ul;
  }
++#endif
  
  
  // XXX
@@@ -200,6 -153,6 +212,7 @@@ int gram_write(const struct gramCtx *ct
        return 0;
  }
  
++
  int main(void) {
        const int kNumIterations = 14;
        int res, failcnt = 0;
      unsigned long ftr, spi_offs=0x0;
        volatile uint32_t *ram = (uint32_t*)MEMORY_BASE;
  
++      #ifdef ICARUS_DEBUG
++      writel(0xFF,DEBUG2);
++      #endif
        console_init();
++      #ifdef ICARUS_DEBUG
++      writel(0xFC,DEBUG2);
++      #endif
        //puts("Firmware launched...\n");
  
  #if 1
      return 0;
  #endif
  
 -    for (int persistence=0; persistence < 1000; persistence++) {
 +    // init DRAM only if SYSCON says it exists (duh)
 +    if (ftr & SYS_REG_INFO_HAS_DRAM)
 +    {
++              
          puts("DRAM init... ");
  
          struct gramCtx ctx;
          };
  #endif
          struct gramProfile profile2;
--        gram_init(&ctx, &profile, (void*)MEMORY_BASE,
--                                  (void*)DRAM_CTRL_BASE,
--                                  (void*)DRAM_INIT_BASE);
++        #ifdef ICARUS_DEBUG
++        writel(0xCC,DEBUG2);
++        #endif
++        gram_init(&ctx, &profile, (void*)MEMORY_BASE,          /* "Main" memory alias, either BRAM or DRAM */
++        
++        #if 0
++        dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET|DFII_COMMAND_CS); //if software_control
++        //enable clocks and https://en.wikipedia.org/wiki/On-die_termination
++        
++        dfii_initseq(ctx, profile); -> writes to core
++              gram_load_calibration(ctx, profile); -> writes to phy
++        
++          dfii_setcontrol(ctx, DFII_CONTROL_SEL|DFII_CONTROL_RESET);
++          #endif
++              
++        
++        
++        /* TODO verify this, most likely the error is in the phy
++         *  # create the core (bridge from PHY to DFI)
++         * see gram/core/__init__.py
++         * self.dfii = DFIInjector(
++            csr_bank=CSRPrefixProxy(bank, "dfii"),
++            * csr_bank.csr(4, "w")  # sel, clk_en, odt, reset
++            * per phase :
++            *         self._command = csr_bank.csr(6, "w")
++                      *       self._command_issue = csr_bank.csr(1, "w")
++         * =====================================================================
++            dramcore = gramCore(phy=self.ddrphy,
++                                geom_settings=ddrmodule.geom_settings,
++                                timing_settings=ddrmodule.timing_settings,
++                                #features=features,
++                                clk_freq=self.dram_clk_freq)
++        */
++                                  (void*)DRAM_CTRL_BASE,       /* LiteDRAM control registers dramcore_addr=0xc8000000 */
++                                  (void*)0xfff00000);            /* guess: this was wrong => now fixed */
++        #ifdef ICARUS_DEBUG
++        writel(0xDD,DEBUG2);
++        #endif
          puts("done\n");
  
          puts("MR profile: ");
          puts("Auto calibrating... ");
          res = gram_generate_calibration(&ctx, &profile2);
          if (res != GRAM_ERR_NONE) {
--            puts("failed\n");
++                      //guess: always fails in simulation, only sometimes on real hardware
++                      #ifdef ICARUS_DEBUG
++                      writel(0xFFFFAAAA,DEBUG1);//stop simulation
++                      #endif
++            puts("fAiLED\n"); //TODO: find other error locations
++            while(1){}
              gram_load_calibration(&ctx, &profile);
          } else {
++                      //puts("NO_ERROR\n");
              gram_load_calibration(&ctx, &profile2);
          }
          puts("done\n");
          for (size_t i = 0; i < kNumIterations; i++) {
              writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
          }
++        
  
  #if 0
          for (int dly = 0; dly < 8; dly++) {
              uart_writeuint32(profile2.rdly_p1);
              gram_load_calibration(&ctx, &profile2);
              for (size_t i = 0; i < kNumIterations; i++) {
--                if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
++                              uint32_t r = readl((unsigned long)&(ram[i]));
++                              uint32_t c = (0xDEAF0000 | i*4);
++                if (r!=c) {
                      puts("fail : *(0x");
                      uart_writeuint32((unsigned long)(&ram[i]));
--                    puts(") = ");
--                    uart_writeuint32(readl((unsigned long)&(ram[i])));
++                    puts(") = [r]");
++                    uart_writeuint32(r);
++                    puts(" != [c]");
++                    uart_writeuint32(r);
                      puts("\n");
                      failcnt++;
  
  
                  if (failcnt > 10) {
                      puts("Test canceled (more than 10 errors)\n");
++                    #ifdef ICARUS_DEBUG
++                    writel(0xFFFFBBBB,DEBUG1);//stop simulation
++                    #endif
                      break;
                  }
              }
      }
  #endif
        puts("done\n");
 -
 -    // memcpy from SPI Flash to SDRAM then boot
++      #ifdef ICARUS_DEBUG
++      writel(0xFFFFFFFF,DEBUG1);//stop simulation
++      #endif
 +
 +#if 0 // ooo, annoying: won't work. no idea why
 +    // temporary hard-hack: boot directly from QSPI. really
 +    // should do something like detect at least... something
 +    if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH))
 +    {
 +        // jump to absolute address
 +        mtspr(8, SPI_FLASH_BASE); // move address to LR
 +        __asm__ volatile("blr");
 +        return 0;
 +    }
 +#endif
++      
++      #if 0
 +    // memcpy from SPI Flash then boot
      if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) &&
 -        (ftr & SYS_REG_INFO_HAS_DRAM) &&
          (failcnt == 0))
      {
 +/*
 +        puts("ELF @ QSPI\n");
          // identify ELF, copy if present, and get the start address
 -        unsigned long faddr = copy_flash(spi_offs);
 +        unsigned long faddr = copy_flash(spi_offs,
 +                                         0x600000); // hack!
          if (faddr != -1ul) {
              // jump to absolute address
 +            mtspr(8, faddr); // move address to LR
 +            __asm__ volatile("blr");
 +
 +            // works with head.S which copies r3 into ctr then does bctr
              return faddr;
          }
 +        puts("copy QSPI\n");
 +*/
 +        // another terrible hack: copy a block from flash at offset 0x600000
 +        // into mem address 0x1000000 (length cut to 0x8000 while testing),
 +        // then jump to it.  this allows a dtb image to be executed
 +        puts("copy QSPI\n");
 +        volatile uint32_t *mem = (uint32_t*)0x1000000;
 +        fl_read(mem,       // destination in RAM
 +                0x600000,  // offset into QSPI
 +                0x8000); // length - shorter (testing) 0x8000);
 +                //0x1000000); // length 
 +        puts("dump mem\n");
 +        for (int i=0;i<256;i++) {
 +          tmp = readl((unsigned long)&(mem[i]));
 +          uart_writeuint32(tmp);
 +          puts(" ");
 +          if ((i & 0x7) == 0x7) puts("\r\n");
 +        }
 +        puts("\r\n");
 +        mtspr(8, 0x1000000); // move address to LR
 +        __asm__ volatile("blr");
      }
++    #endif
++
  
        return 0;
  }
diff --cc runsimsoc2.sh
Simple merge
diff --cc simsoc.ys
index a4adcefdcd7103aa29cc6578bdaf470b89e0f845,72b0f6be4eb71ab2b9d9f45914cd4afaf3c332ef..deaf105f1f6a3c81cbc298ab77374742ddaf7e16
+++ b/simsoc.ys
@@@ -22,14 -16,9 +22,12 @@@ read_verilog  ../uart16550/rtl/verilog/
  read_verilog  ../uart16550/rtl/verilog/uart_receiver.v
  read_verilog  ../uart16550/rtl/verilog/uart_tfifo.v
  read_verilog  ../uart16550/rtl/verilog/uart_wb.v
 +
 +# Tercel QSPI
  read_verilog  ../tercel-qspi/tercel/phy.v 
  read_verilog  ../tercel-qspi/tercel/wishbone_spi_master.v
 -read_verilog  ../verilog-wishbone/rtl/wb_async_reg.v
 +# WB Async Bridge
 +read_verilog  ../verilog-wishbone/rtl/wb_async_reg.v 
  # errors in the ethmac rtl, comment out for now
  #read_verilog  ../ethmac/rtl/verilog/eth_clockgen.v
  #read_verilog  ../ethmac/rtl/verilog/eth_cop.v
diff --cc src/ecp5_crg.py
index 11bb8a957a6f1dd2e4f8bc3501c2b5879d9d237a,36c8f1dd4f5ecde53a30f8b949862abfdb828117..6130729d49e97f2e0a1248961b330a9eb59b9a7b
@@@ -169,70 -170,14 +170,81 @@@ class PLL(Elaboratable)
  
  
  class ECP5CRG(Elaboratable):
++<<<<<<< HEAD
 +    def __init__(self, sys_clk_freq=100e6, dram_clk_freq=None,
 +                       pod_bits=25, sync_bits=26, need_bridge=False):
 +        """when dram_clk_freq=None, a dramsync domain is still created
 +        but it is an alias of sync domain.  likewise the 2x
 +        """
 +        self.sys_clk_freq = sys_clk_freq
 +        self.dram_clk_freq = dram_clk_freq
 +        self.pod_bits = pod_bits # for init domain
 +        self.sync_bits = sync_bits # for all other domains
 +        self.need_bridge = need_bridge # insert ECLKBRIDGECS
 +        assert pod_bits <= sync_bits, \
 +            "power-on-delay bits %d should " \
 +            " be less than sync_bits %d" % (pod_bits, sync_bits)
 +
 +    def phase2_domain(self, m, pll, name, freq, esyncb):
 +        """creates a domain that can be used with xdr=4 platform resources.
 +
 +        this requires creating a domain at *twice* the frequency,
 +        then halving it.  the doubled-frequency can then go to the
 +        eclk parts of a 4-phase IOPad:
 +            pads.a.o_clk.eq(ClockSignal("dramsync")),
 +            pads.a.o_fclk.eq(ClockSignal("dramsync2x")),
 +        """
 +
 +        # create names
 +        cd2x = "%s2x" % name
 +        cd2x_ub = cd2x+"_unbuf"
 +        cd = name
 +        # Generating sync2x from extclk
 +        cd_2x = ClockDomain(cd2x, local=False)
 +        cd_2x_unbuf = ClockDomain(cd2x_ub, local=False, reset_less=True)
 +        cd_ = ClockDomain("%s" % name, local=False)
 +
 +        # create PLL clocks
 +        pll.create_clkout(ClockSignal(cd2x_ub), 2*freq)
 +        if esyncb:
 +            if self.need_bridge:
 +                sys2x_clk_ecsout = Signal()
 +                m.submodules["%s_eclkbridgecs" % cd] = Instance("ECLKBRIDGECS",
 +                    i_CLK0 = ClockSignal(cd2x_ub),
 +                    i_SEL  = 0,
 +                    o_ECSOUT = sys2x_clk_ecsout)
 +                m.submodules["%s_eclksyncb" % cd] = Instance("ECLKSYNCB",
 +                    i_ECLKI = sys2x_clk_ecsout,
 +                    i_STOP  = 0,
 +                    o_ECLKO = ClockSignal(cd2x))
 +            else:
 +                m.submodules["%s_eclksyncb" % cd] = Instance("ECLKSYNCB",
 +                    i_ECLKI = ClockSignal(cd2x_ub),
 +                    i_STOP  = 0,
 +                    o_ECLKO = ClockSignal(cd2x))
 +        else:
 +            m.d.comb += ClockSignal(cd2x).eq(ClockSignal(cd2x_ub)) # no esyncb
 +        m.domains += cd_2x_unbuf
 +        m.domains += cd_2x
 +        m.domains += cd_
 +
 +        # Generating sync from sync2x
 +        m.submodules["%s_clkdivf" % cd] = Instance("CLKDIVF",
 +            p_DIV="2.0",
 +            i_ALIGNWD=0,
 +            i_CLKI=ClockSignal(cd2x),
 +            i_RST=0,
 +            o_CDIVX=ClockSignal(cd))
++=======
+     def __init__(self, sys_clk_freq=100e6, core_clk_freq=100e6, pod_bits=25):
+         self.sys_clk_freq = sys_clk_freq
+         self.core_clk_freq = core_clk_freq
+         self.pod_bits = pod_bits
++>>>>>>> origin/ddr3
+         # DDR clock control signals
+         self.ddr_clk_stop = Signal()
+         self.ddr_clk_reset = Signal()
  
      def elaborate(self, platform):
          m = Module()
          gsr0 = Signal()
          gsr1 = Signal()
  
 -        m.submodules += [
 -            Instance("FD1S3AX", p_GSR="DISABLED",
 +        m.submodules.gsr0 = Instance("FD1S3AX", p_GSR="DISABLED",
                                  i_CK=ClockSignal("rawclk"),
                                  i_D=~reset,
 -                                o_Q=gsr0),
 -            Instance("FD1S3AX", p_GSR="DISABLED",
 +                                o_Q=gsr0)
 +        m.submodules.gsr1 = Instance("FD1S3AX", p_GSR="DISABLED",
                                  i_CK=ClockSignal("rawclk"),
                                  i_D=gsr0,
 -                                o_Q=gsr1),
 -            Instance("SGSR", i_CLK=ClockSignal("rawclk"),
 -                             i_GSR=gsr1),
 -        ]
 +                                o_Q=gsr1)
 +        m.submodules.sgsr = Instance("SGSR", i_CLK=ClockSignal("rawclk"),
 +                             i_GSR=gsr1)
  
-         # PLL
-         m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~reset)
-         # Power-on delay (655us)
+         # Power-on delay
          podcnt = Signal(self.pod_bits, reset=-1)
 +        synccnt = Signal(self.sync_bits, reset=-1)
          pod_done = Signal()
++<<<<<<< HEAD
 +        sync_done = Signal()
 +        with m.If((synccnt != 0) & pll.locked):
 +            m.d.rawclk += synccnt.eq(synccnt-1)
 +        with m.If((podcnt != 0) & pll.locked):
++=======
+         with m.If(podcnt != 0):
++>>>>>>> origin/ddr3
              m.d.rawclk += podcnt.eq(podcnt-1)
          m.d.rawclk += pod_done.eq(podcnt == 0)
 +        m.d.rawclk += sync_done.eq(synccnt == 0)
  
++<<<<<<< HEAD
 +        # and reset which only drops when the PLL is done and pod completes
++=======
+         # PLL
+         m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~pod_done|~reset)
+         # Generating sync2x (200Mhz) and init (25Mhz) from extclk
+         # sync is our "nest" frequency, cpu is the core frequency
+         # On ASIC, core >= nest, on FPGA, cpu <= nest...
+         cd_sync2x = ClockDomain("sync2x", local=False)
+         cd_sync2x_unbuf = ClockDomain("sync2x_unbuf",
+                                       local=False, reset_less=True)
+         cd_init = ClockDomain("init", local=False)
+         cd_sync = ClockDomain("sync", local=False)
+         cd_cpu = ClockDomain("cpu", local=False)
+         cd_dramsync = ClockDomain("dramsync", local=False)
+         # create PLL clocks
+         pll.set_clkin_freq(platform.default_clk_frequency)
+         pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq)
+         pll.create_clkout(ClockSignal("init"), 25e6)
+         if self.sys_clk_freq == self.core_clk_freq:
+             m.d.comb += ClockSignal("cpu").eq(ClockSignal("sync"))
+         else:
+             pll.create_clkout(ClockSignal("cpu"), self.core_clk_freq)
+         m.submodules += Instance("ECLKSYNCB",
+                 i_ECLKI = ClockSignal("sync2x_unbuf"),
+                 i_STOP  = self.ddr_clk_stop,
+                 o_ECLKO = ClockSignal("sync2x"))
+         m.domains += cd_sync2x_unbuf
+         m.domains += cd_sync2x
+         m.domains += cd_init
+         m.domains += cd_sync
+         m.domains += cd_cpu
+         m.domains += cd_dramsync
++>>>>>>> origin/ddr3
          reset_ok = Signal(reset_less=True)
 +        sync_reset_ok = Signal(reset_less=True)
          m.d.comb += reset_ok.eq(~pll.locked|~pod_done)
++<<<<<<< HEAD
 +        m.d.comb += sync_reset_ok.eq(~pll.locked|~sync_done)
++=======
+         m.d.comb += ResetSignal("init").eq(reset_ok)
+         m.d.comb += ResetSignal("sync").eq(reset_ok)
+         m.d.comb += ResetSignal("cpu").eq(reset_ok)
+         m.d.comb += ResetSignal("dramsync").eq(reset_ok|self.ddr_clk_reset)
++>>>>>>> origin/ddr3
  
 -        # # Generating sync (100Mhz) from sync2x
 +        # create PLL input clock from platform default frequency
 +        pll.set_clkin_freq(platform.default_clk_frequency)
  
++<<<<<<< HEAD
 +        # single or double main sync clock domain.  double needs a 2nd PLL
 +        # to match up with the ECLKSYNCB, one per quadrant inside the ECP5
 +        if self.dram_clk_freq is not None:
 +            m.domains += ClockDomain("sync_unbuf", local=False, reset_less=True)
 +            m.domains += ClockDomain("sync", local=False)
 +            pll.create_clkout(ClockSignal("sync_unbuf"), self.sys_clk_freq)
 +            m.d.comb += ClockSignal("sync").eq(ClockSignal("sync_unbuf"))
 +        else:
 +            # Generating sync2x and sync from extclk, which is *only* how
 +            # xdr=4 can be requested on the sync domain. also do not request
 +            # an edge-clock-stop
 +            self.phase2_domain(m, pll, "sync", self.sys_clk_freq, True)
 +            m.d.comb += ResetSignal("sync2x").eq(sync_reset_ok)
 +        m.d.comb += ResetSignal("sync").eq(sync_reset_ok)
 +
 +        # DRAM clock: if not requested set to sync, otherwise create with
 +        # an ECLKSYNCB (which is set to no-stop at the moment)
 +        if self.dram_clk_freq is not None:
 +            self.phase2_domain(m, pll, "dramsync", self.dram_clk_freq, True)
 +        else:
 +            # alias dramsync and dramsync2x to sync and sync2x
 +            cd_dramsync = ClockDomain("dramsync", local=False)
 +            m.domains += cd_dramsync
 +            m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync"))
 +            # and a dram 2x sigh
 +            cd_dramsync2x = ClockDomain("dramsync2x", local=False)
 +            m.domains += cd_dramsync2x
 +            m.d.comb += ClockSignal("dramsync2x").eq(ClockSignal("sync2x"))
 +        # resets for the dram domains
 +        m.d.comb += ResetSignal("dramsync2x").eq(sync_reset_ok)
 +        m.d.comb += ResetSignal("dramsync").eq(sync_reset_ok)
++=======
+         m.submodules += Instance("CLKDIVF",
+             p_DIV="2.0",
+             i_ALIGNWD=0,
+             i_CLKI=ClockSignal("sync2x"),
+             i_RST=ResetSignal("dramsync"),
+             o_CDIVX=ClockSignal("sync"))
++>>>>>>> origin/ddr3
  
 -        # temporarily set dram sync clock exactly equal to main sync
 -        m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync"))
 +        # create 25 mhz "init" clock, straight (no 2x phase stuff)
 +        # this domain can be used before all others, has its own delay
 +        # (pod_bits)
 +        cd_init = ClockDomain("init", local=False)
 +        pll.create_clkout(ClockSignal("init"), 25e6)
 +        m.domains += cd_init
 +        m.d.comb += ResetSignal("init").eq(reset_ok)
  
          return m
  
diff --cc src/ls2.py
index 5ffab701792d16ed503b67844a55692f28fc1991,3f1e78a3f985cbb5467a9b377a703448fd6ac18e..1d4035979ff3f13210679a3d906d2b635f8ae96b
@@@ -240,20 -256,17 +261,25 @@@ class WB64to32Convert(Elaboratable)
  class DDR3SoC(SoC, Elaboratable):
      def __init__(self, *,
                   fpga,
 -                 dram_cls,
 -                 uart_pins, spi_0_pins, ethmac_0_pins,
 -                 ddr_pins, ddrphy_addr, dramcore_addr, ddr_addr,
 -                 fw_addr=0x0000_0000,
 -                 firmware=None,
 -                 spi0_addr, spi0_cfg_addr,
 -                 eth0_cfg_addr, eth0_irqno,
 +                 dram_cls=None,
 +                 uart_pins=None, spi_0_pins=None, ethmac_0_pins=None,
 +                 ddr_pins=None, ddrphy_addr=None,
 +                 dramcore_addr=None, ddr_addr=None,
 +                 fw_addr=0x0000_0000, firmware=None,
 +                 uart_addr=None, uart_irqno=0,
 +                 spi0_addr=None, spi0_cfg_addr=None,
 +                 eth0_cfg_addr=None, eth0_irqno=None,
                   hyperram_addr=None,
                   hyperram_pins=None,
++<<<<<<< HEAD
 +                 xics_icp_addr=None, xics_ics_addr=None,
 +                 clk_freq=50e6,
 +                 dram_clk_freq=None,
 +                 core_clk_freq=50e6,
++=======
+                  nest_freq=50e6,
+                  core_freq=50e6,
++>>>>>>> origin/ddr3
                   add_cpu=True):
  
          # wishbone routing is as follows:
          #       |     |
          #       +--+--+
          #          |
+         #      WBAsyncBridge
+         #          |
          #      64to32DownCvt
          #          |
 -        #       arbiter------------------------------------------+
 -        #          |                                             |
 -        #   +---decoder----+--------+---------+-------+--------+ |
 -        #   |      |       |        |         |       |        | |
 -        #  uart  XICS    CSRs     DRAM     XIP SPI HyperRAM   EthMAC
 +        #       arbiter------------------------------------------------------+
 +        #          |                                                         |
 +        #   +---decoder----+--------+---------------+-------------+--------+ |
 +        #   |      |       |        |               |             |        | |
 +        #   |      |       |  WBAsyncBridge         |             |        | |
 +        #   |      |       |        |               |             |        | |
 +        #  uart  XICS    CSRs     DRAM          XIP SPI       HyperRAM   EthMAC
  
          # set up wishbone bus arbiter and decoder. arbiter routes,
          # decoder maps local-relative addressed satellites to global addresses
  
          # set up clock request generator
          pod_bits = 25
++<<<<<<< HEAD
 +        sync_bits = 26
 +        need_bridge=False
 +        if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s',
 +                    'orangecrab','orangecrab_isim', 'rcs_arctic_tern_bmc_card']:
 +            if fpga in ['isim','orangecrab_isim']:
 +                pod_bits = 5
 +                sync_bits = 6
 +            if fpga in ['orangecrab', 'orangecrab_sim',
 +                        'rcs_arctic_tern_bmc_card']:
 +                need_bridge=True
 +            self.crg = ECP5CRG(clk_freq, dram_clk_freq=dram_clk_freq,
 +                               pod_bits=pod_bits, sync_bits=sync_bits,
 +                               need_bridge=need_bridge)
++=======
+         if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s']:
+             if fpga in ['isim']:
+                 pod_bits = 6
+             self.crg = ECP5CRG(sys_clk_freq=nest_freq, core_clk_freq=core_freq,
+                                          pod_bits=pod_bits)
++>>>>>>> origin/ddr3
          if fpga in ['arty_a7']:
-             self.crg = ArtyA7CRG(clk_freq)
+             self.crg = ArtyA7CRG(core_freq)
  
 -        # set up CPU, with 64-to-32-bit downconverters
 +        self.dram_clk_freq = dram_clk_freq
 +        if self.dram_clk_freq is None:
 +            self.dram_clk_freq = clk_freq
 +
 +        # set up CPU, with 64-to-32-bit downconverters, and a delayed Reset
          if add_cpu:
++<<<<<<< HEAD
 +            self.cpu = ExternalCore(name="ext_core")
 +
++=======
+             drs = DomainRenamer("cpu")
+             self.cpu = drs(ExternalCore(name="ext_core"))
+             if nest_freq == core_freq:
+                 # No Wishbone bridge required
+                 self.asbrdbus = self.cpu.dbus
+                 self.asbribus = self.cpu.ibus
+             else:
+                 # Asynchronous Wishbone bridge required
+                 asbrdbus = wishbone.Interface(addr_width=32, data_width=64,
+                                              granularity=8, features={'stall'})
+                 asbribus = wishbone.Interface(addr_width=32, data_width=64,
+                                              granularity=8, features={'stall'})
+                 self.dbusasyncbr = WBAsyncBridge(master_bus=self.cpu.dbus,
+                                              slave_bus=asbrdbus,
+                                              master_clock_domain="cpu",
+                                              slave_clock_domain=None,
+                                              address_width=32, data_width=64,
+                                              granularity=8, features={'stall'})
+                 self.ibusasyncbr = WBAsyncBridge(master_bus=self.cpu.ibus,
+                                              slave_bus=asbribus,
+                                              master_clock_domain="cpu",
+                                              slave_clock_domain=None,
+                                              address_width=32, data_width=64,
+                                              granularity=8, features={'stall'})
+                 self.asbrdbus = asbrdbus
+                 self.asbribus = asbribus
++>>>>>>> origin/ddr3
              cvtdbus = wishbone.Interface(addr_width=30, data_width=32,
                                           granularity=8, features={'stall'})
              cvtibus = wishbone.Interface(addr_width=30, data_width=32,
              self._decoder.add(self.bootmem.bus, addr=fw_addr) # ROM at fw_addr
  
          # System Configuration info
 -        # offset executable ELF payload at 1 megabyte offset (1<<20)
 -        spi_offset = 1<<20 if (spi_0_pins is not None) else None
 +        # offset executable ELF payload at 2 megabyte offset (2<<20)
 +        spi_offset = 2<<20 if (spi_0_pins is not None) else None
          dram_offset = ddr_addr if (ddr_pins is not None) else None
++<<<<<<< HEAD
 +        self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq,
 +                                      mem_clk_freq=self.dram_clk_freq,
 +                                      core_clk_freq=core_clk_freq,
++=======
+         self.syscon = MicrowattSYSCON(sys_clk_freq=nest_freq,
+                                       core_clk_freq=core_freq,
++>>>>>>> origin/ddr3
                                        has_uart=(uart_pins is not None),
                                        spi_offset=spi_offset,
                                        dram_addr=dram_offset)
                                                      tRAS=44)}
          """
  
++<<<<<<< HEAD
 +        # DRAM Module. first, create the (triple) modules:
 +        # * DDR PHY
 +        # * gram Core: presents PHY with a DFI Interface
 +        # * gram Bone (aka gram-with-wishbone) connects wishbone to DFI
 +        # from there it gets a little complicated because of supporting
 +        # several options: simulation, synchronous, and asynchronous clocks.
 +        # dram_clk_freq can *never* be set equal to clk_freq, if it is,
 +        # it's assumed to be synchronous, and the dram Domains need renaming
 +
 +        if ddr_pins is not None: # or fpga == 'sim':
 +            ddrmodule = dram_cls(self.dram_clk_freq, "1:2") # match DDR3 P/N
 +
 +            # remap both the sync domain (wherever it occurs) and
 +            # the sync2x domain, if dram frequency is specified and
 +            # not equal to the core clock
 +            drs = None
 +            if dram_clk_freq is not None or fpga == 'sim':
 +                drs = lambda x: x
 +            else:
 +                drs = DomainRenamer({"sync": "dramsync",
 +                                     "sync2x": "dramsync2x"})
++=======
+         # DRAM Module
+         if ddr_pins is not None or fpga == 'sim':
+             ddrmodule = dram_cls(nest_freq, "1:2") # match DDR3 ASIC P/N
++>>>>>>> origin/ddr3
  
 -            #drs = lambda x: x
 -            drs = DomainRenamer("dramsync")
 +            features = set()
 +            if dram_clk_freq is None:
 +                features.add("stall")
  
 +            # create the PHY (fake one for sim)
              if fpga == 'sim':
 +                settings = sim_ddr3_settings(self.dram_clk_freq)
                  self.ddrphy = FakePHY(module=ddrmodule,
++<<<<<<< HEAD
 +                                      settings=settings,
 +                                      verbosity=SDRAM_VERBOSE_DBG,
 +                                      clk_freq=self.dram_clk_freq)
 +            else:
 +                self.ddrphy = drs(ECP5DDRPHY(ddr_pins,
 +                                             #features=features,
 +                                             sys_clk_freq=self.dram_clk_freq))
++=======
+                                       settings=sim_ddr3_settings(nest_freq),
+                                       verbosity=SDRAM_VERBOSE_DBG,
+                                       clk_freq=nest_freq)
+             else:
+                 self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=nest_freq))
+             self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr)
++>>>>>>> origin/ddr3
  
 +            # create the core (bridge from PHY to DFI)
              dramcore = gramCore(phy=self.ddrphy,
                                  geom_settings=ddrmodule.geom_settings,
                                  timing_settings=ddrmodule.timing_settings,
++<<<<<<< HEAD
 +                                #features=features,
 +                                clk_freq=self.dram_clk_freq)
 +            self.dramcore = drs(dramcore)
 +
 +            # create the wishbone presentation (wishbone to DFI)
 +            drambone = gramWishbone(dramcore, features=features)
 +            self.drambone = drs(drambone)
 +
 +        # this is the case where sys_clk === dram_clk. no ASync Bridge
 +        # needed, so just let the phy core and wb-dfi be connected
 +        # directly to WB decoder.  both are running in "sync" domain
 +        # (because of the DomainRenamer, above)
 +
 +        if ddr_pins is not None and dram_clk_freq is None:
 +            self.ddrphy_bus = self.ddrphy.bus
 +            self.dramcore_bus = self.dramcore.bus
 +            self.drambone_bus = self.drambone.bus
 +
 +        # this covers the case where sys_clk != dram_clk: three separate
 +        # ASync Bridges are constructed (!) and the interface that's to
 +        # be wired to the WB decoder is the async bus because that's running
 +        # in the "sync" domain.
 +
 +        if ddr_pins is not None and dram_clk_freq is not None:
 +            # Set up Wishbone asynchronous bridge
 +            pabus = wishbone.Interface(addr_width=self.ddrphy.bus.addr_width,
 +                                       data_width=self.ddrphy.bus.data_width,
 +                                       granularity=self.ddrphy.bus.granularity,
 +                                       features={'stall'})
 +            self.ddrphy_bus = pabus
 +            self.ddrphy_bus.memory_map = self.ddrphy.bus.memory_map
 +
 +            pabr = WBAsyncBridge(master_bus=self.ddrphy_bus,
 +                                 slave_bus=self.ddrphy.bus,
 +                                 master_clock_domain=None,
 +                                 slave_clock_domain="dramsync",
 +                                 address_width=self.ddrphy.bus.addr_width,
 +                                 data_width=self.ddrphy.bus.data_width,
 +                                 granularity=self.ddrphy.bus.granularity)
 +            self.ddrphy_async_br = pabr
 +
 +            # Set up Wishbone asynchronous bridge
 +            dab = wishbone.Interface(addr_width=self.dramcore.bus.addr_width,
 +                                     data_width=self.dramcore.bus.data_width,
 +                                     granularity=self.dramcore.bus.granularity,
 +                                     features={'stall'})
 +            self.dramcore_bus = dab
 +            self.dramcore_bus.memory_map = self.dramcore.bus.memory_map
 +
 +            dac = WBAsyncBridge(master_bus=self.dramcore_bus,
 +                                slave_bus=self.dramcore.bus,
 +                                master_clock_domain=None,
 +                                slave_clock_domain="dramsync",
 +                                address_width=self.dramcore.bus.addr_width,
 +                                data_width=self.dramcore.bus.data_width,
 +                                granularity=self.dramcore.bus.granularity)
 +            self.dramcore_async_br = dac
 +
 +            # Set up Wishbone asynchronous bridge
 +            bab = wishbone.Interface(addr_width=self.drambone.bus.addr_width,
 +                                     data_width=self.drambone.bus.data_width,
 +                                     granularity=self.drambone.bus.granularity,
 +                                     features={'stall'})
 +            self.drambone_bus = bab
 +            self.drambone_bus.memory_map = self.drambone.bus.memory_map
 +
 +            bab = WBAsyncBridge(master_bus=self.drambone_bus,
 +                                slave_bus=self.drambone.bus,
 +                                master_clock_domain=None,
 +                                slave_clock_domain="dramsync",
 +                                address_width=self.drambone.bus.addr_width,
 +                                data_width=self.drambone.bus.data_width,
 +                                granularity=self.drambone.bus.granularity)
 +            self.drambone_async_br = bab
 +
 +        if ddr_pins is not None:
 +            # Add wishbone decoders
 +            self._decoder.add(self.dramcore_bus, addr=dramcore_addr)
 +            self._decoder.add(self.drambone_bus, addr=ddr_addr)
 +            self._decoder.add(self.ddrphy_bus, addr=ddrphy_addr)
 +
 +        # additional SRAM at address if DRAM is not also at 0x0
 +        # (TODO, check Flash, and HyperRAM as well)
 +        if ((ddr_pins is None or ddr_addr != 0x0) and fw_addr != 0 and
 +            hyperram_addr[0] != 0x0):
 +            print ("SRAM 0x8000 at address 0x0")
 +            sram_width = 32
 +            self.sram = SRAMPeripheral(size=0x8000,
 +                                      data_width=sram_width,
 +                                      writable=True)
 +            self._decoder.add(self.sram.bus, addr=0x0) # RAM at 0x0
++=======
+                                 clk_freq=nest_freq)
+             if fpga == 'sim':
+                 self.dramcore = dramcore
+             else:
+                 self.dramcore = drs(dramcore)
+             self._decoder.add(self.dramcore.bus, addr=dramcore_addr)
+             # map the DRAM onto Wishbone, XXX use stall but set classic below
+             drambone = gramWishbone(dramcore, features={'stall'})
+             if fpga == 'sim':
+                 self.drambone = drambone
+             else:
+                 self.drambone = drs(drambone)
+             self._decoder.add(self.drambone.bus, addr=ddr_addr)
+             # additional SRAM at address if DRAM is not also at 0x0
+             # (TODO, check Flash, and HyperRAM as well)
+             if ddr_addr != 0x0:
+                 sram_width = 32
+                 self.bootmem = SRAMPeripheral(size=0x8000,
+                                           data_width=sram_width,
+                                           writable=True)
+                 self._decoder.add(self.bootmem.bus, addr=0x0) # RAM at 0x0
++>>>>>>> origin/ddr3
  
          # SPI controller
          if spi_0_pins is not None and fpga in ['sim',
          if hasattr(self, "cpu"):
              m.submodules.intc = self.intc
              m.submodules.extcore = self.cpu
+             m.submodules.dbusasyncbr = self.dbusasyncbr
+             m.submodules.ibusasyncbr = self.ibusasyncbr
              m.submodules.dbuscvt = self.dbusdowncvt
              m.submodules.ibuscvt = self.ibusdowncvt
 -            # create stall sigs, assume wishbone classic
 -            #ibus, dbus = self.cvtibus, self.cvtdbus
 -            #comb += ibus.stall.eq(ibus.stb & ~ibus.ack)
 -            #comb += dbus.stall.eq(dbus.stb & ~dbus.ack)
  
+             # add wb async bridge verilog source. assumes a directory structure
+             # where the bridge has been checked out in a common subdirectory with:
+             # git clone https://github.com/alexforencich/verilog-wishbone.git
+             # git checkout d1fa24a0
+             verilog_wishbone = "../../verilog-wishbone/rtl"
+             pth = os.path.split(__file__)[0]
+             pth = os.path.join(pth, verilog_wishbone)
+             fname = os.path.abspath(pth)
+             print (fname)
+             self.dbusasyncbr.add_verilog_source(fname, platform)
+             self.ibusasyncbr.add_verilog_source(fname, platform)
          m.submodules.arbiter = self._arbiter
          m.submodules.decoder = self._decoder
          if hasattr(self, "ddrphy"):
              m.submodules.ddrphy = self.ddrphy
              m.submodules.dramcore = self.dramcore
              m.submodules.drambone = drambone = self.drambone
 -            # grrr, same problem with drambone: not WB4-pipe compliant
 -            comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack)
 +
 +            # add async wishbone bridges
 +            if hasattr(self, "ddrphy_async_br"):
 +                m.submodules.ddrphy_async_br = self.ddrphy_async_br
 +            if hasattr(self, "dramcore_async_br"):
 +                m.submodules.dramcore_async_br = self.dramcore_async_br
 +            if hasattr(self, "drambone_async_br"):
 +                m.submodules.drambone_async_br = self.drambone_async_br
 +
 +            # grrr, same problem with WB async bridge: not WB4-pipe compliant
 +            dab = self.ddrphy_bus
 +            if hasattr(dab, "stall"):
 +                comb += dab.stall.eq(dab.cyc & ~dab.ack)
 +            dab = self.dramcore_bus
 +            if hasattr(dab, "stall"):
 +                comb += dab.stall.eq(dab.cyc & ~dab.ack)
 +            dab = self.drambone_bus
 +            comb += dab.stall.eq(dab.cyc & ~dab.ack)
 +
 +            # add wb async bridge verilog source. assumes directory structure
 +            # where bridge has been checked out in a common subdirectory with:
 +            # git clone https://github.com/alexforencich/verilog-wishbone.git
 +            # git checkout d1fa24a0
 +            verilog_wishbone = "../../verilog-wishbone/rtl"
 +            pth = os.path.split(__file__)[0]
 +            pth = os.path.join(pth, verilog_wishbone)
 +            fname = os.path.abspath(pth)
 +            print (fname)
 +            if hasattr(self, "ddrphy_async_br"):
 +                self.dramcore_async_br.add_verilog_source(fname, platform)
 +            if hasattr(self, "drambone_async_br"):
 +                self.drambone_async_br.add_verilog_source(fname, platform)
  
+             # DRAM clock control / reset signals
+             comb += self.crg.ddr_clk_stop.eq(self.ddrphy.init.stop)
+             comb += self.crg.ddr_clk_reset.eq(self.ddrphy.init.reset)
          # add hyperram module
 -        if hasattr(self, "hyperram"):
 -            m.submodules.hyperram = hyperram = self.hyperram
 +        for i, hr in enumerate(self.hyperram):
 +            m.submodules["hyperram%d" % i] = hr
              # grrr, same problem with hyperram: not WB4-pipe compliant
 -            comb += hyperram.bus.stall.eq(hyperram.bus.cyc & ~hyperram.bus.ack)
 -            # set 3 top CSn lines to zero for now
 +            comb += hr.bus.stall.eq(hr.bus.cyc & ~hr.bus.ack)
 +            # reset
              if self.fpga == 'arty_a7':
 -                comb += hyperram.phy.rst_n.eq(ResetSignal())
 +                comb += hr.phy.rst_n.eq(ResetSignal())
  
          # add blinky lights so we know FPGA is alive
          if platform is not None:
@@@ -868,41 -723,31 +998,70 @@@ def build_platform(fpga, firmware)
      print ("platform", fpga, firmware, platform)
  
      # set clock frequency
++<<<<<<< HEAD
 +    clk_freq = 70e6
 +    dram_clk_freq = None
 +    if fpga == 'sim':
 +        clk_freq = 100e6
 +        dram_clk_freq = clk_freq
 +    if fpga == 'isim':
 +        clk_freq = 50e6 # below 50 mhz, stops DRAM being enabled
 +        #dram_clk_freq = clk_freq
 +        dram_clk_freq = 100e6
 +    if fpga == 'versa_ecp5':
 +        clk_freq = 50e6 # crank right down to timing threshold
 +        #dram_clk_freq = 55e6
++=======
+     core_freq = 70e6
+     nest_freq = core_freq
+     if fpga == 'sim':
+         core_freq = 100e6
+         nest_freq = core_freq
+     if fpga == 'isim':
+         core_freq = 55e6 # below 50 mhz, stops DRAM being enabled
+         nest_freq = core_freq
+     if fpga == 'versa_ecp5':
+         core_freq = 55e6 # crank right down to test hyperram
+         nest_freq = core_freq
++>>>>>>> origin/ddr3
      if fpga == 'versa_ecp5_85':
+         core_freq = 50e6
+         # DDR3 system interface is clocked at the nest frequency.
          # 50MHz works.  100MHz works.  55MHz does NOT work.
++<<<<<<< HEAD
 +        # Stick with multiples of 50MHz...
 +        clk_freq = 50e6
 +        dram_clk_freq = 100e6
 +    if fpga == 'arty_a7':
 +        clk_freq = 27.0e6 # urrr "working" with the QSPI core (25 mhz does not)
 +    if fpga == 'ulx3s':
 +        clk_freq = 40.0e6
 +    if fpga == 'orangecrab' or fpga=='orangecrab_isim':
 +        clk_freq = 50e6
 +    core_clk_freq = clk_freq
 +
 +    # merge dram_clk_freq with clk_freq if the same
 +    if clk_freq == dram_clk_freq:
 +        dram_clk_freq = None
 +
 +    # see if dram can be enabled
 +    enable_dram = False
 +    if dram_clk_freq is not None and dram_clk_freq >= 50e6:
 +        enable_dram = True
 +    if dram_clk_freq is None and clk_freq >= 50e6:
 +        enable_dram = True
++=======
+         # Stick with multiples of 25MHz...
+         # Note the actual DDR3 clock is 2x the nest, as the name
+         # implies...
+         nest_freq = 75e6
+     if fpga == 'arty_a7':
+         core_freq = 50e6
+         nest_freq = core_freq
+     if fpga == 'ulx3s':
+         core_freq = 40.0e6
+         nest_freq = core_freq
++>>>>>>> origin/ddr3
  
      # select a firmware address
      fw_addr = None
  
      # get DDR resource pins, disable if clock frequency is below 50 mhz for now
      ddr_pins = None
++<<<<<<< HEAD
 +    if (enable_dram and platform is not None and
 +        fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim',
 +                 'orangecrab','orangecrab_isim']): # not yet 'arty_a7',
++=======
+     if (nest_freq >= 50e6 and platform is not None and
+         fpga in ['versa_ecp5', 'versa_ecp5_85', 'arty_a7', 'isim']):
++>>>>>>> origin/ddr3
          ddr_pins = platform.request("ddr3", 0,
                                      dir={"dq":"-", "dqs":"-"},
 -                                    xdr={"rst": 1, "clk":4, "a":4,
 +                                    xdr={"rst": 4, "clk":4, "a":4,
                                           "ba":4, "clk_en":4,
                                           "odt":4, "ras":4, "cas":4, "we":4,
                                           "cs": 4})
                    ddr_addr=0x00000000,      # DRAM_BASE
                    spi0_addr=0xf0000000,     # SPI0_BASE
                    spi0_cfg_addr=0xc0006000, # SPI0_CTRL_BASE
++<<<<<<< HEAD
 +                  eth0_cfg_addr=0xc000c000, # ETH0_CTRL_BASE (4k)
 +                  eth0_irqno=1,             # ETH0_IRQ number (match microwatt)
 +                  hyperram_addr=hyperram_addr, # determined above
++=======
+                   eth0_cfg_addr=0xc0004000, # ETH0_CTRL_BASE (4k)
+                   eth0_irqno=0,             # ETH0_IRQ number
+                   hyperram_addr=0xa0000000, # HYPERRAM_BASE
++>>>>>>> origin/ddr3
                    fw_addr=fw_addr,
                    #fw_addr=None,
                    ddr_pins=ddr_pins,
                    ethmac_0_pins=ethmac_0_pins,
                    hyperram_pins=hyperram_pins,
                    firmware=firmware,
++<<<<<<< HEAD
 +                  xics_icp_addr=0xc000_4000, # XICS_ICP_BASE
 +                  xics_ics_addr=0xc000_5000, # XICS_ICS_BASE
 +                  clk_freq=clk_freq,
 +                  dram_clk_freq=dram_clk_freq,
 +                  core_clk_freq=core_clk_freq,
++=======
+                   core_freq=core_freq,
+                   nest_freq=nest_freq,
++>>>>>>> origin/ddr3
                    add_cpu=True)
  
      if toolchain == 'Trellis':