#include "elf64.h"
++//#ifdef ICARUS_DEBUG
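++//HACK: hard-coded debug register addresses, written with writel() under ICARUS_DEBUG below. NOTE(review): DEBUG1 is also written below but is not defined here -- confirm it is provided by a header.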
++#define DEBUG2 0xfff00018
++#define DEBUG3 0xfff0001C
++#define DEBUG4 0xfff00020
++//#endif
++
+#define ORANGECRAB_MODE_REGISTERS 0x0320, 0x0002, 0x0200, 0x0000
+
+static inline void mtspr(int sprnum, unsigned long val)
+{
+ __asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val));
+}
+
/*
 * Perform one 32-bit volatile load from addr and return the value read.
 * The access goes through a volatile-qualified pointer so the compiler
 * must actually issue the load.
 */
static inline uint32_t read32(const void *addr)
{
	const volatile uint32_t *word = (const volatile uint32_t *)addr;

	return *word;
}
+extern void crank_up_qspi_level1(void);
+extern int host_spi_flash_init(void);
+
++#if 0
/*
 * Copy size bytes from address (SPI_FLASH_BASE + offset) into dst.
 *
 * dst    - destination buffer, must hold at least size bytes
 * offset - byte offset added to SPI_FLASH_BASE to form the source address
 * size   - number of bytes to copy
 *
 * Always returns true; no error is detected or reported.
 */
static bool fl_read(void *dst, uint32_t offset, uint32_t size)
{
	const void *flash_src = (const void *)(unsigned long)(SPI_FLASH_BASE + offset);

	memcpy(dst, flash_src, size);
	return true;
}
++#endif
-static unsigned long copy_flash(unsigned int offset)
++#if 0
++//requires working dram
+static unsigned long copy_flash(unsigned int offset, unsigned int dst_offs)
{
Elf64_Ehdr ehdr;
Elf64_Phdr ph;
return -1ul;
}
++#endif
// XXX
return 0;
}
++
int main(void) {
const int kNumIterations = 14;
int res, failcnt = 0;
unsigned long ftr, spi_offs=0x0;
volatile uint32_t *ram = (uint32_t*)MEMORY_BASE;
++ #ifdef ICARUS_DEBUG
++ writel(0xFF,DEBUG2);
++ #endif
console_init();
++ #ifdef ICARUS_DEBUG
++ writel(0xFC,DEBUG2);
++ #endif
//puts("Firmware launched...\n");
#if 1
return 0;
#endif
- for (int persistence=0; persistence < 1000; persistence++) {
+ // init DRAM only if SYSCON says it exists (duh)
+ if (ftr & SYS_REG_INFO_HAS_DRAM)
+ {
++
puts("DRAM init... ");
struct gramCtx ctx;
};
#endif
struct gramProfile profile2;
-- gram_init(&ctx, &profile, (void*)MEMORY_BASE,
-- (void*)DRAM_CTRL_BASE,
-- (void*)DRAM_INIT_BASE);
++ #ifdef ICARUS_DEBUG
++ writel(0xCC,DEBUG2);
++ #endif
++ gram_init(&ctx, &profile, (void*)MEMORY_BASE, /* "Main" memory alias, either BRAM or DRAM */
++
++ #if 0
++ dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET|DFII_COMMAND_CS); //if software_control
++ //enable clocks and https://en.wikipedia.org/wiki/On-die_termination
++
++ dfii_initseq(ctx, profile); -> writes to core
++ gram_load_calibration(ctx, profile); -> writes to phy
++
++ dfii_setcontrol(ctx, DFII_CONTROL_SEL|DFII_CONTROL_RESET);
++ #endif
++
++
++
++ /* TODO verify this, most likely the error is in the phy
++ * # create the core (bridge from PHY to DFI)
++ * see gram/core/__init__.py
++ * self.dfii = DFIInjector(
++ csr_bank=CSRPrefixProxy(bank, "dfii"),
++ * csr_bank.csr(4, "w") # sel, clk_en, odt, reset
++ * per phase :
++ * self._command = csr_bank.csr(6, "w")
++ * self._command_issue = csr_bank.csr(1, "w")
++ * =====================================================================
++ dramcore = gramCore(phy=self.ddrphy,
++ geom_settings=ddrmodule.geom_settings,
++ timing_settings=ddrmodule.timing_settings,
++ #features=features,
++ clk_freq=self.dram_clk_freq)
++ */
++ (void*)DRAM_CTRL_BASE, /* LiteDRAM control registers dramcore_addr=0xc8000000 */
++ (void*)0xfff00000); /* guess: this was wrong => now fixed */
++ #ifdef ICARUS_DEBUG
++ writel(0xDD,DEBUG2);
++ #endif
puts("done\n");
puts("MR profile: ");
puts("Auto calibrating... ");
res = gram_generate_calibration(&ctx, &profile2);
if (res != GRAM_ERR_NONE) {
-- puts("failed\n");
++ //guess: always fails in simulation, only sometimes on real hardware
++ #ifdef ICARUS_DEBUG
++ writel(0xFFFFAAAA,DEBUG1);//stop simulation
++ #endif
++ puts("fAiLED\n"); //TODO: find other error locations
++ while(1){} // hang here on calibration failure; the fallback load on the next line is never reached
gram_load_calibration(&ctx, &profile);
} else {
++ //puts("NO_ERROR\n");
gram_load_calibration(&ctx, &profile2);
}
puts("done\n");
for (size_t i = 0; i < kNumIterations; i++) {
writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
}
++
#if 0
for (int dly = 0; dly < 8; dly++) {
uart_writeuint32(profile2.rdly_p1);
gram_load_calibration(&ctx, &profile2);
for (size_t i = 0; i < kNumIterations; i++) {
-- if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
++ // read each test word back once (r) and compare it with the expected pattern (c)
++ uint32_t r = readl((unsigned long)&(ram[i]));
++ uint32_t c = (0xDEAF0000 | i*4);
++ if (r!=c) {
puts("fail : *(0x");
uart_writeuint32((unsigned long)(&ram[i]));
-- puts(") = ");
-- uart_writeuint32(readl((unsigned long)&(ram[i])));
++ puts(") = [r]");
++ uart_writeuint32(r);
++ puts(" != [c]");
++ uart_writeuint32(c); // print the expected value, not the read-back value a second time
puts("\n");
failcnt++;
if (failcnt > 10) {
puts("Test canceled (more than 10 errors)\n");
++ #ifdef ICARUS_DEBUG
++ writel(0xFFFFBBBB,DEBUG1);//stop simulation
++ #endif
break;
}
}
}
#endif
puts("done\n");
-
- // memcpy from SPI Flash to SDRAM then boot
++ #ifdef ICARUS_DEBUG
++ writel(0xFFFFFFFF,DEBUG1);//stop simulation
++ #endif
+
+#if 0 // ooo, annoying: won't work. no idea why
+ // temporary hard-hack: boot directly from QSPI. really
+ // should do something like detect at least... something
+ if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH))
+ {
+ // jump to absolute address
+ mtspr(8, SPI_FLASH_BASE); // move address to LR
+ __asm__ volatile("blr");
+ return 0;
+ }
+#endif
-
++
++ #if 0
+ // memcpy from SPI Flash then boot
if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) &&
- (ftr & SYS_REG_INFO_HAS_DRAM) &&
(failcnt == 0))
{
+/*
+ puts("ELF @ QSPI\n");
// identify ELF, copy if present, and get the start address
- unsigned long faddr = copy_flash(spi_offs);
+ unsigned long faddr = copy_flash(spi_offs,
+ 0x600000); // hack!
if (faddr != -1ul) {
// jump to absolute address
+ mtspr(8, faddr); // move address to LR
+ __asm__ volatile("blr");
+
+ // works with head.S which copies r3 into ctr then does bctr
return faddr;
}
+ puts("copy QSPI\n");
+*/
+ // another terrible hack: copy from flash at offset 0x600000
+ // a block of size 0x8000 (shortened for testing) into mem address
+ // 0x1000000, then jump to it. this allows a dtb image to be executed
+ puts("copy QSPI\n");
+ volatile uint32_t *mem = (uint32_t*)0x1000000;
+ fl_read(mem, // destination in RAM
+ 0x600000, // offset into QSPI
+ 0x8000); // length - shorter (testing) 0x8000);
+ //0x1000000); // length
+ puts("dump mem\n");
+ for (int i=0;i<256;i++) {
+ tmp = readl((unsigned long)&(mem[i]));
+ uart_writeuint32(tmp);
+ puts(" ");
+ if ((i & 0x7) == 0x7) puts("\r\n");
+ }
+ puts("\r\n");
+ mtspr(8, 0x1000000); // move address to LR
+ __asm__ volatile("blr");
}
++ #endif
++
return 0;
}
class ECP5CRG(Elaboratable):
++<<<<<<< HEAD
+ def __init__(self, sys_clk_freq=100e6, dram_clk_freq=None,
+ pod_bits=25, sync_bits=26, need_bridge=False):
+ """when dram_clk_freq=None, a dramsync domain is still created
+ but it is an alias of sync domain. likewise the 2x
+ """
+ self.sys_clk_freq = sys_clk_freq
+ self.dram_clk_freq = dram_clk_freq
+ self.pod_bits = pod_bits # for init domain
+ self.sync_bits = sync_bits # for all other domains
+ self.need_bridge = need_bridge # insert ECLKBRIDGECS
+ assert pod_bits <= sync_bits, \
+ "power-on-delay bits %d should " \
+ " be less than sync_bits %d" % (pod_bits, sync_bits)
+
+ def phase2_domain(self, m, pll, name, freq, esyncb):
+ """creates a domain that can be used with xdr=4 platform resources.
+
+ this requires creating a domain at *twice* the frequency,
+ then halving it. the doubled-frequency can then go to the
+ eclk parts of a 4-phase IOPad:
+ pads.a.o_clk.eq(ClockSignal("dramsync")),
+ pads.a.o_fclk.eq(ClockSignal("dramsync2x")),
+ """
+
+ # create names
+ cd2x = "%s2x" % name
+ cd2x_ub = cd2x+"_unbuf"
+ cd = name
+ # Generating sync2x from extclk
+ cd_2x = ClockDomain(cd2x, local=False)
+ cd_2x_unbuf = ClockDomain(cd2x_ub, local=False, reset_less=True)
+ cd_ = ClockDomain("%s" % name, local=False)
+
+ # create PLL clocks
+ pll.create_clkout(ClockSignal(cd2x_ub), 2*freq)
+ if esyncb:
+ if self.need_bridge:
+ sys2x_clk_ecsout = Signal()
+ m.submodules["%s_eclkbridgecs" % cd] = Instance("ECLKBRIDGECS",
+ i_CLK0 = ClockSignal(cd2x_ub),
+ i_SEL = 0,
+ o_ECSOUT = sys2x_clk_ecsout)
+ m.submodules["%s_eclksyncb" % cd] = Instance("ECLKSYNCB",
+ i_ECLKI = sys2x_clk_ecsout,
+ i_STOP = 0,
+ o_ECLKO = ClockSignal(cd2x))
+ else:
+ m.submodules["%s_eclksyncb" % cd] = Instance("ECLKSYNCB",
+ i_ECLKI = ClockSignal(cd2x_ub),
+ i_STOP = 0,
+ o_ECLKO = ClockSignal(cd2x))
+ else:
+ m.d.comb += ClockSignal(cd2x).eq(ClockSignal(cd2x_ub)) # no esyncb
+ m.domains += cd_2x_unbuf
+ m.domains += cd_2x
+ m.domains += cd_
+
+ # # Generating sync from sync2x
+ m.submodules["%s_clkdivf" % cd] = Instance("CLKDIVF",
+ p_DIV="2.0",
+ i_ALIGNWD=0,
+ i_CLKI=ClockSignal(cd2x),
+ i_RST=0,
+ o_CDIVX=ClockSignal(cd))
++=======
+ def __init__(self, sys_clk_freq=100e6, core_clk_freq=100e6, pod_bits=25):
+ self.sys_clk_freq = sys_clk_freq
+ self.core_clk_freq = core_clk_freq
+ self.pod_bits = pod_bits
++>>>>>>> origin/ddr3
+
+ # DDR clock control signals
+ self.ddr_clk_stop = Signal()
+ self.ddr_clk_reset = Signal()
def elaborate(self, platform):
m = Module()
gsr0 = Signal()
gsr1 = Signal()
- m.submodules += [
- Instance("FD1S3AX", p_GSR="DISABLED",
+ m.submodules.gsr0 = Instance("FD1S3AX", p_GSR="DISABLED",
i_CK=ClockSignal("rawclk"),
i_D=~reset,
- o_Q=gsr0),
- Instance("FD1S3AX", p_GSR="DISABLED",
+ o_Q=gsr0)
+ m.submodules.gsr1 = Instance("FD1S3AX", p_GSR="DISABLED",
i_CK=ClockSignal("rawclk"),
i_D=gsr0,
- o_Q=gsr1),
- Instance("SGSR", i_CLK=ClockSignal("rawclk"),
- i_GSR=gsr1),
- ]
+ o_Q=gsr1)
+ m.submodules.sgsr = Instance("SGSR", i_CLK=ClockSignal("rawclk"),
+ i_GSR=gsr1)
- # PLL
- m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~reset)
-
- # Power-on delay (655us)
+ # Power-on delay
podcnt = Signal(self.pod_bits, reset=-1)
+ synccnt = Signal(self.sync_bits, reset=-1)
pod_done = Signal()
++<<<<<<< HEAD
+ sync_done = Signal()
+ with m.If((synccnt != 0) & pll.locked):
+ m.d.rawclk += synccnt.eq(synccnt-1)
+ with m.If((podcnt != 0) & pll.locked):
++=======
+ with m.If(podcnt != 0):
++>>>>>>> origin/ddr3
m.d.rawclk += podcnt.eq(podcnt-1)
m.d.rawclk += pod_done.eq(podcnt == 0)
+ m.d.rawclk += sync_done.eq(synccnt == 0)
++<<<<<<< HEAD
+ # and reset which only drops when the PLL is done and pod completes
++=======
+ # PLL
+ m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~pod_done|~reset)
+
+ # Generating sync2x (200Mhz) and init (25Mhz) from extclk
+ # sync is our "nest" frequency, cpu is the core frequency
+ # On ASIC, core >= nest, on FPGA, cpu <= nest...
+ cd_sync2x = ClockDomain("sync2x", local=False)
+ cd_sync2x_unbuf = ClockDomain("sync2x_unbuf",
+ local=False, reset_less=True)
+ cd_init = ClockDomain("init", local=False)
+ cd_sync = ClockDomain("sync", local=False)
+ cd_cpu = ClockDomain("cpu", local=False)
+ cd_dramsync = ClockDomain("dramsync", local=False)
+
+ # create PLL clocks
+ pll.set_clkin_freq(platform.default_clk_frequency)
+ pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq)
+ pll.create_clkout(ClockSignal("init"), 25e6)
+ if self.sys_clk_freq == self.core_clk_freq:
+ m.d.comb += ClockSignal("cpu").eq(ClockSignal("sync"))
+ else:
+ pll.create_clkout(ClockSignal("cpu"), self.core_clk_freq)
+ m.submodules += Instance("ECLKSYNCB",
+ i_ECLKI = ClockSignal("sync2x_unbuf"),
+ i_STOP = self.ddr_clk_stop,
+ o_ECLKO = ClockSignal("sync2x"))
+ m.domains += cd_sync2x_unbuf
+ m.domains += cd_sync2x
+ m.domains += cd_init
+ m.domains += cd_sync
+ m.domains += cd_cpu
+ m.domains += cd_dramsync
++>>>>>>> origin/ddr3
reset_ok = Signal(reset_less=True)
+ sync_reset_ok = Signal(reset_less=True)
m.d.comb += reset_ok.eq(~pll.locked|~pod_done)
++<<<<<<< HEAD
+ m.d.comb += sync_reset_ok.eq(~pll.locked|~sync_done)
++=======
+ m.d.comb += ResetSignal("init").eq(reset_ok)
+ m.d.comb += ResetSignal("sync").eq(reset_ok)
+ m.d.comb += ResetSignal("cpu").eq(reset_ok)
+ m.d.comb += ResetSignal("dramsync").eq(reset_ok|self.ddr_clk_reset)
++>>>>>>> origin/ddr3
- # # Generating sync (100Mhz) from sync2x
+ # create PLL input clock from platform default frequency
+ pll.set_clkin_freq(platform.default_clk_frequency)
++<<<<<<< HEAD
+ # single or double main sync clock domain. double needs a 2nd PLL
+ # to match up with the ECLKSYNCB, one per quadrant inside the ECP5
+ if self.dram_clk_freq is not None:
+ m.domains += ClockDomain("sync_unbuf", local=False, reset_less=True)
+ m.domains += ClockDomain("sync", local=False)
+ pll.create_clkout(ClockSignal("sync_unbuf"), self.sys_clk_freq)
+ m.d.comb += ClockSignal("sync").eq(ClockSignal("sync_unbuf"))
+ else:
+ # Generating sync2x and sync from extclk, which is *only* how
+ # xdr=4 can be requested on the sync domain. also do not request
+ # an edge-clock-stop
+ self.phase2_domain(m, pll, "sync", self.sys_clk_freq, True)
+ m.d.comb += ResetSignal("sync2x").eq(sync_reset_ok)
+ m.d.comb += ResetSignal("sync").eq(sync_reset_ok)
+
+ # DRAM clock: if not requested set to sync, otherwise create with
+ # an ECLKSYNCB (which is set to no-stop at the moment)
+ if self.dram_clk_freq is not None:
+ self.phase2_domain(m, pll, "dramsync", self.dram_clk_freq, True)
+ else:
+ # alias dramsync and dramsync2x to sync and sync2x
+ cd_dramsync = ClockDomain("dramsync", local=False)
+ m.domains += cd_dramsync
+ m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync"))
+ # and a dram 2x sigh
+ cd_dramsync2x = ClockDomain("dramsync2x", local=False)
+ m.domains += cd_dramsync2x
+ m.d.comb += ClockSignal("dramsync2x").eq(ClockSignal("sync2x"))
+ # resets for the dram domains
+ m.d.comb += ResetSignal("dramsync2x").eq(sync_reset_ok)
+ m.d.comb += ResetSignal("dramsync").eq(sync_reset_ok)
++=======
+ m.submodules += Instance("CLKDIVF",
+ p_DIV="2.0",
+ i_ALIGNWD=0,
+ i_CLKI=ClockSignal("sync2x"),
+ i_RST=ResetSignal("dramsync"),
+ o_CDIVX=ClockSignal("sync"))
++>>>>>>> origin/ddr3
- # temporarily set dram sync clock exactly equal to main sync
- m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync"))
+ # create 25 mhz "init" clock, straight (no 2x phase stuff)
+ # this domain can be used before all others, has its own delay
+ # (sync_bits)
+ cd_init = ClockDomain("init", local=False)
+ pll.create_clkout(ClockSignal("init"), 25e6)
+ m.domains += cd_init
+ m.d.comb += ResetSignal("init").eq(reset_ok)
return m
class DDR3SoC(SoC, Elaboratable):
def __init__(self, *,
fpga,
- dram_cls,
- uart_pins, spi_0_pins, ethmac_0_pins,
- ddr_pins, ddrphy_addr, dramcore_addr, ddr_addr,
- fw_addr=0x0000_0000,
- firmware=None,
- spi0_addr, spi0_cfg_addr,
- eth0_cfg_addr, eth0_irqno,
+ dram_cls=None,
+ uart_pins=None, spi_0_pins=None, ethmac_0_pins=None,
+ ddr_pins=None, ddrphy_addr=None,
+ dramcore_addr=None, ddr_addr=None,
+ fw_addr=0x0000_0000, firmware=None,
+ uart_addr=None, uart_irqno=0,
+ spi0_addr=None, spi0_cfg_addr=None,
+ eth0_cfg_addr=None, eth0_irqno=None,
hyperram_addr=None,
hyperram_pins=None,
++<<<<<<< HEAD
+ xics_icp_addr=None, xics_ics_addr=None,
+ clk_freq=50e6,
+ dram_clk_freq=None,
+ core_clk_freq=50e6,
++=======
+ nest_freq=50e6,
+ core_freq=50e6,
++>>>>>>> origin/ddr3
add_cpu=True):
# wishbone routing is as follows:
# | |
# +--+--+
# |
+ # WBAsyncBridge
+ # |
# 64to32DownCvt
# |
- # arbiter------------------------------------------+
- # | |
- # +---decoder----+--------+---------+-------+--------+ |
- # | | | | | | | |
- # uart XICS CSRs DRAM XIP SPI HyperRAM EthMAC
+ # arbiter------------------------------------------------------+
+ # | |
+ # +---decoder----+--------+---------------+-------------+--------+ |
+ # | | | | | | | |
+ # | | | WBAsyncBridge | | | |
+ # | | | | | | | |
+ # uart XICS CSRs DRAM XIP SPI HyperRAM EthMAC
# set up wishbone bus arbiter and decoder. arbiter routes,
# decoder maps local-relative addressed satellites to global addresses
# set up clock request generator
pod_bits = 25
++<<<<<<< HEAD
+ sync_bits = 26
+ need_bridge=False
+ if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s',
+ 'orangecrab','orangecrab_isim', 'rcs_arctic_tern_bmc_card']:
+ if fpga in ['isim','orangecrab_isim']:
+ pod_bits = 5
+ sync_bits = 6
+ if fpga in ['orangecrab', 'orangecrab_sim',
+ 'rcs_arctic_tern_bmc_card']: # NOTE(review): 'orangecrab_sim' is not in the enclosing fpga list (which has 'orangecrab_isim'), so it can never match here -- possible typo, confirm
+ need_bridge=True
+ self.crg = ECP5CRG(clk_freq, dram_clk_freq=dram_clk_freq,
+ pod_bits=pod_bits, sync_bits=sync_bits,
+ need_bridge=need_bridge)
++=======
+ if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s']:
+ if fpga in ['isim']:
+ pod_bits = 6
+ self.crg = ECP5CRG(sys_clk_freq=nest_freq, core_clk_freq=core_freq,
+ pod_bits=pod_bits)
++>>>>>>> origin/ddr3
if fpga in ['arty_a7']:
- self.crg = ArtyA7CRG(clk_freq)
+ self.crg = ArtyA7CRG(core_freq)
- # set up CPU, with 64-to-32-bit downconverters
+ self.dram_clk_freq = dram_clk_freq
+ if self.dram_clk_freq is None:
+ self.dram_clk_freq = clk_freq
+
+ # set up CPU, with 64-to-32-bit downconverters, and a delayed Reset
if add_cpu:
++<<<<<<< HEAD
+ self.cpu = ExternalCore(name="ext_core")
+
++=======
+ drs = DomainRenamer("cpu")
+
+ self.cpu = drs(ExternalCore(name="ext_core"))
+ if nest_freq == core_freq:
+ # No Wishbone bridge required
+ self.asbrdbus = self.cpu.dbus
+ self.asbribus = self.cpu.ibus
+ else:
+ # Asynchronous Wishbone bridge required
+ asbrdbus = wishbone.Interface(addr_width=32, data_width=64,
+ granularity=8, features={'stall'})
+ asbribus = wishbone.Interface(addr_width=32, data_width=64,
+ granularity=8, features={'stall'})
+ self.dbusasyncbr = WBAsyncBridge(master_bus=self.cpu.dbus,
+ slave_bus=asbrdbus,
+ master_clock_domain="cpu",
+ slave_clock_domain=None,
+ address_width=32, data_width=64,
+ granularity=8, features={'stall'})
+ self.ibusasyncbr = WBAsyncBridge(master_bus=self.cpu.ibus,
+ slave_bus=asbribus,
+ master_clock_domain="cpu",
+ slave_clock_domain=None,
+ address_width=32, data_width=64,
+ granularity=8, features={'stall'})
+ self.asbrdbus = asbrdbus
+ self.asbribus = asbribus
++>>>>>>> origin/ddr3
cvtdbus = wishbone.Interface(addr_width=30, data_width=32,
granularity=8, features={'stall'})
cvtibus = wishbone.Interface(addr_width=30, data_width=32,
self._decoder.add(self.bootmem.bus, addr=fw_addr) # ROM at fw_addr
# System Configuration info
- # offset executable ELF payload at 1 megabyte offset (1<<20)
- spi_offset = 1<<20 if (spi_0_pins is not None) else None
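+ # offset executable ELF payload at 2 megabyte offset (2<<20 == 0x200000)
+ # NOTE(review): the boot firmware hard-codes flash offset 0x600000 (6 MB) -- confirm which is intended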
+ spi_offset = 2<<20 if (spi_0_pins is not None) else None
dram_offset = ddr_addr if (ddr_pins is not None) else None
++<<<<<<< HEAD
+ self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq,
+ mem_clk_freq=self.dram_clk_freq,
+ core_clk_freq=core_clk_freq,
++=======
+ self.syscon = MicrowattSYSCON(sys_clk_freq=nest_freq,
+ core_clk_freq=core_freq,
++>>>>>>> origin/ddr3
has_uart=(uart_pins is not None),
spi_offset=spi_offset,
dram_addr=dram_offset)
tRAS=44)}
"""
++<<<<<<< HEAD
+ # DRAM Module. first, create the (triple) modules:
+ # * DDR PHY
+ # * gram Core: presents PHY with a DFI Interface
+ # * gram Bone (aka gram-with-wishbone) connects wishbone to DFI
+ # from there it gets a little complicated because of supporting
+ # several options: simulation, synchronous, and asynchronous clocks.
+ # dram_clk_freq can *never* be set equal to clk_freq, if it is,
+ # it's assumed to be synchronous, and the dram Domains need renaming
+
+ if ddr_pins is not None: # or fpga == 'sim':
+ ddrmodule = dram_cls(self.dram_clk_freq, "1:2") # match DDR3 P/N
+
+ # remap both the sync domain (wherever it occurs) and
+ # the sync2x domain, if dram frequency is specified and
+ # not equal to the core clock
+ drs = None
+ if dram_clk_freq is not None or fpga == 'sim':
+ drs = lambda x: x
+ else:
+ drs = DomainRenamer({"sync": "dramsync",
+ "sync2x": "dramsync2x"})
++=======
+ # DRAM Module
+ if ddr_pins is not None or fpga == 'sim':
+ ddrmodule = dram_cls(nest_freq, "1:2") # match DDR3 ASIC P/N
++>>>>>>> origin/ddr3
- #drs = lambda x: x
- drs = DomainRenamer("dramsync")
+ features = set()
+ if dram_clk_freq is None:
+ features.add("stall")
+ # create the PHY (fake one for sim)
if fpga == 'sim':
+ settings = sim_ddr3_settings(self.dram_clk_freq)
self.ddrphy = FakePHY(module=ddrmodule,
++<<<<<<< HEAD
+ settings=settings,
+ verbosity=SDRAM_VERBOSE_DBG,
+ clk_freq=self.dram_clk_freq)
+ else:
+ self.ddrphy = drs(ECP5DDRPHY(ddr_pins,
+ #features=features,
+ sys_clk_freq=self.dram_clk_freq))
++=======
+ settings=sim_ddr3_settings(nest_freq),
+ verbosity=SDRAM_VERBOSE_DBG,
+ clk_freq=nest_freq)
+ else:
+ self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=nest_freq))
+ self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr)
++>>>>>>> origin/ddr3
+ # create the core (bridge from PHY to DFI)
dramcore = gramCore(phy=self.ddrphy,
geom_settings=ddrmodule.geom_settings,
timing_settings=ddrmodule.timing_settings,
++<<<<<<< HEAD
+ #features=features,
+ clk_freq=self.dram_clk_freq)
+ self.dramcore = drs(dramcore)
+
+ # create the wishbone presentation (wishbone to DFI)
+ drambone = gramWishbone(dramcore, features=features)
+ self.drambone = drs(drambone)
+
+ # this is the case where sys_clk === dram_clk. no ASync Bridge
+ # needed, so just let the phy core and wb-dfi be connected
+ # directly to WB decoder. both are running in "sync" domain
+ # (because of the DomainRenamer, above)
+
+ if ddr_pins is not None and dram_clk_freq is None:
+ self.ddrphy_bus = self.ddrphy.bus
+ self.dramcore_bus = self.dramcore.bus
+ self.drambone_bus = self.drambone.bus
+
+ # this covers the case where sys_clk != dram_clk: three separate
+ # ASync Bridges are constructed (!) and the interface that's to
+ # be wired to the WB decoder is the async bus because that's running
+ # in the "sync" domain.
+
+ if ddr_pins is not None and dram_clk_freq is not None:
+ # Set up Wishbone asynchronous bridge
+ pabus = wishbone.Interface(addr_width=self.ddrphy.bus.addr_width,
+ data_width=self.ddrphy.bus.data_width,
+ granularity=self.ddrphy.bus.granularity,
+ features={'stall'})
+ self.ddrphy_bus = pabus
+ self.ddrphy_bus.memory_map = self.ddrphy.bus.memory_map
+
+ pabr = WBAsyncBridge(master_bus=self.ddrphy_bus,
+ slave_bus=self.ddrphy.bus,
+ master_clock_domain=None,
+ slave_clock_domain="dramsync",
+ address_width=self.ddrphy.bus.addr_width,
+ data_width=self.ddrphy.bus.data_width,
+ granularity=self.ddrphy.bus.granularity)
+ self.ddrphy_async_br = pabr
+
+ # Set up Wishbone asynchronous bridge
+ dab = wishbone.Interface(addr_width=self.dramcore.bus.addr_width,
+ data_width=self.dramcore.bus.data_width,
+ granularity=self.dramcore.bus.granularity,
+ features={'stall'})
+ self.dramcore_bus = dab
+ self.dramcore_bus.memory_map = self.dramcore.bus.memory_map
+
+ dac = WBAsyncBridge(master_bus=self.dramcore_bus,
+ slave_bus=self.dramcore.bus,
+ master_clock_domain=None,
+ slave_clock_domain="dramsync",
+ address_width=self.dramcore.bus.addr_width,
+ data_width=self.dramcore.bus.data_width,
+ granularity=self.dramcore.bus.granularity)
+ self.dramcore_async_br = dac
+
+ # Set up Wishbone asynchronous bridge
+ bab = wishbone.Interface(addr_width=self.drambone.bus.addr_width,
+ data_width=self.drambone.bus.data_width,
+ granularity=self.drambone.bus.granularity,
+ features={'stall'})
+ self.drambone_bus = bab
+ self.drambone_bus.memory_map = self.drambone.bus.memory_map
+
+ bab = WBAsyncBridge(master_bus=self.drambone_bus,
+ slave_bus=self.drambone.bus,
+ master_clock_domain=None,
+ slave_clock_domain="dramsync",
+ address_width=self.drambone.bus.addr_width,
+ data_width=self.drambone.bus.data_width,
+ granularity=self.drambone.bus.granularity)
+ self.drambone_async_br = bab
+
+ if ddr_pins is not None:
+ # Add wishbone decoders
+ self._decoder.add(self.dramcore_bus, addr=dramcore_addr)
+ self._decoder.add(self.drambone_bus, addr=ddr_addr)
+ self._decoder.add(self.ddrphy_bus, addr=ddrphy_addr)
+
+ # additional SRAM at address if DRAM is not also at 0x0
+ # (TODO, check Flash, and HyperRAM as well)
+ if ((ddr_pins is None or ddr_addr != 0x0) and fw_addr != 0 and
+ hyperram_addr[0] != 0x0):
+ print ("SRAM 0x8000 at address 0x0")
+ sram_width = 32
+ self.sram = SRAMPeripheral(size=0x8000,
+ data_width=sram_width,
+ writable=True)
+ self._decoder.add(self.sram.bus, addr=0x0) # RAM at 0x0
++=======
+ clk_freq=nest_freq)
+ if fpga == 'sim':
+ self.dramcore = dramcore
+ else:
+ self.dramcore = drs(dramcore)
+ self._decoder.add(self.dramcore.bus, addr=dramcore_addr)
+
+ # map the DRAM onto Wishbone, XXX use stall but set classic below
+ drambone = gramWishbone(dramcore, features={'stall'})
+ if fpga == 'sim':
+ self.drambone = drambone
+ else:
+ self.drambone = drs(drambone)
+ self._decoder.add(self.drambone.bus, addr=ddr_addr)
+
+ # additional SRAM at address if DRAM is not also at 0x0
+ # (TODO, check Flash, and HyperRAM as well)
+ if ddr_addr != 0x0:
+ sram_width = 32
+ self.bootmem = SRAMPeripheral(size=0x8000,
+ data_width=sram_width,
+ writable=True)
+ self._decoder.add(self.bootmem.bus, addr=0x0) # RAM at 0x0
++>>>>>>> origin/ddr3
# SPI controller
if spi_0_pins is not None and fpga in ['sim',
if hasattr(self, "cpu"):
m.submodules.intc = self.intc
m.submodules.extcore = self.cpu
+ m.submodules.dbusasyncbr = self.dbusasyncbr
+ m.submodules.ibusasyncbr = self.ibusasyncbr
m.submodules.dbuscvt = self.dbusdowncvt
m.submodules.ibuscvt = self.ibusdowncvt
- # create stall sigs, assume wishbone classic
- #ibus, dbus = self.cvtibus, self.cvtdbus
- #comb += ibus.stall.eq(ibus.stb & ~ibus.ack)
- #comb += dbus.stall.eq(dbus.stb & ~dbus.ack)
+ # add wb async bridge verilog source. assumes a directory structure where
+ # microwatt has been checked out in a common subdirectory with:
+ # git clone https://github.com/alexforencich/verilog-wishbone.git
+ # git checkout d1fa24a0
+ verilog_wishbone = "../../verilog-wishbone/rtl"
+ pth = os.path.split(__file__)[0]
+ pth = os.path.join(pth, verilog_wishbone)
+ fname = os.path.abspath(pth)
+ print (fname)
+ self.dbusasyncbr.add_verilog_source(fname, platform)
+ self.ibusasyncbr.add_verilog_source(fname, platform)
+
m.submodules.arbiter = self._arbiter
m.submodules.decoder = self._decoder
if hasattr(self, "ddrphy"):
m.submodules.ddrphy = self.ddrphy
m.submodules.dramcore = self.dramcore
m.submodules.drambone = drambone = self.drambone
- # grrr, same problem with drambone: not WB4-pipe compliant
- comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack)
+
+ # add async wishbone bridges
+ if hasattr(self, "ddrphy_async_br"):
+ m.submodules.ddrphy_async_br = self.ddrphy_async_br
+ if hasattr(self, "dramcore_async_br"):
+ m.submodules.dramcore_async_br = self.dramcore_async_br
+ if hasattr(self, "drambone_async_br"):
+ m.submodules.drambone_async_br = self.drambone_async_br
+
+ # grrr, same problem with WB async bridge: not WB4-pipe compliant
+ dab = self.ddrphy_bus
+ if hasattr(dab, "stall"):
+ comb += dab.stall.eq(dab.cyc & ~dab.ack)
+ dab = self.dramcore_bus
+ if hasattr(dab, "stall"):
+ comb += dab.stall.eq(dab.cyc & ~dab.ack)
+ dab = self.drambone_bus
+ comb += dab.stall.eq(dab.cyc & ~dab.ack)
+
+ # add wb async bridge verilog source. assumes directory structure
+ # where bridge has been checked out in a common subdirectory with:
+ # git clone https://github.com/alexforencich/verilog-wishbone.git
+ # git checkout d1fa24a0
+ verilog_wishbone = "../../verilog-wishbone/rtl"
+ pth = os.path.split(__file__)[0]
+ pth = os.path.join(pth, verilog_wishbone)
+ fname = os.path.abspath(pth)
+ print (fname)
+ if hasattr(self, "ddrphy_async_br"): # NOTE(review): guard tests ddrphy_async_br but the call below is on dramcore_async_br; ddrphy_async_br itself never gets add_verilog_source here -- confirm intended
+ self.dramcore_async_br.add_verilog_source(fname, platform)
+ if hasattr(self, "drambone_async_br"):
+ self.drambone_async_br.add_verilog_source(fname, platform)
+ # DRAM clock control / reset signals
+ comb += self.crg.ddr_clk_stop.eq(self.ddrphy.init.stop)
+ comb += self.crg.ddr_clk_reset.eq(self.ddrphy.init.reset)
+
# add hyperram module
- if hasattr(self, "hyperram"):
- m.submodules.hyperram = hyperram = self.hyperram
+ for i, hr in enumerate(self.hyperram):
+ m.submodules["hyperram%d" % i] = hr
# grrr, same problem with hyperram: not WB4-pipe compliant
- comb += hyperram.bus.stall.eq(hyperram.bus.cyc & ~hyperram.bus.ack)
- # set 3 top CSn lines to zero for now
+ comb += hr.bus.stall.eq(hr.bus.cyc & ~hr.bus.ack)
+ # reset
if self.fpga == 'arty_a7':
- comb += hyperram.phy.rst_n.eq(ResetSignal())
+ comb += hr.phy.rst_n.eq(ResetSignal())
# add blinky lights so we know FPGA is alive
if platform is not None:
print ("platform", fpga, firmware, platform)
# set clock frequency
++<<<<<<< HEAD
+ clk_freq = 70e6
+ dram_clk_freq = None
+ if fpga == 'sim':
+ clk_freq = 100e6
+ dram_clk_freq = clk_freq
+ if fpga == 'isim':
+ clk_freq = 50e6 # below 50 mhz, stops DRAM being enabled
+ #dram_clk_freq = clk_freq
+ dram_clk_freq = 100e6
+ if fpga == 'versa_ecp5':
+ clk_freq = 50e6 # crank right down to timing threshold
+ #dram_clk_freq = 55e6
++=======
+ core_freq = 70e6
+ nest_freq = core_freq
+ if fpga == 'sim':
+ core_freq = 100e6
+ nest_freq = core_freq
+ if fpga == 'isim':
+ core_freq = 55e6 # below 50 mhz, stops DRAM being enabled
+ nest_freq = core_freq
+ if fpga == 'versa_ecp5':
+ core_freq = 55e6 # crank right down to test hyperram
+ nest_freq = core_freq
++>>>>>>> origin/ddr3
if fpga == 'versa_ecp5_85':
+ core_freq = 50e6
+ # DDR3 system interface is clocked at the nest frequency.
# 50MHz works. 100MHz works. 55MHz does NOT work.
++<<<<<<< HEAD
+ # Stick with multiples of 50MHz...
+ clk_freq = 50e6
+ dram_clk_freq = 100e6
+ if fpga == 'arty_a7':
+ clk_freq = 27.0e6 # urrr "working" with the QSPI core (25 mhz does not)
+ if fpga == 'ulx3s':
+ clk_freq = 40.0e6
+ if fpga == 'orangecrab' or fpga=='orangecrab_isim':
+ clk_freq = 50e6
+ core_clk_freq = clk_freq
+
+ # merge dram_clk_freq with clk_freq if the same
+ if clk_freq == dram_clk_freq:
+ dram_clk_freq = None
+
+ # see if dram can be enabled
+ enable_dram = False
+ if dram_clk_freq is not None and dram_clk_freq >= 50e6:
+ enable_dram = True
+ if dram_clk_freq is None and clk_freq >= 50e6:
+ enable_dram = True
++=======
+ # Stick with multiples of 25MHz...
+ # Note the actual DDR3 clock is 2x the nest, as the name
+ # implies...
+ nest_freq = 75e6
+ if fpga == 'arty_a7':
+ core_freq = 50e6
+ nest_freq = core_freq
+ if fpga == 'ulx3s':
+ core_freq = 40.0e6
+ nest_freq = core_freq
++>>>>>>> origin/ddr3
# select a firmware address
fw_addr = None
# get DDR resource pins, disable if clock frequency is below 50 mhz for now
ddr_pins = None
++<<<<<<< HEAD
+ if (enable_dram and platform is not None and
+ fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim',
+ 'orangecrab','orangecrab_isim']): # not yet 'arty_a7',
++=======
+ if (nest_freq >= 50e6 and platform is not None and
+ fpga in ['versa_ecp5', 'versa_ecp5_85', 'arty_a7', 'isim']):
++>>>>>>> origin/ddr3
ddr_pins = platform.request("ddr3", 0,
dir={"dq":"-", "dqs":"-"},
- xdr={"rst": 1, "clk":4, "a":4,
+ xdr={"rst": 4, "clk":4, "a":4,
"ba":4, "clk_en":4,
"odt":4, "ras":4, "cas":4, "we":4,
"cs": 4})
ddr_addr=0x00000000, # DRAM_BASE
spi0_addr=0xf0000000, # SPI0_BASE
spi0_cfg_addr=0xc0006000, # SPI0_CTRL_BASE
++<<<<<<< HEAD
+ eth0_cfg_addr=0xc000c000, # ETH0_CTRL_BASE (4k)
+ eth0_irqno=1, # ETH0_IRQ number (match microwatt)
+ hyperram_addr=hyperram_addr, # determined above
++=======
+ eth0_cfg_addr=0xc0004000, # ETH0_CTRL_BASE (4k)
+ eth0_irqno=0, # ETH0_IRQ number
+ hyperram_addr=0xa0000000, # HYPERRAM_BASE
++>>>>>>> origin/ddr3
fw_addr=fw_addr,
#fw_addr=None,
ddr_pins=ddr_pins,
ethmac_0_pins=ethmac_0_pins,
hyperram_pins=hyperram_pins,
firmware=firmware,
++<<<<<<< HEAD
+ xics_icp_addr=0xc000_4000, # XICS_ICP_BASE
+ xics_ics_addr=0xc000_5000, # XICS_ICS_BASE
+ clk_freq=clk_freq,
+ dram_clk_freq=dram_clk_freq,
+ core_clk_freq=core_clk_freq,
++=======
+ core_freq=core_freq,
+ nest_freq=nest_freq,
++>>>>>>> origin/ddr3
add_cpu=True)
if toolchain == 'Trellis':