From: Tobias Platen Date: Fri, 17 Feb 2023 17:51:46 +0000 (+0100) Subject: Merge remote-tracking branch 'origin/ddr3' X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a8cedbeace9f1bc6c34b427b8126b1aa0c0f2a72;p=ls2.git Merge remote-tracking branch 'origin/ddr3' --- a8cedbeace9f1bc6c34b427b8126b1aa0c0f2a72 diff --cc coldboot/coldboot.c index 65444ee,0f42a82..477f74a --- a/coldboot/coldboot.c +++ b/coldboot/coldboot.c @@@ -11,13 -11,6 +11,20 @@@ #include "elf64.h" ++//#ifdef ICARUS_DEBUG ++//HACK ++#define DEBUG2 0xfff00018 ++#define DEBUG3 0xfff0001C ++#define DEBUG4 0xfff00020 ++//#endif ++ +#define ORANGECRAB_MODE_REGISTERS 0x0320, 0x0002, 0x0200, 0x0000 + +static inline void mtspr(int sprnum, unsigned long val) +{ + __asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val)); +} + static inline uint32_t read32(const void *addr) { return *(volatile uint32_t *)addr; @@@ -94,17 -60,14 +101,21 @@@ void isr(void) } +extern void crank_up_qspi_level1(void); +extern int host_spi_flash_init(void); + ++#if 0 static bool fl_read(void *dst, uint32_t offset, uint32_t size) { uint8_t *d = dst; memcpy(d, (void *)(unsigned long)(SPI_FLASH_BASE + offset), size); return true; } ++#endif -static unsigned long copy_flash(unsigned int offset) ++#if 0 ++//requires working dram +static unsigned long copy_flash(unsigned int offset, unsigned int dst_offs) { Elf64_Ehdr ehdr; Elf64_Phdr ph; @@@ -170,6 -123,6 +181,7 @@@ dump return -1ul; } ++#endif // XXX @@@ -200,6 -153,6 +212,7 @@@ int gram_write(const struct gramCtx *ct return 0; } ++ int main(void) { const int kNumIterations = 14; int res, failcnt = 0; @@@ -207,7 -160,7 +220,13 @@@ unsigned long ftr, spi_offs=0x0; volatile uint32_t *ram = (uint32_t*)MEMORY_BASE; ++ #ifdef ICARUS_DEBUG ++ writel(0xFF,DEBUG2); ++ #endif console_init(); ++ #ifdef ICARUS_DEBUG ++ writel(0xFC,DEBUG2); ++ #endif //puts("Firmware launched...\n"); #if 1 @@@ -337,9 -272,7 +356,10 @@@ return 0; #endif - for (int persistence=0; persistence < 1000; persistence++) { + // init DRAM only if SYSCON says it exists (duh) + if (ftr & SYS_REG_INFO_HAS_DRAM) + { ++ puts("DRAM init... "); struct gramCtx ctx; @@@ -362,9 -295,9 +382,44 @@@ }; #endif struct gramProfile profile2; -- gram_init(&ctx, &profile, (void*)MEMORY_BASE, -- (void*)DRAM_CTRL_BASE, -- (void*)DRAM_INIT_BASE); ++ #ifdef ICARUS_DEBUG ++ writel(0xCC,DEBUG2); ++ #endif ++ gram_init(&ctx, &profile, (void*)MEMORY_BASE, /* "Main" memory alias, either BRAM or DRAM */ ++ ++ #if 0 ++ dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET|DFII_COMMAND_CS); //if software_control ++ //enable clocks and https://en.wikipedia.org/wiki/On-die_termination ++ ++ dfii_initseq(ctx, profile); -> writes to core ++ gram_load_calibration(ctx, profile); -> writes to phy ++ ++ dfii_setcontrol(ctx, DFII_CONTROL_SEL|DFII_CONTROL_RESET); ++ #endif ++ ++ ++ ++ /* TODO verify this, most likely the error is in the phy ++ * # create the core (bridge from PHY to DFI) ++ * see gram/core/__init__.py ++ * self.dfii = DFIInjector( ++ csr_bank=CSRPrefixProxy(bank, "dfii"), ++ * csr_bank.csr(4, "w") # sel, clk_en, odt, reset ++ * per phase : ++ * self._command = csr_bank.csr(6, "w") ++ * self._command_issue = csr_bank.csr(1, "w") ++ * ===================================================================== ++ dramcore = gramCore(phy=self.ddrphy, ++ geom_settings=ddrmodule.geom_settings, ++ timing_settings=ddrmodule.timing_settings, ++ #features=features, ++ clk_freq=self.dram_clk_freq) ++ */ ++ (void*)DRAM_CTRL_BASE, /* LiteDRAM control registers dramcore_addr=0xc8000000 */ ++ (void*)0xfff00000); /* guess: this was wrong => now fixed */ ++ #ifdef ICARUS_DEBUG ++ writel(0xDD,DEBUG2); ++ #endif puts("done\n"); puts("MR profile: "); @@@ -418,9 -351,9 +473,15 @@@ puts("Auto calibrating... "); res = gram_generate_calibration(&ctx, &profile2); if (res != GRAM_ERR_NONE) { -- puts("failed\n"); ++ //guess: always fails in simulation, only sometimes on real hardware ++ #ifdef ICARUS_DEBUG ++ writel(0xFFFFAAAA,DEBUG1);//stop simulation ++ #endif ++ puts("fAiLED\n"); //TODO: find other error locations ++ while(1){} gram_load_calibration(&ctx, &profile); } else { ++ //puts("NO_ERROR\n"); gram_load_calibration(&ctx, &profile2); } puts("done\n"); @@@ -440,6 -373,6 +501,7 @@@ for (size_t i = 0; i < kNumIterations; i++) { writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i])); } ++ #if 0 for (int dly = 0; dly < 8; dly++) { @@@ -452,11 -385,11 +514,15 @@@ uart_writeuint32(profile2.rdly_p1); gram_load_calibration(&ctx, &profile2); for (size_t i = 0; i < kNumIterations; i++) { -- if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) { ++ uint32_t r = readl((unsigned long)&(ram[i])); ++ uint32_t c = (0xDEAF0000 | i*4); ++ if (r!=c) { puts("fail : *(0x"); uart_writeuint32((unsigned long)(&ram[i])); -- puts(") = "); -- uart_writeuint32(readl((unsigned long)&(ram[i]))); ++ puts(") = [r]"); ++ uart_writeuint32(r); ++ puts(" != [c]"); ++ uart_writeuint32(r); puts("\n"); failcnt++; @@@ -480,6 -413,6 +546,9 @@@ if (failcnt > 10) { puts("Test canceled (more than 10 errors)\n"); ++ #ifdef ICARUS_DEBUG ++ writel(0xFFFFBBBB,DEBUG1);//stop simulation ++ #endif break; } } @@@ -487,58 -420,22 +556,64 @@@ } #endif puts("done\n"); - - // memcpy from SPI Flash to SDRAM then boot ++ #ifdef ICARUS_DEBUG ++ writel(0xFFFFFFFF,DEBUG1);//stop simulation ++ #endif + +#if 0 // ooo, annoying: won't work. no idea why + // temporary hard-hack: boot directly from QSPI. really + // should do something like detect at least... something + if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH)) + { + // jump to absolute address + mtspr(8, SPI_FLASH_BASE); // move address to LR + __asm__ volatile("blr"); + return 0; + } +#endif - ++ ++ #if 0 + // memcpy from SPI Flash then boot if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) && - (ftr & SYS_REG_INFO_HAS_DRAM) && (failcnt == 0)) { +/* + puts("ELF @ QSPI\n"); // identify ELF, copy if present, and get the start address - unsigned long faddr = copy_flash(spi_offs); + unsigned long faddr = copy_flash(spi_offs, + 0x600000); // hack! if (faddr != -1ul) { // jump to absolute address + mtspr(8, faddr); // move address to LR + __asm__ volatile("blr"); + + // works with head.S which copies r3 into ctr then does bctr return faddr; } + puts("copy QSPI\n"); +*/ + // another terrible hack: copy from flash at offset 0x600000 + // a block of size 0x600000 into mem address 0x600000, then + // jump to it. this allows a dtb image to be executed + puts("copy QSPI\n"); + volatile uint32_t *mem = (uint32_t*)0x1000000; + fl_read(mem, // destination in RAM + 0x600000, // offset into QSPI + 0x8000); // length - shorter (testing) 0x8000); + //0x1000000); // length + puts("dump mem\n"); + for (int i=0;i<256;i++) { + tmp = readl((unsigned long)&(mem[i])); + uart_writeuint32(tmp); + puts(" "); + if ((i & 0x7) == 0x7) puts("\r\n"); + } + puts("\r\n"); + mtspr(8, 0x1000000); // move address to LR + __asm__ volatile("blr"); } ++ #endif ++ return 0; } diff --cc simsoc.ys index a4adcef,72b0f6b..deaf105 --- a/simsoc.ys +++ b/simsoc.ys @@@ -22,14 -16,9 +22,12 @@@ read_verilog ../uart16550/rtl/verilog/ read_verilog ../uart16550/rtl/verilog/uart_receiver.v read_verilog ../uart16550/rtl/verilog/uart_tfifo.v read_verilog ../uart16550/rtl/verilog/uart_wb.v + +# Tercel QSPI read_verilog ../tercel-qspi/tercel/phy.v read_verilog ../tercel-qspi/tercel/wishbone_spi_master.v - -read_verilog ../verilog-wishbone/rtl/wb_async_reg.v +# WB Async Bridge +read_verilog ../verilog-wishbone/rtl/wb_async_reg.v - # errors in the ethmac rtl, comment out for now #read_verilog ../ethmac/rtl/verilog/eth_clockgen.v #read_verilog ../ethmac/rtl/verilog/eth_cop.v diff --cc src/ecp5_crg.py index 11bb8a9,36c8f1d..6130729 --- a/src/ecp5_crg.py +++ b/src/ecp5_crg.py @@@ -169,70 -170,14 +170,81 @@@ class PLL(Elaboratable) class ECP5CRG(Elaboratable): ++<<<<<<< HEAD + def __init__(self, sys_clk_freq=100e6, dram_clk_freq=None, + pod_bits=25, sync_bits=26, need_bridge=False): + """when dram_clk_freq=None, a dramsync domain is still created + but it is an alias of sync domain. likewise the 2x + """ + self.sys_clk_freq = sys_clk_freq + self.dram_clk_freq = dram_clk_freq + self.pod_bits = pod_bits # for init domain + self.sync_bits = sync_bits # for all other domains + self.need_bridge = need_bridge # insert ECLKBRIDGECS + assert pod_bits <= sync_bits, \ + "power-on-delay bits %d should " \ + " be less than sync_bits %d" % (pod_bits, sync_bits) + + def phase2_domain(self, m, pll, name, freq, esyncb): + """creates a domain that can be used with xdr=4 platform resources. + + this requires creating a domain at *twice* the frequency, + then halving it. the doubled-frequency can then go to the + eclk parts of a 4-phase IOPad: + pads.a.o_clk.eq(ClockSignal("dramsync")), + pads.a.o_fclk.eq(ClockSignal("dramsync2x")), + """ + + # create names + cd2x = "%s2x" % name + cd2x_ub = cd2x+"_unbuf" + cd = name + # Generating sync2x from extclk + cd_2x = ClockDomain(cd2x, local=False) + cd_2x_unbuf = ClockDomain(cd2x_ub, local=False, reset_less=True) + cd_ = ClockDomain("%s" % name, local=False) + + # create PLL clocks + pll.create_clkout(ClockSignal(cd2x_ub), 2*freq) + if esyncb: + if self.need_bridge: + sys2x_clk_ecsout = Signal() + m.submodules["%s_eclkbridgecs" % cd] = Instance("ECLKBRIDGECS", + i_CLK0 = ClockSignal(cd2x_ub), + i_SEL = 0, + o_ECSOUT = sys2x_clk_ecsout) + m.submodules["%s_eclksyncb" % cd] = Instance("ECLKSYNCB", + i_ECLKI = sys2x_clk_ecsout, + i_STOP = 0, + o_ECLKO = ClockSignal(cd2x)) + else: + m.submodules["%s_eclksyncb" % cd] = Instance("ECLKSYNCB", + i_ECLKI = ClockSignal(cd2x_ub), + i_STOP = 0, + o_ECLKO = ClockSignal(cd2x)) + else: + m.d.comb += ClockSignal(cd2x).eq(ClockSignal(cd2x_ub)) # no esyncb + m.domains += cd_2x_unbuf + m.domains += cd_2x + m.domains += cd_ + + # # Generating sync from sync2x + m.submodules["%s_clkdivf" % cd] = Instance("CLKDIVF", + p_DIV="2.0", + i_ALIGNWD=0, + i_CLKI=ClockSignal(cd2x), + i_RST=0, + o_CDIVX=ClockSignal(cd)) ++======= + def __init__(self, sys_clk_freq=100e6, core_clk_freq=100e6, pod_bits=25): + self.sys_clk_freq = sys_clk_freq + self.core_clk_freq = core_clk_freq + self.pod_bits = pod_bits ++>>>>>>> origin/ddr3 + + # DDR clock control signals + self.ddr_clk_stop = Signal() + self.ddr_clk_reset = Signal() def elaborate(self, platform): m = Module() @@@ -252,80 -197,76 +264,132 @@@ gsr0 = Signal() gsr1 = Signal() - m.submodules += [ - Instance("FD1S3AX", p_GSR="DISABLED", + m.submodules.gsr0 = Instance("FD1S3AX", p_GSR="DISABLED", i_CK=ClockSignal("rawclk"), i_D=~reset, - o_Q=gsr0), - Instance("FD1S3AX", p_GSR="DISABLED", + o_Q=gsr0) + m.submodules.gsr1 = Instance("FD1S3AX", p_GSR="DISABLED", i_CK=ClockSignal("rawclk"), i_D=gsr0, - o_Q=gsr1), - Instance("SGSR", i_CLK=ClockSignal("rawclk"), - i_GSR=gsr1), - ] + o_Q=gsr1) + m.submodules.sgsr = Instance("SGSR", i_CLK=ClockSignal("rawclk"), + i_GSR=gsr1) - # PLL - m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~reset) - - # Power-on delay (655us) + # Power-on delay podcnt = Signal(self.pod_bits, reset=-1) + synccnt = Signal(self.sync_bits, reset=-1) pod_done = Signal() ++<<<<<<< HEAD + sync_done = Signal() + with m.If((synccnt != 0) & pll.locked): + m.d.rawclk += synccnt.eq(synccnt-1) + with m.If((podcnt != 0) & pll.locked): ++======= + with m.If(podcnt != 0): ++>>>>>>> origin/ddr3 m.d.rawclk += podcnt.eq(podcnt-1) m.d.rawclk += pod_done.eq(podcnt == 0) + m.d.rawclk += sync_done.eq(synccnt == 0) ++<<<<<<< HEAD + # and reset which only drops when the PLL is done and pod completes ++======= + # PLL + m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~pod_done|~reset) + + # Generating sync2x (200Mhz) and init (25Mhz) from extclk + # sync is our "nest" frequency, cpu is the core frequency + # On ASIC, core >= nest, on FPGA, cpu <= nest... + cd_sync2x = ClockDomain("sync2x", local=False) + cd_sync2x_unbuf = ClockDomain("sync2x_unbuf", + local=False, reset_less=True) + cd_init = ClockDomain("init", local=False) + cd_sync = ClockDomain("sync", local=False) + cd_cpu = ClockDomain("cpu", local=False) + cd_dramsync = ClockDomain("dramsync", local=False) + + # create PLL clocks + pll.set_clkin_freq(platform.default_clk_frequency) + pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq) + pll.create_clkout(ClockSignal("init"), 25e6) + if self.sys_clk_freq == self.core_clk_freq: + m.d.comb += ClockSignal("cpu").eq(ClockSignal("sync")) + else: + pll.create_clkout(ClockSignal("cpu"), self.core_clk_freq) + m.submodules += Instance("ECLKSYNCB", + i_ECLKI = ClockSignal("sync2x_unbuf"), + i_STOP = self.ddr_clk_stop, + o_ECLKO = ClockSignal("sync2x")) + m.domains += cd_sync2x_unbuf + m.domains += cd_sync2x + m.domains += cd_init + m.domains += cd_sync + m.domains += cd_cpu + m.domains += cd_dramsync ++>>>>>>> origin/ddr3 reset_ok = Signal(reset_less=True) + sync_reset_ok = Signal(reset_less=True) m.d.comb += reset_ok.eq(~pll.locked|~pod_done) ++<<<<<<< HEAD + m.d.comb += sync_reset_ok.eq(~pll.locked|~sync_done) ++======= + m.d.comb += ResetSignal("init").eq(reset_ok) + m.d.comb += ResetSignal("sync").eq(reset_ok) + m.d.comb += ResetSignal("cpu").eq(reset_ok) + m.d.comb += ResetSignal("dramsync").eq(reset_ok|self.ddr_clk_reset) ++>>>>>>> origin/ddr3 - # # Generating sync (100Mhz) from sync2x + # create PLL input clock from platform default frequency + pll.set_clkin_freq(platform.default_clk_frequency) ++<<<<<<< HEAD + # single or double main sync clock domain. double needs a 2nd PLL + # to match up with the CLKESYNCB, one per quadrant inside the ECP5 + if self.dram_clk_freq is not None: + m.domains += ClockDomain("sync_unbuf", local=False, reset_less=True) + m.domains += ClockDomain("sync", local=False) + pll.create_clkout(ClockSignal("sync_unbuf"), self.sys_clk_freq) + m.d.comb += ClockSignal("sync").eq(ClockSignal("sync_unbuf")) + else: + # Generating sync2x and sync from extclk, which is *only* how + # xdr=4 can be requested on the sync domain. also do not request + # an edge-clock-stop + self.phase2_domain(m, pll, "sync", self.sys_clk_freq, True) + m.d.comb += ResetSignal("sync2x").eq(sync_reset_ok) + m.d.comb += ResetSignal("sync").eq(sync_reset_ok) + + # DRAM clock: if not requested set to sync, otherwise create with + # a CLKESYNCB (which is set to no-stop at the moment) + if self.dram_clk_freq is not None: + self.phase2_domain(m, pll, "dramsync", self.dram_clk_freq, True) + else: + # alias dramsync and dramsync2x to sync and sync2x + cd_dramsync = ClockDomain("dramsync", local=False) + m.domains += cd_dramsync + m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync")) + # and a dram 2x sigh + cd_dramsync2x = ClockDomain("dramsync2x", local=False) + m.domains += cd_dramsync2x + m.d.comb += ClockSignal("dramsync2x").eq(ClockSignal("sync2x")) + # resets for the dram domains + m.d.comb += ResetSignal("dramsync2x").eq(sync_reset_ok) + m.d.comb += ResetSignal("dramsync").eq(sync_reset_ok) ++======= + m.submodules += Instance("CLKDIVF", + p_DIV="2.0", + i_ALIGNWD=0, + i_CLKI=ClockSignal("sync2x"), + i_RST=ResetSignal("dramsync"), + o_CDIVX=ClockSignal("sync")) ++>>>>>>> origin/ddr3 - # temporarily set dram sync clock exactly equal to main sync - m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync")) + # create 25 mhz "init" clock, straight (no 2x phase stuff) + # this domain can be used before all others, has its own delay + # (sync_bits) + cd_init = ClockDomain("init", local=False) + pll.create_clkout(ClockSignal("init"), 25e6) + m.domains += cd_init + m.d.comb += ResetSignal("init").eq(reset_ok) return m diff --cc src/ls2.py index 5ffab70,3f1e78a..1d40359 --- a/src/ls2.py +++ b/src/ls2.py @@@ -240,20 -256,17 +261,25 @@@ class WB64to32Convert(Elaboratable) class DDR3SoC(SoC, Elaboratable): def __init__(self, *, fpga, - dram_cls, - uart_pins, spi_0_pins, ethmac_0_pins, - ddr_pins, ddrphy_addr, dramcore_addr, ddr_addr, - fw_addr=0x0000_0000, - firmware=None, - spi0_addr, spi0_cfg_addr, - eth0_cfg_addr, eth0_irqno, + dram_cls=None, + uart_pins=None, spi_0_pins=None, ethmac_0_pins=None, + ddr_pins=None, ddrphy_addr=None, + dramcore_addr=None, ddr_addr=None, + fw_addr=0x0000_0000, firmware=None, + uart_addr=None, uart_irqno=0, + spi0_addr=None, spi0_cfg_addr=None, + eth0_cfg_addr=None, eth0_irqno=None, hyperram_addr=None, hyperram_pins=None, ++<<<<<<< HEAD + xics_icp_addr=None, xics_ics_addr=None, + clk_freq=50e6, + dram_clk_freq=None, + core_clk_freq=50e6, ++======= + nest_freq=50e6, + core_freq=50e6, ++>>>>>>> origin/ddr3 add_cpu=True): # wishbone routing is as follows: @@@ -265,15 -278,15 +291,17 @@@ # | | # +--+--+ # | + # WBAsyncBridge + # | # 64to32DownCvt # | - # arbiter------------------------------------------+ - # | | - # +---decoder----+--------+---------+-------+--------+ | - # | | | | | | | | - # uart XICS CSRs DRAM XIP SPI HyperRAM EthMAC + # arbiter------------------------------------------------------+ + # | | + # +---decoder----+--------+---------------+-------------+--------+ | + # | | | | | | | | + # | | | WBAsyncBridge | | | | + # | | | | | | | | + # uart XICS CSRs DRAM XIP SPI HyperRAM EthMAC # set up wishbone bus arbiter and decoder. arbiter routes, # decoder maps local-relative addressed satellites to global addresses @@@ -290,30 -303,43 +318,68 @@@ # set up clock request generator pod_bits = 25 ++<<<<<<< HEAD + sync_bits = 26 + need_bridge=False + if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s', + 'orangecrab','orangecrab_isim', 'rcs_arctic_tern_bmc_card']: + if fpga in ['isim','orangecrab_isim']: + pod_bits = 5 + sync_bits = 6 + if fpga in ['orangecrab', 'orangecrab_sim', + 'rcs_arctic_tern_bmc_card']: + need_bridge=True + self.crg = ECP5CRG(clk_freq, dram_clk_freq=dram_clk_freq, + pod_bits=pod_bits, sync_bits=sync_bits, + need_bridge=need_bridge) ++======= + if fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', 'ulx3s']: + if fpga in ['isim']: + pod_bits = 6 + self.crg = ECP5CRG(sys_clk_freq=nest_freq, core_clk_freq=core_freq, + pod_bits=pod_bits) ++>>>>>>> origin/ddr3 if fpga in ['arty_a7']: - self.crg = ArtyA7CRG(clk_freq) + self.crg = ArtyA7CRG(core_freq) - # set up CPU, with 64-to-32-bit downconverters + self.dram_clk_freq = dram_clk_freq + if self.dram_clk_freq is None: + self.dram_clk_freq = clk_freq + + # set up CPU, with 64-to-32-bit downconverters, and a delayed Reset if add_cpu: ++<<<<<<< HEAD + self.cpu = ExternalCore(name="ext_core") + ++======= + drs = DomainRenamer("cpu") + + self.cpu = drs(ExternalCore(name="ext_core")) + if nest_freq == core_freq: + # No Wishbone bridge required + self.asbrdbus = self.cpu.dbus + self.asbribus = self.cpu.ibus + else: + # Asynchronous Wishbone bridge required + asbrdbus = wishbone.Interface(addr_width=32, data_width=64, + granularity=8, features={'stall'}) + asbribus = wishbone.Interface(addr_width=32, data_width=64, + granularity=8, features={'stall'}) + self.dbusasyncbr = WBAsyncBridge(master_bus=self.cpu.dbus, + slave_bus=asbrdbus, + master_clock_domain="cpu", + slave_clock_domain=None, + address_width=32, data_width=64, + granularity=8, features={'stall'}) + self.ibusasyncbr = WBAsyncBridge(master_bus=self.cpu.ibus, + slave_bus=asbribus, + master_clock_domain="cpu", + slave_clock_domain=None, + address_width=32, data_width=64, + granularity=8, features={'stall'}) + self.asbrdbus = asbrdbus + self.asbribus = asbribus ++>>>>>>> origin/ddr3 cvtdbus = wishbone.Interface(addr_width=30, data_width=32, granularity=8, features={'stall'}) cvtibus = wishbone.Interface(addr_width=30, data_width=32, @@@ -361,12 -368,11 +427,17 @@@ self._decoder.add(self.bootmem.bus, addr=fw_addr) # ROM at fw_addr # System Configuration info - # offset executable ELF payload at 1 megabyte offset (1<<20) - spi_offset = 1<<20 if (spi_0_pins is not None) else None + # offset executable ELF payload at 6 megabyte offset (2<<20) + spi_offset = 2<<20 if (spi_0_pins is not None) else None dram_offset = ddr_addr if (ddr_pins is not None) else None ++<<<<<<< HEAD + self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq, + mem_clk_freq=self.dram_clk_freq, + core_clk_freq=core_clk_freq, ++======= + self.syscon = MicrowattSYSCON(sys_clk_freq=nest_freq, + core_clk_freq=core_freq, ++>>>>>>> origin/ddr3 has_uart=(uart_pins is not None), spi_offset=spi_offset, dram_addr=dram_offset) @@@ -416,139 -417,48 +487,180 @@@ tRAS=44)} """ ++<<<<<<< HEAD + # DRAM Module. first, create the (triple) modules: + # * DDR PHY + # * gram Core: presents PHY with a DFI Interface + # * gram Bone (aka gram-with-wishbone) connects wishbone to DFI + # from there it gets a little complicated because of supporting + # several options: simulation, synchronous, and asynchronous clocks. + # dram_clk_freq can *never* be set equal to clk_freq, if it is, + # it's assumed to be synchronous, and the dram Domains need renaming + + if ddr_pins is not None: # or fpga == 'sim': + ddrmodule = dram_cls(self.dram_clk_freq, "1:2") # match DDR3 P/N + + # remap both the sync domain (wherever it occurs) and + # the sync2x domain, if dram frequency is specified and + # not equal to the core clock + drs = None + if dram_clk_freq is not None or fpga == 'sim': + drs = lambda x: x + else: + drs = DomainRenamer({"sync": "dramsync", + "sync2x": "dramsync2x"}) ++======= + # DRAM Module + if ddr_pins is not None or fpga == 'sim': + ddrmodule = dram_cls(nest_freq, "1:2") # match DDR3 ASIC P/N ++>>>>>>> origin/ddr3 - #drs = lambda x: x - drs = DomainRenamer("dramsync") + features = set() + if dram_clk_freq is None: + features.add("stall") + # create the PHY (fake one for sim) if fpga == 'sim': + settings = sim_ddr3_settings(self.dram_clk_freq) self.ddrphy = FakePHY(module=ddrmodule, ++<<<<<<< HEAD + settings=settings, + verbosity=SDRAM_VERBOSE_DBG, + clk_freq=self.dram_clk_freq) + else: + self.ddrphy = drs(ECP5DDRPHY(ddr_pins, + #features=features, + sys_clk_freq=self.dram_clk_freq)) ++======= + settings=sim_ddr3_settings(nest_freq), + verbosity=SDRAM_VERBOSE_DBG, + clk_freq=nest_freq) + else: + self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=nest_freq)) + self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) ++>>>>>>> origin/ddr3 + # create the core (bridge from PHY to DFI) dramcore = gramCore(phy=self.ddrphy, geom_settings=ddrmodule.geom_settings, timing_settings=ddrmodule.timing_settings, ++<<<<<<< HEAD + #features=features, + clk_freq=self.dram_clk_freq) + self.dramcore = drs(dramcore) + + # create the wishbone presentation (wishbone to DFI) + drambone = gramWishbone(dramcore, features=features) + self.drambone = drs(drambone) + + # this is the case where sys_clk === dram_clk. no ASync Bridge + # needed, so just let the phy core and wb-dfi be connected + # directly to WB decoder. both are running in "sync" domain + # (because of the DomainRenamer, above) + + if ddr_pins is not None and dram_clk_freq is None: + self.ddrphy_bus = self.ddrphy.bus + self.dramcore_bus = self.dramcore.bus + self.drambone_bus = self.drambone.bus + + # this covers the case where sys_clk != dram_clk: three separate + # ASync Bridges are constructed (!) and the interface that's to + # be wired to the WB decoder is the async bus because that's running + # in the "sync" domain. + + if ddr_pins is not None and dram_clk_freq is not None: + # Set up Wishbone asynchronous bridge + pabus = wishbone.Interface(addr_width=self.ddrphy.bus.addr_width, + data_width=self.ddrphy.bus.data_width, + granularity=self.ddrphy.bus.granularity, + features={'stall'}) + self.ddrphy_bus = pabus + self.ddrphy_bus.memory_map = self.ddrphy.bus.memory_map + + pabr = WBAsyncBridge(master_bus=self.ddrphy_bus, + slave_bus=self.ddrphy.bus, + master_clock_domain=None, + slave_clock_domain="dramsync", + address_width=self.ddrphy.bus.addr_width, + data_width=self.ddrphy.bus.data_width, + granularity=self.ddrphy.bus.granularity) + self.ddrphy_async_br = pabr + + # Set up Wishbone asynchronous bridge + dab = wishbone.Interface(addr_width=self.dramcore.bus.addr_width, + data_width=self.dramcore.bus.data_width, + granularity=self.dramcore.bus.granularity, + features={'stall'}) + self.dramcore_bus = dab + self.dramcore_bus.memory_map = self.dramcore.bus.memory_map + + dac = WBAsyncBridge(master_bus=self.dramcore_bus, + slave_bus=self.dramcore.bus, + master_clock_domain=None, + slave_clock_domain="dramsync", + address_width=self.dramcore.bus.addr_width, + data_width=self.dramcore.bus.data_width, + granularity=self.dramcore.bus.granularity) + self.dramcore_async_br = dac + + # Set up Wishbone asynchronous bridge + bab = wishbone.Interface(addr_width=self.drambone.bus.addr_width, + data_width=self.drambone.bus.data_width, + granularity=self.drambone.bus.granularity, + features={'stall'}) + self.drambone_bus = bab + self.drambone_bus.memory_map = self.drambone.bus.memory_map + + bab = WBAsyncBridge(master_bus=self.drambone_bus, + slave_bus=self.drambone.bus, + master_clock_domain=None, + slave_clock_domain="dramsync", + address_width=self.drambone.bus.addr_width, + data_width=self.drambone.bus.data_width, + granularity=self.drambone.bus.granularity) + self.drambone_async_br = bab + + if ddr_pins is not None: + # Add wishbone decoders + self._decoder.add(self.dramcore_bus, addr=dramcore_addr) + self._decoder.add(self.drambone_bus, addr=ddr_addr) + self._decoder.add(self.ddrphy_bus, addr=ddrphy_addr) + + # additional SRAM at address if DRAM is not also at 0x0 + # (TODO, check Flash, and HyperRAM as well) + if ((ddr_pins is None or ddr_addr != 0x0) and fw_addr != 0 and + hyperram_addr[0] != 0x0): + print ("SRAM 0x8000 at address 0x0") + sram_width = 32 + self.sram = SRAMPeripheral(size=0x8000, + data_width=sram_width, + writable=True) + self._decoder.add(self.sram.bus, addr=0x0) # RAM at 0x0 ++======= + clk_freq=nest_freq) + if fpga == 'sim': + self.dramcore = dramcore + else: + self.dramcore = drs(dramcore) + self._decoder.add(self.dramcore.bus, addr=dramcore_addr) + + # map the DRAM onto Wishbone, XXX use stall but set classic below + drambone = gramWishbone(dramcore, features={'stall'}) + if fpga == 'sim': + self.drambone = drambone + else: + self.drambone = drs(drambone) + self._decoder.add(self.drambone.bus, addr=ddr_addr) + + # additional SRAM at address if DRAM is not also at 0x0 + # (TODO, check Flash, and HyperRAM as well) + if ddr_addr != 0x0: + sram_width = 32 + self.bootmem = SRAMPeripheral(size=0x8000, + data_width=sram_width, + writable=True) + self._decoder.add(self.bootmem.bus, addr=0x0) # RAM at 0x0 ++>>>>>>> origin/ddr3 # SPI controller if spi_0_pins is not None and fpga in ['sim', @@@ -655,56 -554,48 +767,74 @@@ if hasattr(self, "cpu"): m.submodules.intc = self.intc m.submodules.extcore = self.cpu + m.submodules.dbusasyncbr = self.dbusasyncbr + m.submodules.ibusasyncbr = self.ibusasyncbr m.submodules.dbuscvt = self.dbusdowncvt m.submodules.ibuscvt = self.ibusdowncvt - # create stall sigs, assume wishbone classic - #ibus, dbus = self.cvtibus, self.cvtdbus - #comb += ibus.stall.eq(ibus.stb & ~ibus.ack) - #comb += dbus.stall.eq(dbus.stb & ~dbus.ack) + # add wb async bridge verilog source. assumes a directory structure where + # microwatt has been checked out in a common subdirectory with: + # git clone https://github.com/alexforencich/verilog-wishbone.git + # git checkout d1fa24a0 + verilog_wishbone = "../../verilog-wishbone/rtl" + pth = os.path.split(__file__)[0] + pth = os.path.join(pth, verilog_wishbone) + fname = os.path.abspath(pth) + print (fname) + self.dbusasyncbr.add_verilog_source(fname, platform) + self.ibusasyncbr.add_verilog_source(fname, platform) + m.submodules.arbiter = self._arbiter m.submodules.decoder = self._decoder if hasattr(self, "ddrphy"): m.submodules.ddrphy = self.ddrphy m.submodules.dramcore = self.dramcore m.submodules.drambone = drambone = self.drambone - # grrr, same problem with drambone: not WB4-pipe compliant - comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack) + + # add async wishbone bridges + if hasattr(self, "ddrphy_async_br"): + m.submodules.ddrphy_async_br = self.ddrphy_async_br + if hasattr(self, "dramcore_async_br"): + m.submodules.dramcore_async_br = self.dramcore_async_br + if hasattr(self, "drambone_async_br"): + m.submodules.drambone_async_br = self.drambone_async_br + + # grrr, same problem with WB async bridge: not WB4-pipe compliant + dab = self.ddrphy_bus + if hasattr(dab, "stall"): + comb += dab.stall.eq(dab.cyc & ~dab.ack) + dab = self.dramcore_bus + if hasattr(dab, "stall"): + comb += dab.stall.eq(dab.cyc & ~dab.ack) + dab = self.drambone_bus + comb += dab.stall.eq(dab.cyc & ~dab.ack) + + # add wb async bridge verilog source. assumes directory structure + # where bridge has been checked out in a common subdirectory with: + # git clone https://github.com/alexforencich/verilog-wishbone.git + # git checkout d1fa24a0 + verilog_wishbone = "../../verilog-wishbone/rtl" + pth = os.path.split(__file__)[0] + pth = os.path.join(pth, verilog_wishbone) + fname = os.path.abspath(pth) + print (fname) + if hasattr(self, "ddrphy_async_br"): + self.dramcore_async_br.add_verilog_source(fname, platform) + if hasattr(self, "drambone_async_br"): + self.drambone_async_br.add_verilog_source(fname, platform) + # DRAM clock control / reset signals + comb += self.crg.ddr_clk_stop.eq(self.ddrphy.init.stop) + comb += self.crg.ddr_clk_reset.eq(self.ddrphy.init.reset) + # add hyperram module - if hasattr(self, "hyperram"): - m.submodules.hyperram = hyperram = self.hyperram + for i, hr in enumerate(self.hyperram): + m.submodules["hyperram%d" % i] = hr # grrr, same problem with hyperram: not WB4-pipe compliant - comb += hyperram.bus.stall.eq(hyperram.bus.cyc & ~hyperram.bus.ack) - # set 3 top CSn lines to zero for now + comb += hr.bus.stall.eq(hr.bus.cyc & ~hr.bus.ack) + # reset if self.fpga == 'arty_a7': - comb += hyperram.phy.rst_n.eq(ResetSignal()) + comb += hr.phy.rst_n.eq(ResetSignal()) # add blinky lights so we know FPGA is alive if platform is not None: @@@ -868,41 -723,31 +998,70 @@@ def build_platform(fpga, firmware) print ("platform", fpga, firmware, platform) # set clock frequency ++<<<<<<< HEAD + clk_freq = 70e6 + dram_clk_freq = None + if fpga == 'sim': + clk_freq = 100e6 + dram_clk_freq = clk_freq + if fpga == 'isim': + clk_freq = 50e6 # below 50 mhz, stops DRAM being enabled + #dram_clk_freq = clk_freq + dram_clk_freq = 100e6 + if fpga == 'versa_ecp5': + clk_freq = 50e6 # crank right down to timing threshold + #dram_clk_freq = 55e6 ++======= + core_freq = 70e6 + nest_freq = core_freq + if fpga == 'sim': + core_freq = 100e6 + nest_freq = core_freq + if fpga == 'isim': + core_freq = 55e6 # below 50 mhz, stops DRAM being enabled + nest_freq = core_freq + if fpga == 'versa_ecp5': + core_freq = 55e6 # crank right down to test hyperram + nest_freq = core_freq ++>>>>>>> origin/ddr3 if fpga == 'versa_ecp5_85': + core_freq = 50e6 + # DDR3 system interface is clocked at the nest frequency. # 50MHz works. 100MHz works. 55MHz does NOT work. ++<<<<<<< HEAD + # Stick with multiples of 50MHz... + clk_freq = 50e6 + dram_clk_freq = 100e6 + if fpga == 'arty_a7': + clk_freq = 27.0e6 # urrr "working" with the QSPI core (25 mhz does not) + if fpga == 'ulx3s': + clk_freq = 40.0e6 + if fpga == 'orangecrab' or fpga=='orangecrab_isim': + clk_freq = 50e6 + core_clk_freq = clk_freq + + # merge dram_clk_freq with clk_freq if the same + if clk_freq == dram_clk_freq: + dram_clk_freq = None + + # see if dram can be enabled + enable_dram = False + if dram_clk_freq is not None and dram_clk_freq >= 50e6: + enable_dram = True + if dram_clk_freq is None and clk_freq >= 50e6: + enable_dram = True ++======= + # Stick with multiples of 25MHz... + # Note the actual DDR3 clock is 2x the nest, as the name + # implies... + nest_freq = 75e6 + if fpga == 'arty_a7': + core_freq = 50e6 + nest_freq = core_freq + if fpga == 'ulx3s': + core_freq = 40.0e6 + nest_freq = core_freq ++>>>>>>> origin/ddr3 # select a firmware address fw_addr = None @@@ -924,12 -764,11 +1083,17 @@@ # get DDR resource pins, disable if clock frequency is below 50 mhz for now ddr_pins = None ++<<<<<<< HEAD + if (enable_dram and platform is not None and + fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim', + 'orangecrab','orangecrab_isim']): # not yet 'arty_a7', ++======= + if (nest_freq >= 50e6 and platform is not None and + fpga in ['versa_ecp5', 'versa_ecp5_85', 'arty_a7', 'isim']): ++>>>>>>> origin/ddr3 ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"}, - xdr={"rst": 1, "clk":4, "a":4, + xdr={"rst": 4, "clk":4, "a":4, "ba":4, "clk_en":4, "odt":4, "ras":4, "cas":4, "we":4, "cs": 4}) @@@ -1094,9 -895,9 +1258,15 @@@ ddr_addr=0x00000000, # DRAM_BASE spi0_addr=0xf0000000, # SPI0_BASE spi0_cfg_addr=0xc0006000, # SPI0_CTRL_BASE ++<<<<<<< HEAD + eth0_cfg_addr=0xc000c000, # ETH0_CTRL_BASE (4k) + eth0_irqno=1, # ETH0_IRQ number (match microwatt) + hyperram_addr=hyperram_addr, # determined above ++======= + eth0_cfg_addr=0xc0004000, # ETH0_CTRL_BASE (4k) + eth0_irqno=0, # ETH0_IRQ number + hyperram_addr=0xa0000000, # HYPERRAM_BASE ++>>>>>>> origin/ddr3 fw_addr=fw_addr, #fw_addr=None, ddr_pins=ddr_pins, @@@ -1107,11 -906,8 +1277,16 @@@ ethmac_0_pins=ethmac_0_pins, hyperram_pins=hyperram_pins, firmware=firmware, ++<<<<<<< HEAD + xics_icp_addr=0xc000_4000, # XICS_ICP_BASE + xics_ics_addr=0xc000_5000, # XICS_ICS_BASE + clk_freq=clk_freq, + dram_clk_freq=dram_clk_freq, + core_clk_freq=core_clk_freq, ++======= + core_freq=core_freq, + nest_freq=nest_freq, ++>>>>>>> origin/ddr3 add_cpu=True) if toolchain == 'Trellis':