From: Luke Kenneth Casson Leighton Date: Mon, 21 Feb 2022 18:40:25 +0000 (+0000) Subject: * use readl and writel for accessing memory X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7b4774315f144decad50ace3a46b44b85665015b;p=ls2.git * use readl and writel for accessing memory * add #defines for timer loops to make it possible to shorten time taken in simulations when running firmware in verilator * try pulling DRAM DFII reset HI under software control * split out DomainRenamer for DRAM Core * add strange-looking way to expose DFII pads on FakePHY (simulated PHY) which ensures that, under simulation, a batch of HDL does not get deleted (the clk_en, reset and odt parameters deep in the DFII interface connected to CSRs are *not* actually connected to anything "real" and consequently get deleted... oh and anything connecting to them) * add some firmware debug print statements that need to be removed at some point --- diff --git a/coldboot/Makefile b/coldboot/Makefile index ef71b65..1180178 100644 --- a/coldboot/Makefile +++ b/coldboot/Makefile @@ -5,6 +5,13 @@ ifneq ("$(ARCH)", "ppc64le") endif endif +# a multiplier factor on sleep loops. 
this allows simulations to run +# at much shorter intervals +LONG_TIMER_MULT ?= 10000 +SHORT_TIMER_MULT ?= 100 +#LONG_TIMER_MULT ?= 1 +#SHORT_TIMER_MULT ?= 1 + LIBGRAMDIR = ../libgram LIBGRAMINC = ../libgram/include @@ -20,7 +27,9 @@ CFLAGS = -Os -g -Wall -std=c99 -msoft-float -mno-string \ -mno-multiple -mno-vsx -mno-altivec -mlittle-endian \ -fno-stack-protector -mstrict-align -ffreestanding \ -fdata-sections -ffunction-sections -I../include \ - -I $(LIBGRAMINC) + -I $(LIBGRAMINC) \ + -DLONG_TIMER_MULT=$(LONG_TIMER_MULT) \ + -DSHORT_TIMER_MULT=$(SHORT_TIMER_MULT) ASFLAGS = $(CFLAGS) LDFLAGS = -T powerpc.lds diff --git a/coldboot/coldboot.c b/coldboot/coldboot.c index 2aec9f2..d85ae76 100644 --- a/coldboot/coldboot.c +++ b/coldboot/coldboot.c @@ -3,6 +3,7 @@ #include "console.h" #include "microwatt_soc.h" +#include "io.h" #include #include @@ -84,9 +85,11 @@ int main(void) { for (size_t i = 0; i < 8; i++) { profile2.rdly_p0 = i; gram_load_calibration(&ctx, &profile2); + puts("loaded "); gram_reset_burstdet(&ctx); + puts("burstreset "); for (size_t j = 0; j < 128; j++) { - tmp = ram[j]; + tmp = readl((unsigned long)&ram[i]); } if (gram_read_burstdet(&ctx, 0)) { puts("1"); @@ -100,9 +103,11 @@ int main(void) { for (size_t i = 0; i < 8; i++) { profile2.rdly_p1 = i; gram_load_calibration(&ctx, &profile2); + puts("loaded "); gram_reset_burstdet(&ctx); + puts("burstreset "); for (size_t j = 0; j < 128; j++) { - tmp = ram[j]; + tmp = readl((unsigned long)&ram[i]); } if (gram_read_burstdet(&ctx, 1)) { puts("1"); diff --git a/libgram/src/calibration.c b/libgram/src/calibration.c index bc1b223..21e698c 100644 --- a/libgram/src/calibration.c +++ b/libgram/src/calibration.c @@ -6,6 +6,7 @@ #include "dfii.h" #include "helpers.h" #include "io.h" +#include "console.h" static void set_rdly(const struct gramCtx *ctx, unsigned int phase, unsigned int rdly) { #ifdef GRAM_RW_FUNC @@ -16,9 +17,9 @@ static void set_rdly(const struct gramCtx *ctx, unsigned int phase, unsigned int } 
#else if (phase == 0) { - writel(rdly, (unsigned long)&ctx->phy->rdly_p0); + writel(rdly, (unsigned long)&(ctx->phy->rdly_p0)); } else if (phase == 1) { - writel(rdly, (unsigned long)&ctx->phy->rdly_p1); + writel(rdly, (unsigned long)&(ctx->phy->rdly_p1)); } #endif } @@ -35,7 +36,7 @@ bool gram_read_burstdet(const struct gramCtx *ctx, int phase) { #ifdef GRAM_RW_FUNC return !!(gram_read(ctx, (void*)&(ctx->phy->burstdet)) & (1 << phase)); #else - return !!(readl((unsigned long)&ctx->phy->burstdet) & (1 << phase)); + return !!(readl((unsigned long)&(ctx->phy->burstdet)) & (1 << phase)); #endif } @@ -51,16 +52,19 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro (void)tmp; + puts("find min"); // Find minimal rdly for (rdly = 0; rdly < 8; rdly++) { profile->rdly_p0 = rdly; gram_load_calibration(ctx, profile); gram_reset_burstdet(ctx); + puts("reset burst"); for (i = 0; i < 128; i++) { - tmp = ram[i]; + tmp = readl(&ram[i]); } + puts("read burst?"); if (gram_read_burstdet(ctx, 0)) { min_rdly_p0 = rdly; break; @@ -75,7 +79,7 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { - tmp = ram[i]; + tmp = readl(&ram[i]); } if (gram_read_burstdet(ctx, 1)) { @@ -93,7 +97,7 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { - tmp = ram[i]; + tmp = readl(&ram[i]); } if (!gram_read_burstdet(ctx, 0)) { @@ -110,7 +114,7 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { - tmp = ram[i]; + tmp = readl(&ram[i]); } if (!gram_read_burstdet(ctx, 1)) { @@ -132,7 +136,10 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro void gram_load_calibration(const struct gramCtx *ctx, const struct gramProfile *profile) { dfii_setsw(ctx, true); + puts("set rdly 0?"); set_rdly(ctx, 
0, profile->rdly_p0); + puts("set rdly 1?"); set_rdly(ctx, 1, profile->rdly_p1); + puts("set hw"); dfii_setsw(ctx, false); } diff --git a/libgram/src/dfii.c b/libgram/src/dfii.c index 06aaf99..7fbf927 100644 --- a/libgram/src/dfii.c +++ b/libgram/src/dfii.c @@ -16,9 +16,10 @@ static void dfii_setcontrol(const struct gramCtx *ctx, uint32_t val) { void dfii_setsw(const struct gramCtx *ctx, bool software_control) { if (software_control) { - dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT); + dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT| + DFII_CONTROL_RESET); } else { - dfii_setcontrol(ctx, DFII_CONTROL_SEL|DFII_CONTROL_RESET); + dfii_setcontrol(ctx, DFII_CONTROL_SEL); } } @@ -61,13 +62,13 @@ void dfii_initseq(const struct gramCtx *ctx, const struct gramProfile *profile) dfii_set_p0_address(ctx, 0x0); dfii_set_p0_baddress(ctx, 0); dfii_setcontrol(ctx, DFII_CONTROL_ODT); - cdelay(50000); + cdelay(5*LONG_TIMER_MULT); /* Bring CKE high */ dfii_set_p0_address(ctx, 0x0); dfii_set_p0_baddress(ctx, 0); dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT); - cdelay(10000); + cdelay(1*LONG_TIMER_MULT); /* Load Mode Register 2, CWL=5 */ dfii_set_mr(ctx, 2, profile->mode_registers[2]); @@ -81,14 +82,14 @@ void dfii_initseq(const struct gramCtx *ctx, const struct gramProfile *profile) /* Load Mode Register 0, CL=6, BL=8 */ dfii_set_mr(ctx, 0, profile->mode_registers[0]); if (profile->mode_registers[0] & MR0_DLL_RESET) { - cdelay(100); + cdelay(1*SHORT_TIMER_MULT); dfii_set_mr(ctx, 0, profile->mode_registers[0] & ~MR0_DLL_RESET); } - cdelay(600); + cdelay(6*SHORT_TIMER_MULT); /* ZQ Calibration */ dfii_set_p0_address(ctx, 0x400); dfii_set_p0_baddress(ctx, 0); dfii_p0_command(ctx, DFII_COMMAND_WE|DFII_COMMAND_CS); - cdelay(600); + cdelay(6*SHORT_TIMER_MULT); } diff --git a/src/ls2.py b/src/ls2.py index 926256c..dcdc0de 100644 --- a/src/ls2.py +++ b/src/ls2.py @@ -176,33 +176,32 @@ class DDR3SoC(SoC, Elaboratable): if ddr_pins is not None or fpga == 'sim': 
ddrmodule = MT41K256M16(clk_freq, "1:2") # match DDR3 ASIC P/N + drs = DomainRenamer("dramsync") if fpga == 'sim': self.ddrphy = FakePHY(module=ddrmodule, settings=sim_ddr3_settings(clk_freq), verbosity=SDRAM_VERBOSE_DBG) else: - self.ddrphy = DomainRenamer("dramsync")(ECP5DDRPHY(ddr_pins, - sys_clk_freq=clk_freq)) + self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=clk_freq)) self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) - drs = DomainRenamer("dramsync") dramcore = gramCore(phy=self.ddrphy, geom_settings=ddrmodule.geom_settings, timing_settings=ddrmodule.timing_settings, clk_freq=clk_freq) - self._decoder.add(dramcore.bus, addr=dramcore_addr) + if fpga == 'sim': + self.dramcore = dramcore + else: + self.dramcore = drs(dramcore) + self._decoder.add(self.dramcore.bus, addr=dramcore_addr) # map the DRAM onto Wishbone drambone = gramWishbone(dramcore) - self._decoder.add(drambone.bus, addr=ddr_addr) - - # for simulation do not use a separate clock domain (yet) if fpga == 'sim': - self.dramcore = dramcore self.drambone = drambone else: - self.dramcore = drs(dramcore) self.drambone = drs(drambone) + self._decoder.add(self.drambone.bus, addr=ddr_addr) self.memory_map = self._decoder.bus.memory_map @@ -292,8 +291,19 @@ class DDR3SoC(SoC, Elaboratable): # and at the moment that's just UART tx/rx. 
ports = [] ports += [self.uart.tx_o, self.uart.rx_i] - if hasattr(self, "ddrphy") and hasattr(self.ddrphy, "pads"): - ports += list(self.ddrphy.pads.fields.values()) + if hasattr(self, "ddrphy"): + if hasattr(self.ddrphy, "pads"): # real PHY + ports += list(self.ddrphy.pads.fields.values()) + else: # FakePHY, get at the dfii pads, stops deletion of nets + for phase in self.dramcore.dfii.master.phases: + print ("dfi master", phase) + ports += list(phase.fields.values()) + for phase in self.dramcore.dfii.slave.phases: + print ("dfi master", phase) + ports += list(phase.fields.values()) + for phase in self.dramcore.dfii._inti.phases: + print ("dfi master", phase) + ports += list(phase.fields.values()) return ports if __name__ == "__main__": @@ -323,6 +333,11 @@ if __name__ == "__main__": else: platform = None + # set clock frequency + clk_freq = 50e6 + #if fpga == 'sim': + #clk_freq = 100e6 + # select a firmware file firmware = None fw_addr = None @@ -355,13 +370,14 @@ if __name__ == "__main__": ddr_pins=ddr_pins, uart_pins=uart_pins, firmware=firmware, - clk_freq=50e6, + clk_freq=clk_freq, add_cpu=True) if toolchain == 'Trellis': # add -abc9 option to yosys synth_ecp5 #os.environ['NMIGEN_synth_opts'] = '-abc9 -nowidelut' - os.environ['NMIGEN_synth_opts'] = '-abc9' + #os.environ['NMIGEN_synth_opts'] = '-abc9' + os.environ['NMIGEN_synth_opts'] = '-nowidelut' if platform is not None: # build and upload it