From a7cb05b0104a89144ba7e16cf0cb86e98b8eb395 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 10 Mar 2022 12:41:09 +0000 Subject: [PATCH] sigh gramWishbone is not WB4-pipeline-burst-compliant compensate for this with the "usual" WB3 classic trick stall=cyc&~ack; --- Makefile | 6 +++--- coldboot/Makefile | 4 ++-- coldboot/coldboot.c | 38 +++++++++++++++++++++++--------------- coldboot/powerpc.lds | 8 ++++---- lib/console.c | 1 + libgram/src/calibration.c | 13 +++++-------- libgram/src/dfii.c | 16 ++++++++++------ libgram/src/dfii.h | 1 + libgram/src/init.c | 15 ++++++++++++--- src/ls2.py | 11 +++++++---- src/simsoctb.v | 2 +- 11 files changed, 69 insertions(+), 46 deletions(-) diff --git a/Makefile b/Makefile index 0943765..a41fe72 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ endif # Hello world MEMORY_SIZE=8192 -RAM_INIT_FILE=hello_world/hello_world.bin +#RAM_INIT_FILE=hello_world/hello_world.bin RAM_INIT_FILE=coldboot/coldboot.bin SIM_MAIN_BRAM=false @@ -44,8 +44,8 @@ FPGA_TARGET ?= verilator ifeq ($(FPGA_TARGET), verilator) RESET_LOW=true -CLK_INPUT=50000000 -CLK_FREQUENCY=50000000 +CLK_INPUT=100000000 +CLK_FREQUENCY=100000000 clkgen=fpga/clk_gen_bypass.vhd endif diff --git a/coldboot/Makefile b/coldboot/Makefile index 7ac6333..1180178 100644 --- a/coldboot/Makefile +++ b/coldboot/Makefile @@ -7,8 +7,8 @@ endif # a multiplier factor on sleep loops. 
this allows simulations to run # at much shorter intervals -LONG_TIMER_MULT ?= 100000 -SHORT_TIMER_MULT ?= 1000 +LONG_TIMER_MULT ?= 10000 +SHORT_TIMER_MULT ?= 100 #LONG_TIMER_MULT ?= 1 #SHORT_TIMER_MULT ?= 1 diff --git a/coldboot/coldboot.c b/coldboot/coldboot.c index d85ae76..408b90c 100644 --- a/coldboot/coldboot.c +++ b/coldboot/coldboot.c @@ -59,12 +59,12 @@ void isr(void) { } int main(void) { - const int kNumIterations = 65536; + const int kNumIterations = 14; int res, failcnt = 0; uint32_t tmp; volatile uint32_t *ram = DRAM_BASE; console_init(); - puts("Firmware launched...\n"); + //puts("Firmware launched...\n"); puts("DRAM init... "); struct gramCtx ctx; @@ -72,8 +72,8 @@ int main(void) { .mode_registers = { 0x320, 0x6, 0x200, 0x0 }, - .rdly_p0 = 2, - .rdly_p1 = 2, + .rdly_p0 = 0, + .rdly_p1 = 0, }; struct gramProfile profile2; gram_init(&ctx, &profile, (void*)DRAM_BASE, //0x10000000, @@ -81,15 +81,25 @@ int main(void) { (void*)DRAM_INIT_BASE); //0x00008000); puts("done\n"); +#if 0 + // quick write/read + writel(0xDEAF0000, (unsigned long)&(ram[0])); + tmp = readl((unsigned long)&(ram[0])); + puts("read "); + uart_writeuint32(tmp); + + return 0; +#endif + +#if 0 puts("Rdly\np0: "); for (size_t i = 0; i < 8; i++) { profile2.rdly_p0 = i; gram_load_calibration(&ctx, &profile2); - puts("loaded "); gram_reset_burstdet(&ctx); - puts("burstreset "); + for (size_t j = 0; j < 128; j++) { - tmp = readl((unsigned long)&ram[i]); + tmp = readl((unsigned long)&(ram[i])); } if (gram_read_burstdet(&ctx, 0)) { puts("1"); @@ -103,11 +113,9 @@ int main(void) { for (size_t i = 0; i < 8; i++) { profile2.rdly_p1 = i; gram_load_calibration(&ctx, &profile2); - puts("loaded "); gram_reset_burstdet(&ctx); - puts("burstreset "); for (size_t j = 0; j < 128; j++) { - tmp = readl((unsigned long)&ram[i]); + tmp = readl((unsigned long)&(ram[i])); } if (gram_read_burstdet(&ctx, 1)) { puts("1"); @@ -127,6 +135,8 @@ int main(void) { } puts("done\n"); +#endif + puts("Auto calibration 
profile:"); puts("p0 rdly:"); uart_writeuint32(profile2.rdly_p0); @@ -136,16 +146,16 @@ int main(void) { puts("DRAM test... \n"); for (size_t i = 0; i < kNumIterations; i++) { - ram[i] = 0xDEAF0000 | i*4; + writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i])); } for (size_t i = 0; i < kNumIterations; i++) { - if (ram[i] != (0xDEAF0000 | i*4)) { + if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) { puts("fail : *(0x"); uart_writeuint32(&ram[i]); puts(") = "); uart_writeuint32(ram[i]); - putchar('\n'); + puts("\n"); failcnt++; if (failcnt > 10) { @@ -156,8 +166,6 @@ int main(void) { } puts("done\n"); - while (1); - return 0; } diff --git a/coldboot/powerpc.lds b/coldboot/powerpc.lds index 99611ab..00aba33 100644 --- a/coldboot/powerpc.lds +++ b/coldboot/powerpc.lds @@ -7,9 +7,9 @@ SECTIONS } . = ALIGN(0x1000); .text : { *(.text) *(.text.*) *(.rodata) *(.rodata.*) } - . = ALIGN(0x1000); + . = ALIGN(0x10); .data : { *(.data) *(.data.*) *(.got) *(.toc) } - . = ALIGN(0x80); + . = ALIGN(0x10); __bss_start = .; .bss : { *(.dynsbss) @@ -20,8 +20,8 @@ SECTIONS *(.common) *(.bss.*) } - . = ALIGN(0x80); + . = ALIGN(0x10); __bss_end = .; - . = . + 0x4000; + . = . 
+ 0x5000; __stack_top = .; } diff --git a/lib/console.c b/lib/console.c index b44c024..d5c8af1 100644 --- a/lib/console.c +++ b/lib/console.c @@ -155,6 +155,7 @@ int getchar(void) int putchar(int c) { + return c; if (uart_is_std) { while(std_uart_tx_full()) /* Do Nothing */; diff --git a/libgram/src/calibration.c b/libgram/src/calibration.c index 21e698c..8626b62 100644 --- a/libgram/src/calibration.c +++ b/libgram/src/calibration.c @@ -58,13 +58,13 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro profile->rdly_p0 = rdly; gram_load_calibration(ctx, profile); gram_reset_burstdet(ctx); - puts("reset burst"); + //puts("reset burst"); for (i = 0; i < 128; i++) { - tmp = readl(&ram[i]); + tmp = readl((unsigned long)&(ram[i])); } - puts("read burst?"); + //puts("read burst?"); if (gram_read_burstdet(ctx, 0)) { min_rdly_p0 = rdly; break; @@ -97,7 +97,7 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { - tmp = readl(&ram[i]); + tmp = readl((unsigned long)&(ram[i])); } if (!gram_read_burstdet(ctx, 0)) { @@ -114,7 +114,7 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { - tmp = readl(&ram[i]); + tmp = readl((unsigned long)&(ram[i])); } if (!gram_read_burstdet(ctx, 1)) { @@ -136,10 +136,7 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro void gram_load_calibration(const struct gramCtx *ctx, const struct gramProfile *profile) { dfii_setsw(ctx, true); - puts("set rdly 0?"); set_rdly(ctx, 0, profile->rdly_p0); - puts("set rdly 1?"); set_rdly(ctx, 1, profile->rdly_p1); - puts("set hw"); dfii_setsw(ctx, false); } diff --git a/libgram/src/dfii.c b/libgram/src/dfii.c index 7fbf927..a915a27 100644 --- a/libgram/src/dfii.c +++ b/libgram/src/dfii.c @@ -14,6 +14,10 @@ static void dfii_setcontrol(const struct gramCtx *ctx, uint32_t val) { #endif } 
+void dfii_reset(const struct gramCtx *ctx) { /* passes ODT|RESET to dfii_setcontrol(); named to match the dfii_reset() prototype in dfii.h */ + dfii_setcontrol(ctx, DFII_CONTROL_ODT| DFII_CONTROL_RESET); +} + void dfii_setsw(const struct gramCtx *ctx, bool software_control) { if (software_control) { dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT| @@ -27,7 +31,7 @@ void dfii_set_p0_address(const struct gramCtx *ctx, uint32_t val) { #ifdef GRAM_RW_FUNC gram_write(ctx, (void*)&(ctx->core->phases[0].address), val); #else - writel(val, (unsigned long)&ctx->core->phases[0].address); + writel(val, (unsigned long)&(ctx->core->phases[0].address)); #endif } @@ -35,7 +39,7 @@ void dfii_set_p0_baddress(const struct gramCtx *ctx, uint32_t val) { #ifdef GRAM_RW_FUNC gram_write(ctx, (void*)&(ctx->core->phases[0].baddress), val); #else - writel(val, (unsigned long)&ctx->core->phases[0].baddress); + writel(val, (unsigned long)&(ctx->core->phases[0].baddress)); #endif } @@ -44,8 +48,8 @@ void dfii_p0_command(const struct gramCtx *ctx, uint32_t cmd) { gram_write(ctx, (void*)&(ctx->core->phases[0].command), cmd); gram_write(ctx, (void*)&(ctx->core->phases[0].command_issue), 1); #else - writel(cmd, (unsigned long)&ctx->core->phases[0].command); - writel(1, (unsigned long)&ctx->core->phases[0].command_issue); + writel(cmd, (unsigned long)&(ctx->core->phases[0].command)); + writel(1, (unsigned long)&(ctx->core->phases[0].command_issue)); #endif } @@ -61,13 +65,13 @@ void dfii_initseq(const struct gramCtx *ctx, const struct gramProfile *profile) /* Release reset */ dfii_set_p0_address(ctx, 0x0); dfii_set_p0_baddress(ctx, 0); - dfii_setcontrol(ctx, DFII_CONTROL_ODT); + dfii_setcontrol(ctx, DFII_CONTROL_ODT|DFII_CONTROL_RESET); cdelay(5*LONG_TIMER_MULT); /* Bring CKE high */ dfii_set_p0_address(ctx, 0x0); dfii_set_p0_baddress(ctx, 0); - dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT); + dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET); cdelay(1*LONG_TIMER_MULT); /* Load Mode Register 2, CWL=5 */ diff --git a/libgram/src/dfii.h 
b/libgram/src/dfii.h index 3e84f8c..8e23b6d 100644 --- a/libgram/src/dfii.h +++ b/libgram/src/dfii.h @@ -14,6 +14,7 @@ #define DFII_COMMAND_RAS (1 << 3) #define DFII_COMMAND_WRDATA (1 << 4) +void dfii_reset(const struct gramCtx *ctx); void dfii_setsw(const struct gramCtx *ctx, bool software_control); void dfii_initseq(const struct gramCtx *ctx, const struct gramProfile *profile); void dfii_set_p0_address(const struct gramCtx *ctx, uint32_t val); diff --git a/libgram/src/init.c b/libgram/src/init.c index 3d6c7a4..91d3fd2 100644 --- a/libgram/src/init.c +++ b/libgram/src/init.c @@ -1,19 +1,28 @@ #include #include "dfii.h" #include "console.h" +#include "hw_regs.h" +#include "helpers.h" + +extern void uart_writeuint32(uint32_t val); int gram_init(struct gramCtx *ctx, const struct gramProfile *profile, void *ddr_base, void *core_base, void *phy_base) { ctx->ddr_base = ddr_base; ctx->core = core_base; ctx->phy = phy_base; + //puts("phy base"); + //uart_writeuint32((unsigned long)&(ctx->phy)); + //puts("bdet"); + //uart_writeuint32((unsigned long)&(ctx->phy->burstdet)); + + //dfii_reset(ctx); + //puts("reset\n"); dfii_setsw(ctx, true); - puts("dfii_setsw\n"); + //puts("dfii_setsw\n"); dfii_initseq(ctx, profile); puts("initseq\n"); gram_load_calibration(ctx, profile); - puts("cal"); - dfii_setsw(ctx, false); return GRAM_ERR_NONE; } diff --git a/src/ls2.py b/src/ls2.py index f45bb97..bcb4b3b 100644 --- a/src/ls2.py +++ b/src/ls2.py @@ -234,7 +234,7 @@ class DDR3SoC(SoC, Elaboratable): # ibus dbus # | | # +--+--+ - # | + # | # 64to32DownCvt # | # arbiter @@ -358,8 +358,8 @@ class DDR3SoC(SoC, Elaboratable): self.dramcore = drs(dramcore) self._decoder.add(self.dramcore.bus, addr=dramcore_addr) - # map the DRAM onto Wishbone - drambone = gramWishbone(dramcore) + # map the DRAM onto Wishbone, XXX use stall but set classic below + drambone = gramWishbone(dramcore, features={'stall'}) if fpga == 'sim': self.drambone = drambone else: @@ -418,7 +418,10 @@ class DDR3SoC(SoC, 
Elaboratable): if hasattr(self, "ddrphy"): m.submodules.ddrphy = self.ddrphy m.submodules.dramcore = self.dramcore - m.submodules.drambone = self.drambone + m.submodules.drambone = drambone = self.drambone + # grrr, same problem with drambone: not WB4-pipe compliant + comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack) + # add blinky lights so we know FPGA is alive if platform is not None: m.submodules.blinky = Blinky() diff --git a/src/simsoctb.v b/src/simsoctb.v index 3ba4c91..64fb39a 100644 --- a/src/simsoctb.v +++ b/src/simsoctb.v @@ -47,7 +47,7 @@ module simsoctb; ddr3 #( .check_strict_timing(0) ) ram_chip ( - .rst_n(~dram_rst), + .rst_n(dram_rst), .ck(dram_ck), .ck_n(~dram_ck), .cke(dram_cke), -- 2.30.2