Add initial support for external DRAM init on the Raptor Versa ECP5-85 board
author Raptor Engineering Development Team <support@raptorengineering.com>
Thu, 7 Apr 2022 18:32:41 +0000 (13:32 -0500)
committer Raptor Engineering Development Team <support@raptorengineering.com>
Thu, 7 Apr 2022 18:32:41 +0000 (13:32 -0500)
LICENSE
examples/ecp5_crg.py [new file with mode: 0644]
examples/headless-versa-85.py [new file with mode: 0644]
examples/headless/main.c
libgram/Makefile
libgram/src/calibration.c

diff --git a/LICENSE b/LICENSE
index 991cbcfa739be8d0c05a8b21014b39a2da924723..c4f0af035cb9373c6ea6b3fb844804d7c5df8db3 100644 (file)
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,5 @@
 Unless otherwise noted, Gram is Copyright 2020 / LambdaConcept
+ECP5 DDR3 fixes and extensions Copyright 2022 Raptor Engineering, LLC
 
 Initial development is based on MiSoC's LASMICON / Copyright 2007-2016 / M-Labs
                                 LiteDRAM / Copyright 2012-2018 / EnjoyDigital
diff --git a/examples/ecp5_crg.py b/examples/ecp5_crg.py
new file mode 100644 (file)
index 0000000..5c975d6
--- /dev/null
@@ -0,0 +1,257 @@
+# Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
+# Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+# Copyright (c) 2018-2020 Florent Kermarrec <florent@enjoy-digital.fr>
+# Copyright (c) 2019 Michael Betz <michibetz@gmail.com>
+#
+# Based on code from LambdaConcept, from the gram example which is BSD-2-License
+# https://github.com/jeanthom/gram/tree/master/examples
+#
+# Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER
+# under EU Grants 871528 and 957073, under the LGPLv3+ License
+
+
+from nmigen import (Elaboratable, Module, Signal, ClockDomain, Instance,
+                    ClockSignal, ResetSignal, Const)
+
+__all__ = ["ECP5CRG"]
+
+
+class PLL(Elaboratable):
+    """Wrapper around the Lattice ECP5 ``EHXPLLL`` primitive.
+
+    Typical use: construct with the input clock, call :meth:`set_clkin_freq`,
+    then :meth:`create_clkout` once per wanted output (at most
+    ``nclkouts_max``).  The divider values are searched for when the module
+    is elaborated.
+
+    CLKOS3 is reserved for the internal feedback path with a divider of 1,
+    which leaves CLKOP, CLKOS and CLKOS2 as usable outputs.
+    """
+    nclkouts_max    = 3
+    # Divider ranges are expanded with range(), so the upper bound is exclusive.
+    clki_div_range  = (1, 128+1)
+    clkfb_div_range = (1, 128+1)
+    clko_div_range  = (1, 128+1)
+    # (min, max) frequency limits -- presumably in Hz, since elaborate()
+    # divides the input frequency by 1e6 for the FREQUENCY_PIN_CLKI attribute.
+    clki_freq_range = (    8e6,  400e6)
+    clko_freq_range = (3.125e6,  400e6)
+    vco_freq_range  = (  400e6,  800e6)
+
+    def __init__(self, clkin, clksel=None, reset=None, locked=None):
+        """Create a PLL fed by ``clkin``.
+
+        ``clksel``, ``reset`` and ``locked`` may be supplied by the caller;
+        any that are left as ``None`` get a fresh signal of their own.
+        """
+        # Signals must be created per instance: building them in the
+        # signature would evaluate them once at definition time and share
+        # the very same Signal objects between every PLL.
+        if clksel is None:
+            clksel = Signal(shape=2, reset=2)
+        if reset is None:
+            reset = Signal(reset_less=True)
+        if locked is None:
+            locked = Signal()
+        self.clkin = clkin
+        self.clkin_freq = None
+        self.clksel = clksel
+        self.locked = locked
+        self.reset  = reset
+        self.nclkouts   = 0
+        self.clkouts    = {}  # index -> (clock signal, freq, phase, margin)
+        self.config     = {}
+        self.params     = {}
+
+    def ports(self):
+        """Return the input clock, clock select, lock flag and output clocks."""
+        # clkouts stores (clk, freq, phase, margin) tuples; only the clock
+        # signal itself is a port.
+        return [
+            self.clkin,
+            self.clksel,
+            self.locked,
+        ] + [clk for (clk, _f, _p, _m) in self.clkouts.values()]
+
+    def set_clkin_freq(self, freq):
+        """Record the input clock frequency after checking ``clki_freq_range``."""
+        (clki_freq_min, clki_freq_max) = self.clki_freq_range
+        assert freq >= clki_freq_min
+        assert freq <= clki_freq_max
+        self.clkin_freq = freq
+
+    def create_clkout(self, cd, freq, phase=0, margin=1e-2):
+        """Register one PLL output.
+
+        ``cd`` is the signal driven by the PLL output; it must expose a
+        ``domain`` attribute (the caller in this file passes ``ClockSignal``
+        objects).  ``freq`` is the target frequency, ``phase`` the requested
+        phase in degrees and ``margin`` the accepted relative frequency error.
+        """
+        (clko_freq_min, clko_freq_max) = self.clko_freq_range
+        assert freq >= clko_freq_min
+        assert freq <= clko_freq_max
+        assert self.nclkouts < self.nclkouts_max
+        self.clkouts[self.nclkouts] = (cd, freq, phase, margin)
+        #create_clkout_log(self.logger, cd.name, freq, margin, self.nclkouts)
+        print("clock domain", cd.domain, freq, margin, self.nclkouts)
+        self.nclkouts += 1
+
+    def compute_config(self):
+        """Search for input/feedback/output dividers meeting every request.
+
+        Returns a dict with ``clki_div``, ``clkfb_div``, ``vco`` and, for each
+        output ``n``, ``clko{n}_freq``, ``clko{n}_div`` and ``clko{n}_phase``.
+        The first combination found (lowest ``clki_div``, then lowest
+        ``clkfb_div``) is returned.
+
+        Raises ``ValueError`` when the input frequency has not been set or
+        when no divider combination satisfies all outputs.
+        """
+        if self.clkin_freq is None:
+            raise ValueError("PLL input frequency not set; "
+                             "call set_clkin_freq() first")
+        (vco_freq_min, vco_freq_max) = self.vco_freq_range
+        config = {}
+        for clki_div in range(*self.clki_div_range):
+            config["clki_div"] = clki_div
+            for clkfb_div in range(*self.clkfb_div_range):
+                vco_freq = self.clkin_freq/clki_div*clkfb_div*1 # clkos3_div=1
+                if vco_freq < vco_freq_min or vco_freq > vco_freq_max:
+                    continue
+                all_valid = True
+                for n, (clk, f, p, m) in sorted(self.clkouts.items()):
+                    valid = False
+                    # take the first output divider within the margin
+                    for d in range(*self.clko_div_range):
+                        clk_freq = vco_freq/d
+                        if abs(clk_freq - f) <= f*m:
+                            config["clko{}_freq".format(n)]  = clk_freq
+                            config["clko{}_div".format(n)]   = d
+                            config["clko{}_phase".format(n)] = p
+                            valid = True
+                            break
+                    if not valid:
+                        all_valid = False
+                if all_valid:
+                    config["vco"] = vco_freq
+                    config["clkfb_div"] = clkfb_div
+                    #compute_config_log(self.logger, config)
+                    print ("PLL config", config)
+                    return config
+        raise ValueError("No PLL config found")
+
+    def elaborate(self, platform):
+        """Instantiate ``EHXPLLL`` with the computed divider configuration."""
+        config = self.compute_config()
+        self.params.update(
+            # attributes
+            a_FREQUENCY_PIN_CLKI     = str(self.clkin_freq/1e6),
+            a_ICP_CURRENT            = "6",
+            a_LPF_RESISTOR           = "16",
+            a_MFG_ENABLE_FILTEROPAMP = "1",
+            a_MFG_GMCREF_SEL         = "2",
+            # parameters
+            p_FEEDBK_PATH   = "INT_OS3", # CLKOS3 rsvd for feedback with div=1.
+            p_CLKOS3_ENABLE = "ENABLED",
+            p_CLKOS3_DIV    = 1,
+            p_CLKFB_DIV     = config["clkfb_div"],
+            p_CLKI_DIV      = config["clki_div"],
+            # reset, input clock, lock-achieved output
+            i_RST           = self.reset,
+            i_CLKI          = self.clkin,
+            o_LOCK          = self.locked,
+        )
+        # output index -> EHXPLLL port suffix (CLKOP, CLKOS, CLKOS2)
+        n_to_l = {0: "P", 1: "S", 2: "S2"}
+        # for each clock-out, set additional parameters
+        for n, (clk, f, p, m) in sorted(self.clkouts.items()):
+            suffix = n_to_l[n]
+            div    = config["clko{}_div".format(n)]
+            # CPHASE value derived from the requested phase (p, in degrees)
+            cphase = int(p*(div + 1)/360 + div)
+            self.params["p_CLKO{}_ENABLE".format(suffix)] = "ENABLED"
+            self.params["p_CLKO{}_DIV".format(suffix)]    = div
+            self.params["p_CLKO{}_FPHASE".format(suffix)] = 0
+            self.params["p_CLKO{}_CPHASE".format(suffix)] = cphase
+            self.params["o_CLKO{}".format(suffix)]        = clk
+
+        m = Module()
+        print ("params", self.params)
+        pll = Instance("EHXPLLL", **self.params)
+        m.submodules.pll = pll
+        return m
+
+
+class ECP5CRG(Elaboratable):
+    """Clock and reset generator for an ECP5 board.
+
+    From the platform's default clock it creates a local ``rawclk`` domain,
+    drives the global set/reset through ``SGSR``, and uses a PLL to produce
+    the ``sync2x``, ``sync2x_unbuf``, ``init``, ``sync`` and ``dramsync``
+    clock domains.
+
+    sys_clk_freq: frequency requested for ``sync``; the PLL is asked for
+                  twice this value, which is then divided by two.
+    pod_bits:     width of the power-on delay down-counter.
+    """
+    def __init__(self, sys_clk_freq=100e6, pod_bits=25):
+        self.sys_clk_freq = sys_clk_freq
+        self.pod_bits = pod_bits
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # Take the platform's default clock (100 MHz according to the
+        # original comment) and expose it as the local "rawclk" domain.
+        extclk = platform.request(platform.default_clk)
+        cd_rawclk = ClockDomain("rawclk", local=True, reset_less=True)
+        m.d.comb += cd_rawclk.clk.eq(extclk)
+        m.domains += cd_rawclk
+
+        # Reset: use the platform's default reset input when it has one
+        if platform.default_rst is not None:
+            reset = platform.request(platform.default_rst).i
+        else:
+            reset = Const(0) # no default reset resource: use a constant 0
+
+        gsr0 = Signal()
+        gsr1 = Signal()
+
+        # Pass the inverted reset through two rawclk flip-flops before it
+        # reaches the GSR input of SGSR.
+        m.submodules += [
+            Instance("FD1S3AX", p_GSR="DISABLED",
+                                i_CK=ClockSignal("rawclk"),
+                                i_D=~reset,
+                                o_Q=gsr0),
+            Instance("FD1S3AX", p_GSR="DISABLED",
+                                i_CK=ClockSignal("rawclk"),
+                                i_D=gsr0,
+                                o_Q=gsr1),
+            Instance("SGSR", i_CLK=ClockSignal("rawclk"),
+                             i_GSR=gsr1),
+        ]
+
+        # PLL
+        # NOTE(review): the PLL reset input receives ~reset; with the
+        # Const(0) fallback above this is a constant 1.  Confirm the
+        # intended polarity of the PLL RST pin and of the board reset input.
+        m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~reset)
+
+        # Power-on delay: podcnt starts at all ones and counts down to zero
+        # on rawclk while the PLL is locked, i.e. 2**pod_bits - 1 cycles.
+        # The former "655us" figure matches a 16-bit counter at 100 MHz; the
+        # default pod_bits=25 gives roughly 0.34 s at 100 MHz.
+        podcnt = Signal(self.pod_bits, reset=-1)
+        pod_done = Signal()
+        with m.If((podcnt != 0) & pll.locked):
+            m.d.rawclk += podcnt.eq(podcnt-1)
+        m.d.rawclk += pod_done.eq(podcnt == 0)
+
+        # Generating sync2x (2*sys_clk_freq, 200Mhz by default) and
+        # init (25Mhz) from extclk
+        cd_sync2x = ClockDomain("sync2x", local=False)
+        cd_sync2x_unbuf = ClockDomain("sync2x_unbuf",
+                                      local=False, reset_less=True)
+        cd_init = ClockDomain("init", local=False)
+        cd_sync = ClockDomain("sync", local=False)
+        cd_dramsync = ClockDomain("dramsync", local=False)
+
+        # create PLL clocks
+        pll.set_clkin_freq(platform.default_clk_frequency)
+        pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq)
+        pll.create_clkout(ClockSignal("init"), 25e6)
+        # sync2x is the sync2x_unbuf PLL output passed through ECLKSYNCB
+        m.submodules += Instance("ECLKSYNCB",
+                i_ECLKI = ClockSignal("sync2x_unbuf"),
+                i_STOP  = 0,
+                o_ECLKO = ClockSignal("sync2x"))
+        m.domains += cd_sync2x_unbuf
+        m.domains += cd_sync2x
+        m.domains += cd_init
+        m.domains += cd_sync
+        m.domains += cd_dramsync
+        # Despite its name, reset_ok is high while the domains must be held
+        # in reset: it is asserted until the PLL has locked and the power-on
+        # delay has elapsed.  The reset of "sync2x" is not driven here.
+        reset_ok = Signal(reset_less=True)
+        m.d.comb += reset_ok.eq(~pll.locked|~pod_done)
+        m.d.comb += ResetSignal("init").eq(reset_ok)
+        m.d.comb += ResetSignal("sync").eq(reset_ok)
+        m.d.comb += ResetSignal("dramsync").eq(reset_ok)
+
+        # Generating sync (sys_clk_freq, 100Mhz by default) from sync2x
+        # using a divide-by-2 CLKDIVF
+
+        m.submodules += Instance("CLKDIVF",
+            p_DIV="2.0",
+            i_ALIGNWD=0,
+            i_CLKI=ClockSignal("sync2x"),
+            i_RST=0,
+            o_CDIVX=ClockSignal("sync"))
+
+        # temporarily set dram sync clock exactly equal to main sync
+        m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync"))
+
+        return m
+
diff --git a/examples/headless-versa-85.py b/examples/headless-versa-85.py
new file mode 100644 (file)
index 0000000..3254115
--- /dev/null
@@ -0,0 +1,84 @@
+# This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
+# This file is Copyright (c) 2022 Raptor Engineering, LLC <support@raptorengineering.com>
+
+from nmigen import *
+from nmigen.lib.cdc import ResetSynchronizer
+from nmigen_soc import wishbone, memory
+
+from lambdasoc.cpu.minerva import MinervaCPU
+from lambdasoc.periph.intc import GenericInterruptController
+from lambdasoc.periph.serial import AsyncSerialPeripheral
+from lambdasoc.periph.sram import SRAMPeripheral
+from lambdasoc.periph.timer import TimerPeripheral
+from lambdasoc.periph import Peripheral
+from lambdasoc.soc.base import SoC
+
+from gram.core import gramCore
+from gram.phy.ecp5ddrphy import ECP5DDRPHY
+from gram.modules import MT41K64M16
+from gram.frontend.wishbone import gramWishbone
+
+from nmigen_boards.versa_ecp5 import VersaECP5Platform85
+from ecp5_crg import ECP5CRG
+from uartbridge import UARTBridge
+from crg import *
+
+class DDR3SoC(SoC, Elaboratable):
+    """Headless DDR3 test SoC: a UART-to-Wishbone bridge driving gram.
+
+    There is no CPU; the UART bridge is the only bus initiator and reaches
+    the DDR PHY registers, the DRAM core registers and the DRAM itself
+    through a Wishbone decoder.
+    """
+    def __init__(self, *,
+                 ddrphy_addr, dramcore_addr,
+                 ddr_addr, platform=None):
+        """Build the SoC and request its board resources.
+
+        ddrphy_addr, dramcore_addr, ddr_addr: addresses at which the PHY
+            bus, the DRAM core bus and the DRAM Wishbone frontend are added
+            to the decoder.
+        platform: board platform supplying the "uart" and "ddr3" resources
+            and the default clock frequency.  When omitted, the module-level
+            ``platform`` created by the ``__main__`` block is used, as the
+            original code did implicitly.
+
+        Raises ValueError when no platform is available.
+        """
+        if platform is None:
+            # Backward compatibility with callers relying on the global.
+            platform = globals().get("platform")
+        if platform is None:
+            raise ValueError("DDR3SoC requires a platform: pass platform=... "
+                             "or define a module-level 'platform'")
+
+        self._decoder = wishbone.Decoder(addr_width=30, data_width=32, granularity=8,
+                                         features={"cti", "bte"})
+
+        self.crg = ECP5CRG()
+
+        # divisor=868 is about 100e6/115200 -- presumably 115200 baud with a
+        # 100 MHz clock; confirm against UARTBridge.
+        self.ub = UARTBridge(divisor=868, pins=platform.request("uart", 0))
+
+        ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"},
+            xdr={"clk":4, "a":4, "ba":4, "clk_en":4, "odt":4, "ras":4, "cas":4, "we":4, "cs":4, "reset":4})
+        self.ddrphy = DomainRenamer("dramsync")(ECP5DDRPHY(ddr_pins))
+        self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr)
+
+        ddrmodule = MT41K64M16(platform.default_clk_frequency, "1:2")
+
+        self.dramcore = DomainRenamer("dramsync")(gramCore(
+            phy=self.ddrphy,
+            geom_settings=ddrmodule.geom_settings,
+            timing_settings=ddrmodule.timing_settings,
+            clk_freq=platform.default_clk_frequency))
+        self._decoder.add(self.dramcore.bus, addr=dramcore_addr)
+
+        self.drambone = DomainRenamer("dramsync")(gramWishbone(self.dramcore))
+        self._decoder.add(self.drambone.bus, addr=ddr_addr)
+
+        self.memory_map = self._decoder.bus.memory_map
+
+        self.clk_freq = platform.default_clk_frequency
+
+    def elaborate(self, platform):
+        """Register the submodules and connect the UART bridge to the decoder."""
+        m = Module()
+
+        m.submodules.sysclk = self.crg
+
+        m.submodules.ub = self.ub
+
+        m.submodules.decoder = self._decoder
+        m.submodules.ddrphy = self.ddrphy
+        m.submodules.dramcore = self.dramcore
+        m.submodules.drambone = self.drambone
+
+        # The UART bridge is the sole initiator on the decoder bus.
+        m.d.comb += [
+            self.ub.bus.connect(self._decoder.bus),
+        ]
+
+        return m
+
+
+if __name__ == "__main__":
+    # DDR3SoC.__init__ reads this module-level ``platform``, so it has to be
+    # created before the SoC is constructed.
+    platform = VersaECP5Platform85()
+
+    # These addresses must match the ones the host-side tool passes to
+    # gram_init() (0x10000000 DRAM, 0x00009000 core, 0x00008000 PHY).
+    soc = DDR3SoC(ddrphy_addr=0x00008000, dramcore_addr=0x00009000,
+        ddr_addr=0x10000000)
+
+    # NOTE(review): soc.build() comes from the lambdasoc SoC base class;
+    # what it generates is not visible here -- confirm it is needed for a
+    # CPU-less design.
+    soc.build(do_build=True)
+    # Build the design for the board and program it.
+    platform.build(soc, do_program=True)
index cfcd36e5525a17d9e27f97cd3495165cbff8f68e..bc3e98afc201ad877ff7f6bd09b22a56077a96f6 100644 (file)
@@ -31,6 +31,8 @@ uint32_t gram_read(struct gramCtx *ctx, void *addr) {
                fprintf(stderr, "gram_read error (read bytes length mismatch: %d != %d)\n", received, sizeof(reply));
        }
 
+       //printf("gram_read: 0x%08x: 0x%08x\n", addr, ntohl(reply));
+
        return ntohl(reply);
 }
 
@@ -41,6 +43,8 @@ int gram_write(struct gramCtx *ctx, void *addr, uint32_t value) {
        *(uint32_t*)(commands+2) = htonl((uint32_t)addr >> 2);
        *(uint32_t*)(commands+6) = htonl(value);
 
+       //printf("gram_write: 0x%08x: 0x%08x\n", addr, value);
+
        sent = write(*(int*)(ctx->user_data), commands, sizeof(commands));
        if (sent != sizeof(commands)) {
                fprintf(stderr, "gram_write error (sent bytes length mismatch)\n");
@@ -100,8 +104,22 @@ int main(int argc, char *argv[]) {
        uint32_t pattern[kPatternSize];
        const int kDumpWidth = 8;
        size_t i;
+       int res;
+       uint32_t tmp;
        int delay, miss = 0;
 
+       uint32_t ddr_base = 0x10000000;
+
+#if 1
+       struct gramProfile profile = {
+               .mode_registers = {
+                       0x2708, 0x2054, 0x0512, 0x0000
+               },
+               .rdly_p0 = 2,
+               .rdly_p1 = 2,
+       };
+#endif
+#if 0
        struct gramProfile profile = {
                .mode_registers = {
                        0x320, 0x6, 0x200, 0x0
@@ -109,6 +127,8 @@ int main(int argc, char *argv[]) {
                .rdly_p0 = 2,
                .rdly_p1 = 2,
        };
+#endif
+       struct gramProfile profile2;
 
        if (argc < 3) {
                fprintf(stderr, "Usage: %s port baudrate\n", argv[0]);
@@ -126,9 +146,57 @@ int main(int argc, char *argv[]) {
        ctx.user_data = &serial_port;
 
        printf("gram init... ");
-       gram_init(&ctx, &profile, (void*)0x10000000, (void*)0x00009000, (void*)0x00008000);
+       gram_init(&ctx, &profile, (void*)ddr_base, (void*)0x00009000, (void*)0x00008000);
        printf("done\n");
 
+       printf("Rdly\np0: ");
+       for (size_t i = 0; i < 8; i++) {
+               profile2.rdly_p0 = i;
+               gram_load_calibration(&ctx, &profile2);
+               gram_reset_burstdet(&ctx);
+               for (size_t j = 0; j < 128; j++) {
+                       tmp = gram_read(&ctx, ddr_base+4*j);
+               }
+               if (gram_read_burstdet(&ctx, 0)) {
+                       printf("1");
+               } else {
+                       printf("0");
+               }
+               fflush(stdout);
+       }
+       printf("\n");
+
+       printf("Rdly\np1: ");
+       for (size_t i = 0; i < 8; i++) {
+               profile2.rdly_p1 = i;
+               gram_load_calibration(&ctx, &profile2);
+               gram_reset_burstdet(&ctx);
+               for (size_t j = 0; j < 128; j++) {
+                       tmp = gram_read(&ctx, ddr_base+4*j);
+               }
+               if (gram_read_burstdet(&ctx, 1)) {
+                       printf("1");
+               } else {
+                       printf("0");
+               }
+               fflush(stdout);
+       }
+       printf("\n");
+
+        printf("Auto calibrating... ");
+        res = gram_generate_calibration(&ctx, &profile2);
+        if (res != GRAM_ERR_NONE) {
+                printf("failed\n");
+                gram_load_calibration(&ctx, &profile);
+        } else {
+                gram_load_calibration(&ctx, &profile2);
+        }
+        printf("done\n");
+
+        printf("Auto calibration profile:\n");
+        printf("\tp0 rdly: %d\n", profile2.rdly_p0);
+        printf("\tp1 rdly: %d\n", profile2.rdly_p1);
+
        gram_reset_burstdet(&ctx);
 
        srand(time(NULL));
@@ -137,7 +205,6 @@ int main(int argc, char *argv[]) {
        }
 
        printf("memtest... \n");
-       uint32_t ddr_base = 0x10000000;
 
        printf("Writing data sequence...");
        for (i = 0; i < kPatternSize; i++) {
index 6093828e83e2efc45dbe07480b9913082fb89418..4cb7a197360bc1b84a3597c63f0fa0d41d26cdf7 100644 (file)
@@ -1,6 +1,6 @@
 OBJS := src/init.o src/dfii.o src/calibration.o
 
-TRIPLE := riscv64-unknown-elf-
+TRIPLE :=
 
 CC := $(TRIPLE)gcc
 AS := $(TRIPLE)as
@@ -8,7 +8,7 @@ OBJCOPY := $(TRIPLE)objcopy
 AR := $(TRIPLE)ar
 LD := $(TRIPLE)ld
 
-CFLAGS += -fvisibility=hidden -nostdlib -Os -Iinclude -std=c99 -Wall -Werror -pedantic
+CFLAGS += -fvisibility=hidden -nostdlib -O0 -g -Iinclude -std=c99 -Wall -Wno-error -pedantic -DGRAM_RW_FUNC
 LDFLAGS += -nostdlib
 
 ifeq ($(TRIPLE),riscv64-unknown-elf-)
index a77c44afdc843eb1113251dba6ad398c4195730a..cb7b3597ff8f7cb85c9064d007c0054d71328fe7 100644 (file)
@@ -57,7 +57,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro
                gram_reset_burstdet(ctx);
 
                for (i = 0; i < 128; i++) {
+#ifdef GRAM_RW_FUNC
+                       gram_read(ctx, ctx->ddr_base);
+#else
                        tmp = ram[i];
+#endif
                }
 
                if (gram_read_burstdet(ctx, 0)) {
@@ -74,7 +78,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro
                gram_reset_burstdet(ctx);
 
                for (i = 0; i < 128; i++) {
+#ifdef GRAM_RW_FUNC
+                       gram_read(ctx, ctx->ddr_base);
+#else
                        tmp = ram[i];
+#endif
                }
 
                if (gram_read_burstdet(ctx, 1)) {
@@ -92,7 +100,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro
                gram_reset_burstdet(ctx);
 
                for (i = 0; i < 128; i++) {
+#ifdef GRAM_RW_FUNC
+                       gram_read(ctx, ctx->ddr_base);
+#else
                        tmp = ram[i];
+#endif
                }
 
                if (!gram_read_burstdet(ctx, 0)) {
@@ -109,7 +121,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro
                gram_reset_burstdet(ctx);
 
                for (i = 0; i < 128; i++) {
+#ifdef GRAM_RW_FUNC
+                       gram_read(ctx, ctx->ddr_base);
+#else
                        tmp = ram[i];
+#endif
                }
 
                if (!gram_read_burstdet(ctx, 1)) {