software/bios: fixup for Ultrascale SDRAM debug
[litex.git] / litex / soc / software / bios / sdram.c
index 8771f384efd905904605351b955937ca1e4b8234..7654caafa478e496a209b496abbd93ba7ff1d5ca 100644 (file)
@@ -1,24 +1,52 @@
+// This file is Copyright (c) 2013-2014 Sebastien Bourdeauducq <sb@m-labs.hk>
+// This file is Copyright (c) 2013-2019 Florent Kermarrec <florent@enjoy-digital.fr>
+// This file is Copyright (c) 2018 Chris Ballance <chris.ballance@physics.ox.ac.uk>
+// This file is Copyright (c) 2018 Dolu1990 <charles.papon.90@gmail.com>
+// This file is Copyright (c) 2019 Gabriel L. Somlo <gsomlo@gmail.com>
+// This file is Copyright (c) 2018 Jean-François Nguyen <jf@lambdaconcept.fr>
+// This file is Copyright (c) 2018 Sergiusz Bazanski <q3k@q3k.org>
+// This file is Copyright (c) 2018 Tim 'mithro' Ansell <me@mith.ro>
+// License: BSD
+
 #include <generated/csr.h>
-#ifdef CSR_SDRAM_BASE
 
 #include <stdio.h>
 #include <stdlib.h>
 
+#ifdef CSR_SDRAM_BASE
 #include <generated/sdram_phy.h>
+#endif
 #include <generated/mem.h>
 #include <hw/flags.h>
 #include <system.h>
 
 #include "sdram.h"
 
-static void cdelay(int i)
+// FIXME(hack): If we don't have main ram, just target the sram instead.
+#ifndef MAIN_RAM_BASE
+#define MAIN_RAM_BASE SRAM_BASE
+#endif
+
+__attribute__((unused)) static void cdelay(int i)
 {
        while(i > 0) {
 #if defined (__lm32__)
                __asm__ volatile("nop");
 #elif defined (__or1k__)
                __asm__ volatile("l.nop");
-#elif defined (__riscv__)
+#elif defined (__picorv32__)
+               __asm__ volatile("nop");
+#elif defined (__vexriscv__)
+               __asm__ volatile("nop");
+#elif defined (__minerva__)
+               __asm__ volatile("nop");
+#elif defined (__rocket__)
+               __asm__ volatile("nop");
+#elif defined (__powerpc__)
+               __asm__ volatile("nop");
+#elif defined (__microwatt__)
+               __asm__ volatile("nop");
+#elif defined (__blackparrot__)
                __asm__ volatile("nop");
 #else
 #error Unsupported architecture
@@ -27,6 +55,14 @@ static void cdelay(int i)
        }
 }
 
+#ifdef CSR_SDRAM_BASE
+
+/* Replacement lists are parenthesized so each macro acts as a single operand wherever it is used. */
+#define DFII_ADDR_SHIFT (CONFIG_CSR_ALIGNMENT/8)
+
+/* CSR data width divided by 8 (presumably bits -> bytes per CSR word; confirm against generated/csr.h). */
+#define CSR_DATA_BYTES (CONFIG_CSR_DATA_WIDTH/8)
+
+/*
+ * Size of the per-phase DFII data buffers declared by the functions below.
+ * Same value as the unparenthesized form whenever CONFIG_CSR_DATA_WIDTH is a multiple of 8.
+ */
+#define DFII_PIX_DATA_BYTES (DFII_PIX_DATA_SIZE*CSR_DATA_BYTES)
+
 void sdrsw(void)
 {
        sdram_dfii_control_write(DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET_N);
@@ -68,18 +104,22 @@ void sdrrdbuf(int dq)
 {
        int i, p;
        int first_byte, step;
+       unsigned char buf[DFII_PIX_DATA_BYTES];
 
        if(dq < 0) {
                first_byte = 0;
                step = 1;
        } else {
-               first_byte = DFII_PIX_DATA_SIZE/2 - 1 - dq;
-               step = DFII_PIX_DATA_SIZE/2;
+               first_byte = DFII_PIX_DATA_BYTES/2 - 1 - dq;
+               step = DFII_PIX_DATA_BYTES/2;
        }
 
-       for(p=0;p<DFII_NPHASES;p++)
-               for(i=first_byte;i<DFII_PIX_DATA_SIZE;i+=step)
-                       printf("%02x", MMPTR(sdram_dfii_pix_rddata_addr[p]+4*i));
+       for(p=0;p<DFII_NPHASES;p++) {
+               csr_rd_buf_uint8(sdram_dfii_pix_rddata_addr[p],
+                                buf, DFII_PIX_DATA_BYTES);
+               for(i=first_byte;i<DFII_PIX_DATA_BYTES;i+=step)
+                       printf("%02x", buf[i]);
+       }
        printf("\n");
 }
 
@@ -121,8 +161,9 @@ void sdrrderr(char *count)
        char *c;
        int _count;
        int i, j, p;
-       unsigned char prev_data[DFII_NPHASES*DFII_PIX_DATA_SIZE];
-       unsigned char errs[DFII_NPHASES*DFII_PIX_DATA_SIZE];
+       unsigned char prev_data[DFII_NPHASES][DFII_PIX_DATA_BYTES];
+       unsigned char errs[DFII_NPHASES][DFII_PIX_DATA_BYTES];
+       unsigned char new_data[DFII_PIX_DATA_BYTES];
 
        if(*count == 0) {
                printf("sdrrderr <count>\n");
@@ -134,49 +175,52 @@ void sdrrderr(char *count)
                return;
        }
 
-       for(i=0;i<DFII_NPHASES*DFII_PIX_DATA_SIZE;i++)
-                       errs[i] = 0;
+       for(p=0;p<DFII_NPHASES;p++)
+               for(i=0;i<DFII_PIX_DATA_BYTES;i++)
+                       errs[p][i] = 0;
+
        for(addr=0;addr<16;addr++) {
                sdram_dfii_pird_address_write(addr*8);
                sdram_dfii_pird_baddress_write(0);
                command_prd(DFII_COMMAND_CAS|DFII_COMMAND_CS|DFII_COMMAND_RDDATA);
                cdelay(15);
                for(p=0;p<DFII_NPHASES;p++)
-                       for(i=0;i<DFII_PIX_DATA_SIZE;i++)
-                               prev_data[p*DFII_PIX_DATA_SIZE+i] = MMPTR(sdram_dfii_pix_rddata_addr[p]+4*i);
+                       csr_rd_buf_uint8(sdram_dfii_pix_rddata_addr[p],
+                                        prev_data[p], DFII_PIX_DATA_BYTES);
 
                for(j=0;j<_count;j++) {
                        command_prd(DFII_COMMAND_CAS|DFII_COMMAND_CS|DFII_COMMAND_RDDATA);
                        cdelay(15);
-                       for(p=0;p<DFII_NPHASES;p++)
-                               for(i=0;i<DFII_PIX_DATA_SIZE;i++) {
-                                       unsigned char new_data;
-
-                                       new_data = MMPTR(sdram_dfii_pix_rddata_addr[p]+4*i);
-                                       errs[p*DFII_PIX_DATA_SIZE+i] |= prev_data[p*DFII_PIX_DATA_SIZE+i] ^ new_data;
-                                       prev_data[p*DFII_PIX_DATA_SIZE+i] = new_data;
+                       for(p=0;p<DFII_NPHASES;p++) {
+                               csr_rd_buf_uint8(sdram_dfii_pix_rddata_addr[p],
+                                                new_data, DFII_PIX_DATA_BYTES);
+                               for(i=0;i<DFII_PIX_DATA_BYTES;i++) {
+                                       errs[p][i] |= prev_data[p][i] ^ new_data[i];
+                                       prev_data[p][i] = new_data[i];
                                }
+                       }
                }
        }
 
-       for(i=0;i<DFII_NPHASES*DFII_PIX_DATA_SIZE;i++)
-               printf("%02x", errs[i]);
+       for(p=0;p<DFII_NPHASES;p++)
+               for(i=0;i<DFII_PIX_DATA_BYTES;i++)
+                       printf("%02x", errs[p][i]);
        printf("\n");
        for(p=0;p<DFII_NPHASES;p++)
-               for(i=0;i<DFII_PIX_DATA_SIZE;i++)
-                       printf("%2x", DFII_PIX_DATA_SIZE/2 - 1 - (i % (DFII_PIX_DATA_SIZE/2)));
+               for(i=0;i<DFII_PIX_DATA_BYTES;i++)
+                       printf("%2x", DFII_PIX_DATA_BYTES/2 - 1 - (i % (DFII_PIX_DATA_BYTES/2)));
        printf("\n");
 }
 
 void sdrwr(char *startaddr)
 {
+       int i, p;
        char *c;
        unsigned int addr;
-       int i;
-       int p;
+       unsigned char buf[DFII_PIX_DATA_BYTES];
 
        if(*startaddr == 0) {
-               printf("sdrrd <address>\n");
+               printf("sdrwr <address>\n");
                return;
        }
        addr = strtoul(startaddr, &c, 0);
@@ -185,9 +229,12 @@ void sdrwr(char *startaddr)
                return;
        }
 
-       for(p=0;p<DFII_NPHASES;p++)
-               for(i=0;i<DFII_PIX_DATA_SIZE;i++)
-                       MMPTR(sdram_dfii_pix_wrdata_addr[p]+4*i) = 0x10*p + i;
+       for(p=0;p<DFII_NPHASES;p++) {
+               for(i=0;i<DFII_PIX_DATA_BYTES;i++)
+                       buf[i] = 0x10*p + i;
+               csr_wr_buf_uint8(sdram_dfii_pix_wrdata_addr[p],
+                                buf, DFII_PIX_DATA_BYTES);
+       }
 
        sdram_dfii_piwr_address_write(addr);
        sdram_dfii_piwr_baddress_write(0);
@@ -195,11 +242,43 @@ void sdrwr(char *startaddr)
 }
 
 #ifdef CSR_DDRPHY_BASE
-#ifndef A7DDRPHY_BITSLIP
+
+/*
+ * Per-PHY calibration constants.
+ * ERR_DDRPHY_DELAY bounds the delay-tap scans (it sizes taps_scan[] and limits the read-delay loops).
+ * NBMODULES is the number of modules the leveling loops iterate over; it is parenthesized so that
+ * expressions such as 2*NBMODULES-1-module group as "twice the module count".
+ */
+#if defined (USDDRPHY)
+#define ERR_DDRPHY_DELAY 512
+#define ERR_DDRPHY_BITSLIP 8
+#define NBMODULES (DFII_PIX_DATA_BYTES/2)
+#elif defined (ECP5DDRPHY)
+#define ERR_DDRPHY_DELAY 8
+#define ERR_DDRPHY_BITSLIP 1
+#define NBMODULES (DFII_PIX_DATA_BYTES/4)
+#else
+#define ERR_DDRPHY_DELAY 32
+#define ERR_DDRPHY_BITSLIP 8
+#define NBMODULES (DFII_PIX_DATA_BYTES/2)
+#endif
+
+#ifdef DDRPHY_CMD_DELAY
+/*
+ * Reset the cdly delay line and then step it `delay` times.
+ * (The function is only built when DDRPHY_CMD_DELAY is defined, so this is
+ * presumably the command delay - confirm against the PHY.)
+ *
+ * If the PHY provides an en_vtc CSR, it is written to 0 before the delay is
+ * touched and back to 1 afterwards. The CSR's presence is tested with #ifdef,
+ * like the other CSR_*_ADDR guards in this file.
+ */
+static void ddrphy_cdly(unsigned int delay) {
+#ifdef CSR_DDRPHY_EN_VTC_ADDR
+       ddrphy_en_vtc_write(0);
+#endif
+       ddrphy_cdly_rst_write(1);
+       while (delay > 0) {
+               ddrphy_cdly_inc_write(1);
+               cdelay(1000); /* busy-wait between two increments */
+               delay--;
+       }
+#ifdef CSR_DDRPHY_EN_VTC_ADDR
+       ddrphy_en_vtc_write(1);
+#endif
+}
+#endif
+
+#ifdef CSR_DDRPHY_WLEVEL_EN_ADDR
 
 void sdrwlon(void)
 {
-       sdram_dfii_pi0_address_write(DDR3_MR1 | (1 << 7));
+       sdram_dfii_pi0_address_write(DDRX_MR1 | (1 << 7));
        sdram_dfii_pi0_baddress_write(1);
        command_p0(DFII_COMMAND_RAS|DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS);
        ddrphy_wlevel_en_write(1);
@@ -207,126 +286,303 @@ void sdrwlon(void)
 
 void sdrwloff(void)
 {
-       sdram_dfii_pi0_address_write(DDR3_MR1);
+       sdram_dfii_pi0_address_write(DDRX_MR1);
        sdram_dfii_pi0_baddress_write(1);
        command_p0(DFII_COMMAND_RAS|DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS);
        ddrphy_wlevel_en_write(0);
 }
 
-#define ERR_DDRPHY_DELAY 32
+static void write_delay_rst(int module) { /* reset the write DQ and DQS delays of one module */
+#ifdef USDDRPHY
+       int i; /* increment counter, only used by the USDDRPHY loop below */
+#endif
+
+       /* select the module: dly_sel is written with only bit `module` set */
+       ddrphy_dly_sel_write(1 << module);
+
+       /* reset both the DQ and the DQS write delay */
+       ddrphy_wdly_dq_rst_write(1);
+       ddrphy_wdly_dqs_rst_write(1);
+#ifdef USDDRPHY /* need to init manually on Ultrascale */
+       for(i=0; i<ddrphy_half_sys8x_taps_read(); i++) /* the CSR is re-read on every iteration */
+               ddrphy_wdly_dqs_inc_write(1); /* only DQS is stepped here; DQ is left as reset */
+#endif
+
+       /* deselect all modules */
+       ddrphy_dly_sel_write(0);
+}
+
+static void write_delay_inc(int module) { /* step the write DQ and DQS delays of one module by one increment */
+       /* select the module: dly_sel is written with only bit `module` set */
+       ddrphy_dly_sel_write(1 << module);
+
+       /* increment the DQ and the DQS delay together */
+       ddrphy_wdly_dq_inc_write(1);
+       ddrphy_wdly_dqs_inc_write(1);
 
-static int write_level(int *delay, int *high_skew)
+       /* deselect all modules */
+       ddrphy_dly_sel_write(0);
+}
+
+int write_level(void) /* scan the write delay taps of every module, program the chosen delay, return ok (1/0) */
 {
-       int i;
-       int dq_address;
-       unsigned char dq;
+       int i, j, k; /* i: module, j: delay tap, k: sample within a tap */
+
+       int err_ddrphy_wdly; /* number of taps that get scanned */
+
+       unsigned char taps_scan[ERR_DDRPHY_DELAY]; /* per tap: 1 if most samples were non-zero, else 0 */
+
+       int one_window_active;
+       int one_window_start, one_window_best_start;
+       int one_window_count, one_window_best_count;
+
+       int delays[NBMODULES]; /* delay chosen for each module */
+
+       unsigned char buf[DFII_PIX_DATA_BYTES]; /* phase 0 read data, refreshed after every strobe */
+
         int ok;
 
-       printf("Write leveling: ");
+       err_ddrphy_wdly = ERR_DDRPHY_DELAY - ddrphy_half_sys8x_taps_read(); /* scan range is shortened by half_sys8x_taps */
+
+       printf("Write leveling:\n");
 
         sdrwlon();
         cdelay(100);
-       for(i=0;i<DFII_PIX_DATA_SIZE/2;i++) {
-               dq_address = sdram_dfii_pix_rddata_addr[0]+4*(DFII_PIX_DATA_SIZE/2-1-i);
-               ddrphy_dly_sel_write(1 << i);
-               ddrphy_wdly_dq_rst_write(1);
-               ddrphy_wdly_dqs_rst_write(1);
-
-               delay[i] = 0;
-
-               ddrphy_wlevel_strobe_write(1);
-               cdelay(10);
-               dq = MMPTR(dq_address);
-               if(dq != 0) {
-                       /*
-                        * Assume this DQ group has between 1 and 2 bit times of skew.
-                        * Bring DQS into the CK=0 zone before continuing leveling.
-                        */
-                       high_skew[i] = 1;
-                       while(dq != 0) {
-                               delay[i]++;
-                               if(delay[i] >= ERR_DDRPHY_DELAY)
-                                       break;
-                               ddrphy_wdly_dq_inc_write(1);
-                               ddrphy_wdly_dqs_inc_write(1);
+       for(i=0;i<NBMODULES;i++) {
+               printf("m%d: |", i);
+
+               /* start the scan of this module from its reset delay */
+               write_delay_rst(i);
+
+               /* scan write delay taps: sample each tap 128 times and keep the majority value */
+               for(j=0;j<err_ddrphy_wdly;j++) {
+                       int zero_count = 0;
+                       int one_count = 0;
+                       int show = 1;
+#ifdef USDDRPHY
+                       show = (j%16 == 0); /* only every 16th tap is printed on USDDRPHY */
+#endif
+                       for (k=0; k<128; k++) {
                                 ddrphy_wlevel_strobe_write(1);
                                 cdelay(10);
-                               dq = MMPTR(dq_address);
-                        }
-               } else
-                       high_skew[i] = 0;
-
-               while(dq == 0) {
-                       delay[i]++;
-                       if(delay[i] >= ERR_DDRPHY_DELAY)
-                               break;
-                       ddrphy_wdly_dq_inc_write(1);
-                       ddrphy_wdly_dqs_inc_write(1);
-
-                       ddrphy_wlevel_strobe_write(1);
+                               csr_rd_buf_uint8(sdram_dfii_pix_rddata_addr[0],
+                                                buf, DFII_PIX_DATA_BYTES);
+                               if (buf[NBMODULES-1-i] != 0) /* byte NBMODULES-1-i of phase 0 is the one inspected for module i */
+                                       one_count++;
+                               else
+                                       zero_count++;
+                       }
+                       if (one_count > zero_count) /* a tie counts as 0 */
+                               taps_scan[j] = 1;
+                       else
+                               taps_scan[j] = 0;
+                       if (show)
+                               printf("%d", taps_scan[j]);
+                       write_delay_inc(i);
                         cdelay(10);
-                       dq = MMPTR(dq_address);
                 }
+               printf("|");
+
+               /* find the longest window of 1s and set the delay at its start (the 0/1 transition) */
+               one_window_active = 0;
+               one_window_start = 0;
+               one_window_count = 0;
+               one_window_best_start = 0;
+               one_window_best_count = 0;
+               delays[i] = -1; /* NOTE(review): always overwritten after the loop, so the "< 0" check at the end cannot fire */
+               for(j=0;j<err_ddrphy_wdly;j++) {
+                       if (one_window_active) {
+                               if ((taps_scan[j] == 0) | (j == err_ddrphy_wdly - 1)) { /* bitwise | of two 0/1 comparisons; the last tap also closes a window */
+                                       one_window_active = 0;
+                                       one_window_count = j - one_window_start; /* NOTE(review): tap j itself is not counted, even when it is a 1 at the last tap */
+                                       if (one_window_count > one_window_best_count) {
+                                               one_window_best_start = one_window_start;
+                                               one_window_best_count = one_window_count;
+                                       }
+                               }
+                       } else {
+                               if (taps_scan[j]) {
+                                       one_window_active = 1;
+                                       one_window_start = j;
+                               }
+                       }
+               }
+               delays[i] = one_window_best_start; /* remains 0 when no window was recorded */
+
+               /* configure write delay: reset, then step delays[i] times */
+               write_delay_rst(i);
+               for(j=0; j<delays[i]; j++)
+                       write_delay_inc(i);
+               printf(" delay: %02d\n", delays[i]);
         }
+
         sdrwloff();
 
         ok = 1;
-       for(i=DFII_PIX_DATA_SIZE/2-1;i>=0;i--) {
-               printf("%2d%c ", delay[i], high_skew[i] ? '*' : ' ');
-               if(delay[i] >= ERR_DDRPHY_DELAY)
+       for(i=NBMODULES-1;i>=0;i--) {
+               if(delays[i] < 0) /* a negative delay marks a module as failed */
                         ok = 0;
         }
 
-       if(ok)
-               printf("completed\n");
-       else
-               printf("failed\n");
-
         return ok;
 }
 
-static void read_bitslip(int *delay, int *high_skew)
+#endif /* CSR_DDRPHY_WLEVEL_EN_ADDR */
+
+static void read_delay_rst(int module) { /* reset the read DQ delay of one module */
+       /* select the module: dly_sel is written with only bit `module` set */
+       ddrphy_dly_sel_write(1 << module);
+
+       /* reset the read DQ delay */
+       ddrphy_rdly_dq_rst_write(1);
+
+       /* deselect all modules */
+       ddrphy_dly_sel_write(0);
+
+#ifdef ECP5DDRPHY
+       /* Sync all DQSBUFMs by toggling all dly_sel (DQSBUFM.PAUSE) lines: write 0xFF, then 0. */
+       ddrphy_dly_sel_write(0xFF);
+       ddrphy_dly_sel_write(0);
+#endif
+}
+
+static void read_delay_inc(int module) { /* step the read DQ delay of one module by one increment */
+       /* select the module: dly_sel is written with only bit `module` set */
+       ddrphy_dly_sel_write(1 << module);
+
+       /* increment the read DQ delay */
+       ddrphy_rdly_dq_inc_write(1);
+
+       /* deselect all modules */
+       ddrphy_dly_sel_write(0);
+
+#ifdef ECP5DDRPHY
+       /* Sync all DQSBUFMs by toggling all dly_sel (DQSBUFM.PAUSE) lines: write 0xFF, then 0. */
+       ddrphy_dly_sel_write(0xFF);
+       ddrphy_dly_sel_write(0);
+#endif
+}
+
+static void read_bitslip_rst(char m) /* reset the read bitslip of module m */
 {
-       int bitslip_thr;
-       int i;
+       /* select the module: dly_sel is written with only bit `m` set */
+       ddrphy_dly_sel_write(1 << m);
 
-       bitslip_thr = 0x7fffffff;
-       for(i=0;i<DFII_PIX_DATA_SIZE/2;i++)
-               if(high_skew[i] && (delay[i] < bitslip_thr))
-                       bitslip_thr = delay[i];
-       if(bitslip_thr == 0x7fffffff)
-               return;
-       bitslip_thr = bitslip_thr/2;
-
-       printf("Read bitslip: ");
-       for(i=DFII_PIX_DATA_SIZE/2-1;i>=0;i--)
-               if(delay[i] > bitslip_thr) {
-                       ddrphy_dly_sel_write(1 << i);
-                       /* 7-series SERDES in DDR mode needs 3 pulses for 1 bitslip */
-                       ddrphy_rdly_dq_bitslip_write(1);
-                       ddrphy_rdly_dq_bitslip_write(1);
-                       ddrphy_rdly_dq_bitslip_write(1);
-                       printf("%d ", i);
+       /* reset the bitslip */
+       ddrphy_rdly_dq_bitslip_rst_write(1);
+
+       /* deselect all modules */
+       ddrphy_dly_sel_write(0);
+}
+
+
+static void read_bitslip_inc(char m) /* advance the read bitslip of module m by one step */
+{
+       /* select the module: dly_sel is written with only bit `m` set */
+       ddrphy_dly_sel_write(1 << m);
+
+       /* increment the bitslip (a single write per step) */
+       ddrphy_rdly_dq_bitslip_write(1);
+
+       /* deselect all modules */
+       ddrphy_dly_sel_write(0);
+}
+
+static int read_level_scan(int module, int bitslip) /* returns how many read delay taps read the test pattern back correctly for `module`; `bitslip` is only printed */
+{
+       unsigned int prv;
+       unsigned char prs[DFII_NPHASES][DFII_PIX_DATA_BYTES]; /* reference pattern, one row per phase */
+       unsigned char tst[DFII_PIX_DATA_BYTES]; /* read-back data of the phase being checked */
+       int p, i;
+       int score;
+
+       /* Generate pseudo-random sequence (fixed seed, so the pattern is the same on every call) */
+       prv = 42;
+       for(p=0;p<DFII_NPHASES;p++)
+               for(i=0;i<DFII_PIX_DATA_BYTES;i++) {
+                       prv = 1664525*prv + 1013904223;
+                       prs[p][i] = prv; /* truncated to one byte by the assignment */
                 }
-       printf("\n");
+
+       /* Activate */
+       sdram_dfii_pi0_address_write(0);
+       sdram_dfii_pi0_baddress_write(0);
+       command_p0(DFII_COMMAND_RAS|DFII_COMMAND_CS);
+       cdelay(15);
+
+       /* Write test pattern */
+       for(p=0;p<DFII_NPHASES;p++)
+               csr_wr_buf_uint8(sdram_dfii_pix_wrdata_addr[p],
+                                prs[p], DFII_PIX_DATA_BYTES);
+       sdram_dfii_piwr_address_write(0);
+       sdram_dfii_piwr_baddress_write(0);
+       command_pwr(DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS|DFII_COMMAND_WRDATA);
+
+       /* Scan every read delay tap of the selected module */
+       sdram_dfii_pird_address_write(0);
+       sdram_dfii_pird_baddress_write(0);
+       score = 0;
+
+       printf("m%d, b%d: |", module, bitslip);
+       read_delay_rst(module);
+       for(i=0;i<ERR_DDRPHY_DELAY;i++) {
+               int working = 1; /* cleared as soon as any check of this tap fails */
+               int show = 1;
+#ifdef USDDRPHY
+               show = (i%16 == 0); /* only every 16th tap is printed on USDDRPHY */
+#endif
+#ifdef ECP5DDRPHY
+               ddrphy_burstdet_clr_write(1);
+#endif
+               command_prd(DFII_COMMAND_CAS|DFII_COMMAND_CS|DFII_COMMAND_RDDATA);
+               cdelay(15);
+               for(p=0;p<DFII_NPHASES;p++) {
+                       /* read back test pattern */
+                       csr_rd_buf_uint8(sdram_dfii_pix_rddata_addr[p],
+                                        tst, DFII_PIX_DATA_BYTES);
+                       /* verify bytes matching current 'module': two bytes per phase, NBMODULES apart */
+                       if (prs[p][  NBMODULES-1-module] != tst[  NBMODULES-1-module] ||
+                           prs[p][2*NBMODULES-1-module] != tst[2*NBMODULES-1-module])
+                               working = 0;
+               }
+#ifdef ECP5DDRPHY
+               if (((ddrphy_burstdet_seen_read() >> module) & 0x1) != 1) /* bit `module` of burstdet_seen must also be set */
+                       working = 0;
+#endif
+               if (show)
+                       printf("%d", working);
+               score += working; /* every tap counts, including the ones not printed */
+               read_delay_inc(module);
+       }
+       printf("| ");
+
+       /* Precharge */
+       sdram_dfii_pi0_address_write(0);
+       sdram_dfii_pi0_baddress_write(0);
+       command_p0(DFII_COMMAND_RAS|DFII_COMMAND_WE|DFII_COMMAND_CS);
+       cdelay(15);
+
+       return score;
 }
 
-static void read_delays(void)
+static void read_level(int module)
 {
        unsigned int prv;
-       unsigned char prs[DFII_NPHASES*DFII_PIX_DATA_SIZE];
-       int p, i, j;
+       unsigned char prs[DFII_NPHASES][DFII_PIX_DATA_BYTES];
+       unsigned char tst[DFII_PIX_DATA_BYTES];
+       int p, i;
        int working;
        int delay, delay_min, delay_max;
 
-       printf("Read delays: ");
+       printf("delays: ");
 
        /* Generate pseudo-random sequence */
        prv = 42;
-       for(i=0;i<DFII_NPHASES*DFII_PIX_DATA_SIZE;i++) {
-               prv = 1664525*prv + 1013904223;
-               prs[i] = prv;
-       }
+       for(p=0;p<DFII_NPHASES;p++)
+               for(i=0;i<DFII_PIX_DATA_BYTES;i++) {
+                       prv = 1664525*prv + 1013904223;
+                       prs[p][i] = prv;
+               }
 
        /* Activate */
        sdram_dfii_pi0_address_write(0);
@@ -336,8 +592,8 @@ static void read_delays(void)
 
        /* Write test pattern */
        for(p=0;p<DFII_NPHASES;p++)
-               for(i=0;i<DFII_PIX_DATA_SIZE;i++)
-                       MMPTR(sdram_dfii_pix_wrdata_addr[p]+4*i) = prs[DFII_PIX_DATA_SIZE*p+i];
+               csr_wr_buf_uint8(sdram_dfii_pix_wrdata_addr[p],
+                                prs[p], DFII_PIX_DATA_BYTES);
        sdram_dfii_piwr_address_write(0);
        sdram_dfii_piwr_baddress_write(0);
        command_pwr(DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS|DFII_COMMAND_WRDATA);
@@ -345,74 +601,100 @@ static void read_delays(void)
        /* Calibrate each DQ in turn */
        sdram_dfii_pird_address_write(0);
        sdram_dfii_pird_baddress_write(0);
-       for(i=0;i<DFII_PIX_DATA_SIZE/2;i++) {
-               ddrphy_dly_sel_write(1 << (DFII_PIX_DATA_SIZE/2-i-1));
-               delay = 0;
 
-               /* Find smallest working delay */
-               ddrphy_rdly_dq_rst_write(1);
-               while(1) {
-                       command_prd(DFII_COMMAND_CAS|DFII_COMMAND_CS|DFII_COMMAND_RDDATA);
-                       cdelay(15);
-                       working = 1;
-                       for(p=0;p<DFII_NPHASES;p++) {
-                               if(MMPTR(sdram_dfii_pix_rddata_addr[p]+4*i) != prs[DFII_PIX_DATA_SIZE*p+i])
-                                       working = 0;
-                               if(MMPTR(sdram_dfii_pix_rddata_addr[p]+4*(i+DFII_PIX_DATA_SIZE/2)) != prs[DFII_PIX_DATA_SIZE*p+i+DFII_PIX_DATA_SIZE/2])
-                                       working = 0;
-                       }
-                       if(working)
-                               break;
-                       delay++;
-                       if(delay >= ERR_DDRPHY_DELAY)
-                               break;
-                       ddrphy_rdly_dq_inc_write(1);
+       /* Find smallest working delay */
+       delay = 0;
+       read_delay_rst(module);
+       while(1) {
+#ifdef ECP5DDRPHY
+               ddrphy_burstdet_clr_write(1);
+#endif
+               command_prd(DFII_COMMAND_CAS|DFII_COMMAND_CS|DFII_COMMAND_RDDATA);
+               cdelay(15);
+               working = 1;
+               for(p=0;p<DFII_NPHASES;p++) {
+                       /* read back test pattern */
+                       csr_rd_buf_uint8(sdram_dfii_pix_rddata_addr[p],
+                                        tst, DFII_PIX_DATA_BYTES);
+                       /* verify bytes matching current 'module' */
+                       if (prs[p][  NBMODULES-1-module] != tst[  NBMODULES-1-module] ||
+                           prs[p][2*NBMODULES-1-module] != tst[2*NBMODULES-1-module])
+                               working = 0;
                }
-               delay_min = delay;
-
-               /* Get a bit further into the working zone */
+#ifdef ECP5DDRPHY
+               if (((ddrphy_burstdet_seen_read() >> module) & 0x1) != 1)
+                       working = 0;
+#endif
+               if(working)
+                       break;
                delay++;
-               ddrphy_rdly_dq_inc_write(1);
+               if(delay >= ERR_DDRPHY_DELAY)
+                       break;
+               read_delay_inc(module);
+       }
+       delay_min = delay;
 
-               /* Find largest working delay */
-               while(1) {
-                       command_prd(DFII_COMMAND_CAS|DFII_COMMAND_CS|DFII_COMMAND_RDDATA);
-                       cdelay(15);
-                       working = 1;
-                       for(p=0;p<DFII_NPHASES;p++) {
-                               if(MMPTR(sdram_dfii_pix_rddata_addr[p]+4*i) != prs[DFII_PIX_DATA_SIZE*p+i])
-                                       working = 0;
-                               if(MMPTR(sdram_dfii_pix_rddata_addr[p]+4*(i+DFII_PIX_DATA_SIZE/2)) != prs[DFII_PIX_DATA_SIZE*p+i+DFII_PIX_DATA_SIZE/2])
-                                       working = 0;
-                       }
-                       if(!working)
-                               break;
-                       delay++;
-                       if(delay >= ERR_DDRPHY_DELAY)
-                               break;
-                       ddrphy_rdly_dq_inc_write(1);
+       /* Get a bit further into the working zone */
+#ifdef USDDRPHY
+       for(i=0;i<16;i++) {
+               delay += 1;
+               read_delay_inc(module);
+       }
+#else
+       delay++;
+       read_delay_inc(module);
+#endif
+
+       /* Find largest working delay */
+       while(1) {
+#ifdef ECP5DDRPHY
+               ddrphy_burstdet_clr_write(1);
+#endif
+               command_prd(DFII_COMMAND_CAS|DFII_COMMAND_CS|DFII_COMMAND_RDDATA);
+               cdelay(15);
+               working = 1;
+               for(p=0;p<DFII_NPHASES;p++) {
+                       /* read back test pattern */
+                       csr_rd_buf_uint8(sdram_dfii_pix_rddata_addr[p],
+                                        tst, DFII_PIX_DATA_BYTES);
+                       /* verify bytes matching current 'module' */
+                       if (prs[p][  NBMODULES-1-module] != tst[  NBMODULES-1-module] ||
+                           prs[p][2*NBMODULES-1-module] != tst[2*NBMODULES-1-module])
+                               working = 0;
                }
-               delay_max = delay;
+#ifdef ECP5DDRPHY
+               if (((ddrphy_burstdet_seen_read() >> module) & 0x1) != 1)
+                       working = 0;
+#endif
+               if(!working)
+                       break;
+               delay++;
+               if(delay >= ERR_DDRPHY_DELAY)
+                       break;
+               read_delay_inc(module);
+       }
+       delay_max = delay;
 
-               printf("%d:%02d-%02d  ", DFII_PIX_DATA_SIZE/2-i-1, delay_min, delay_max);
+       if (delay_min >= ERR_DDRPHY_DELAY)
+               printf("-");
+       else
+               printf("%02d+-%02d", (delay_min+delay_max)/2, (delay_max-delay_min)/2);
 
-               /* Set delay to the middle */
-               ddrphy_rdly_dq_rst_write(1);
-               for(j=0;j<(delay_min+delay_max)/2;j++)
-                       ddrphy_rdly_dq_inc_write(1);
-       }
+       /* Set delay to the middle */
+       read_delay_rst(module);
+       for(i=0;i<(delay_min+delay_max)/2;i++)
+               read_delay_inc(module);
 
        /* Precharge */
        sdram_dfii_pi0_address_write(0);
        sdram_dfii_pi0_baddress_write(0);
        command_p0(DFII_COMMAND_RAS|DFII_COMMAND_WE|DFII_COMMAND_CS);
        cdelay(15);
-
-       printf("completed\n");
 }
-#endif /* A7DDRPHY_BITSLIP */
 #endif /* CSR_DDRPHY_BASE */
 
+#endif /* CSR_SDRAM_BASE */
+
 static unsigned int seed_to_data_32(unsigned int seed, int random)
 {
        if (random)
@@ -436,10 +718,13 @@ static unsigned short seed_to_data_16(unsigned short seed, int random)
 #define MEMTEST_BUS_SIZE (512)
 #endif
 
+//#define MEMTEST_BUS_DEBUG
+
 static int memtest_bus(void)
 {
        volatile unsigned int *array = (unsigned int *)MAIN_RAM_BASE;
        int i, errors;
+       unsigned int rdata;
 
        errors = 0;
 
@@ -447,20 +732,34 @@ static int memtest_bus(void)
                array[i] = ONEZERO;
        }
        flush_cpu_dcache();
+#ifdef CONFIG_L2_SIZE
        flush_l2_cache();
+#endif
        for(i=0;i<MEMTEST_BUS_SIZE/4;i++) {
-               if(array[i] != ONEZERO)
+               rdata = array[i];
+               if(rdata != ONEZERO) {
                        errors++;
+#ifdef MEMTEST_BUS_DEBUG
+                       printf("[bus: 0x%0x]: 0x%08x vs 0x%08x\n", i, rdata, ONEZERO);
+#endif
+               }
        }
 
        for(i=0;i<MEMTEST_BUS_SIZE/4;i++) {
                array[i] = ZEROONE;
        }
        flush_cpu_dcache();
+#ifdef CONFIG_L2_SIZE
        flush_l2_cache();
+#endif
        for(i=0;i<MEMTEST_BUS_SIZE/4;i++) {
-               if(array[i] != ZEROONE)
+               rdata = array[i];
+               if(rdata != ZEROONE) {
                        errors++;
+#ifdef MEMTEST_BUS_DEBUG
+                       printf("[bus 0x%0x]: 0x%08x vs 0x%08x\n", i, rdata, ZEROONE);
+#endif
+               }
        }
 
        return errors;
@@ -471,11 +770,14 @@ static int memtest_bus(void)
 #endif
 #define MEMTEST_DATA_RANDOM 1
 
+//#define MEMTEST_DATA_DEBUG
+
 static int memtest_data(void)
 {
        volatile unsigned int *array = (unsigned int *)MAIN_RAM_BASE;
        int i, errors;
        unsigned int seed_32;
+       unsigned int rdata;
 
        errors = 0;
        seed_32 = 0;
@@ -487,11 +789,18 @@ static int memtest_data(void)
 
        seed_32 = 0;
        flush_cpu_dcache();
+#ifdef CONFIG_L2_SIZE
        flush_l2_cache();
+#endif
        for(i=0;i<MEMTEST_DATA_SIZE/4;i++) {
                seed_32 = seed_to_data_32(seed_32, MEMTEST_DATA_RANDOM);
-               if(array[i] != seed_32)
+               rdata = array[i];
+               if(rdata != seed_32) {
                        errors++;
+#ifdef MEMTEST_DATA_DEBUG
+                       printf("[data 0x%0x]: 0x%08x vs 0x%08x\n", i, rdata, seed_32);
+#endif
+               }
        }
 
        return errors;
@@ -501,11 +810,14 @@ static int memtest_data(void)
 #endif
 #define MEMTEST_ADDR_RANDOM 0
 
+//#define MEMTEST_ADDR_DEBUG
+
 static int memtest_addr(void)
 {
        volatile unsigned int *array = (unsigned int *)MAIN_RAM_BASE;
        int i, errors;
        unsigned short seed_16;
+       unsigned short rdata;
 
        errors = 0;
        seed_16 = 0;
@@ -517,16 +829,68 @@ static int memtest_addr(void)
 
        seed_16 = 0;
        flush_cpu_dcache();
+#ifdef CONFIG_L2_SIZE
        flush_l2_cache();
+#endif
        for(i=0;i<MEMTEST_ADDR_SIZE/4;i++) {
                seed_16 = seed_to_data_16(seed_16, MEMTEST_ADDR_RANDOM);
-               if(array[(unsigned int) seed_16] != i)
+               rdata = array[(unsigned int) seed_16];
+               if(rdata != i) {
                        errors++;
+#ifdef MEMTEST_ADDR_DEBUG
+                       printf("[addr 0x%0x]: 0x%08x vs 0x%08x\n", i, rdata, i);
+#endif
+               }
        }
 
        return errors;
 }
 
+/*
+ * Measure and print the sequential write and read throughput of main RAM.
+ *
+ * timer0 is started from 0xffffffff and its latched value is sampled before
+ * and after one pass over MEMTEST_DATA_SIZE bytes at MAIN_RAM_BASE; elapsed
+ * ticks are taken as start - end (the timer is treated as counting down).
+ * Each speed is 8 * bytes * (CONFIG_CLOCK_FREQUENCY/1000000, presumably MHz)
+ * divided by the elapsed ticks, and is printed as Mbps.
+ */
+static void memspeed(void)
+{
+       volatile unsigned int *array = (unsigned int *)MAIN_RAM_BASE;
+       int i;
+       unsigned int start, end, elapsed;
+       unsigned long write_speed;
+       unsigned long read_speed;
+       __attribute__((unused)) unsigned int data;
+
+       /* init timer */
+       timer0_en_write(0);
+       timer0_reload_write(0);
+       timer0_load_write(0xffffffff);
+       timer0_en_write(1);
+
+       /* write speed */
+       timer0_update_value_write(1);
+       start = timer0_value_read();
+       for(i=0;i<MEMTEST_DATA_SIZE/4;i++) {
+               array[i] = i;
+       }
+       timer0_update_value_write(1);
+       end = timer0_value_read();
+       elapsed = start - end;
+       if(elapsed == 0)
+               elapsed = 1; /* avoid dividing by zero if no tick was observed */
+       /* 8UL makes the product unsigned long, so it cannot overflow a signed int */
+       write_speed = (8UL*MEMTEST_DATA_SIZE*(CONFIG_CLOCK_FREQUENCY/1000000))/elapsed;
+
+       /* flush CPU and L2 caches */
+       flush_cpu_dcache();
+#ifdef CONFIG_L2_SIZE
+       flush_l2_cache();
+#endif
+
+       /* read speed */
+       timer0_en_write(1);
+       timer0_update_value_write(1);
+       start = timer0_value_read();
+       for(i=0;i<MEMTEST_DATA_SIZE/4;i++) {
+               data = array[i]; /* array is volatile, so every read is really performed */
+       }
+       timer0_update_value_write(1);
+       end = timer0_value_read();
+       elapsed = start - end;
+       if(elapsed == 0)
+               elapsed = 1; /* avoid dividing by zero if no tick was observed */
+       read_speed = (8UL*MEMTEST_DATA_SIZE*(CONFIG_CLOCK_FREQUENCY/1000000))/elapsed;
+
+       /* %lu matches the unsigned long arguments (%d expects an int) */
+       printf("Memspeed Writes: %luMbps Reads: %luMbps\n", write_speed, read_speed);
+}
+
 int memtest(void)
 {
        int bus_errors, data_errors, addr_errors;
@@ -536,7 +900,7 @@ int memtest(void)
                printf("Memtest bus failed: %d/%d errors\n", bus_errors, 2*128);
 
        data_errors = memtest_data();
-       if(bus_errors != 0)
+       if(data_errors != 0)
                printf("Memtest data failed: %d/%d errors\n", data_errors, MEMTEST_DATA_SIZE/4);
 
        addr_errors = memtest_addr();
@@ -547,59 +911,194 @@ int memtest(void)
                return 0;
        else {
                printf("Memtest OK\n");
+               memspeed();
                return 1;
        }
 }
 
+#ifdef CSR_SDRAM_BASE
+
 #ifdef CSR_DDRPHY_BASE
-#ifdef A7DDRPHY_BITSLIP
 int sdrlevel(void)
 {
-       int bitslip, delay, module;
-       int i;
-       sdram_dfii_control_write(DFII_CONTROL_SEL);
-       for(module=0; module<8; module++) {
-               ddrphy_dly_sel_write(1<<module);
-               ddrphy_rdly_dq_rst_write(1);
-               for(bitslip=0; bitslip<A7DDRPHY_BITSLIP; bitslip++) {
-                       // 7-series SERDES in DDR mode needs 3 pulses for 1 bitslip
-                       for(i=0; i<3; i++)
-                               ddrphy_rdly_dq_bitslip_write(1);
-               }
-               for(delay=0; delay<A7DDRPHY_DELAY; delay++)
-                       ddrphy_rdly_dq_inc_write(1);
+       int module;
+       int bitslip;
+       int score; /* value returned by read_level_scan() for the current bitslip */
+       int best_score; /* highest score seen so far for the current module */
+       int best_bitslip; /* bitslip setting that produced best_score */
+
+       sdrsw(); /* software control */
+
+       for(module=0; module<NBMODULES; module++) { /* reset delays and bitslip of every module */
+#ifdef CSR_DDRPHY_WLEVEL_EN_ADDR
+               write_delay_rst(module);
+#endif
+               read_delay_rst(module);
+               read_bitslip_rst(module);
        }
-       return 1;
-}
-#else
-int sdrlevel(void)
-{
-       int delay[DFII_PIX_DATA_SIZE/2];
-       int high_skew[DFII_PIX_DATA_SIZE/2];
 
-       if(!write_level(delay, high_skew))
+#ifdef CSR_DDRPHY_WLEVEL_EN_ADDR
+       if(!write_level()) /* a zero result from write_level() aborts leveling with return 0 */
                return 0;
-       read_bitslip(delay, high_skew);
-       read_delays();
+#endif
+
+       printf("Read leveling:\n");
+       for(module=0; module<NBMODULES; module++) {
+               /* scan possible read windows: try each bitslip setting in turn */
+               best_score = 0;
+               best_bitslip = 0;
+               for(bitslip=0; bitslip<ERR_DDRPHY_BITSLIP; bitslip++) {
+                       /* compute score for this bitslip setting */
+                       score = read_level_scan(module, bitslip);
+                       read_level(module);
+                       printf("\n");
+                       if (score > best_score) { /* strict '>' keeps the earliest bitslip on ties */
+                               best_bitslip = bitslip;
+                               best_score = score;
+                       }
+                       /* exit on the last setting, before incrementing bitslip once more */
+                       if (bitslip == ERR_DDRPHY_BITSLIP-1)
+                               break;
+                       /* increment bitslip */
+                       read_bitslip_inc(module);
+               }
+
+               /* select best read window: reset bitslip, then step up best_bitslip times */
+               printf("best: m%d, b%d ", module, best_bitslip);
+               read_bitslip_rst(module);
+               for (bitslip=0; bitslip<best_bitslip; bitslip++)
+                       read_bitslip_inc(module);
+
+               /* re-do leveling on best read window */
+               read_level(module);
+               printf("\n");
+       }
+
 
        return 1;
 }
 #endif
-#endif
 
 int sdrinit(void)
 {
        printf("Initializing SDRAM...\n");
 
+#ifdef CSR_DDRCTRL_BASE
+       ddrctrl_init_done_write(0);
+       ddrctrl_init_error_write(0);
+#endif
+
        init_sequence();
 #ifdef CSR_DDRPHY_BASE
+#if CSR_DDRPHY_EN_VTC_ADDR
+       ddrphy_en_vtc_write(0);
+#endif
+#ifdef DDRPHY_CMD_DELAY
+       printf("Setting clk/cmd delay to %d taps\n", DDRPHY_CMD_DELAY);
+       ddrphy_cdly(DDRPHY_CMD_DELAY);
+#endif
        sdrlevel();
+#if CSR_DDRPHY_EN_VTC_ADDR
+       ddrphy_en_vtc_write(1);
+#endif
+#endif
+       sdrhw();
+       if(!memtest()) {
+#ifdef CSR_DDRCTRL_BASE
+               ddrctrl_init_done_write(1);
+               ddrctrl_init_error_write(1);
 #endif
-       sdram_dfii_control_write(DFII_CONTROL_SEL);
-       if(!memtest())
                return 0;
+       }
+#ifdef CSR_DDRCTRL_BASE
+       ddrctrl_init_done_write(1);
+#endif
 
        return 1;
 }
 
+#ifdef USDDRPHY_DEBUG
+
+#define MPR0_SEL (0 << 0) /* MPRn_SEL: 2-bit selector at bit 0 of the value sdrmpron() passes to sdrmrwr(3, ...) */
+#define MPR1_SEL (1 << 0)
+#define MPR2_SEL (2 << 0)
+#define MPR3_SEL (3 << 0)
+
+#define MPR_ENABLE (1 << 2) /* bit 2: OR'd in by sdrmpron(); sdrmproff() writes 0, which clears it */
+
+#define MPR_READ_SERIAL    (0 << 11) /* MPR_READ_*: field starting at bit 11; only SERIAL is used in this file section */
+#define MPR_READ_PARALLEL  (1 << 11) /* NOTE(review): layout presumably follows the DDR4 MR3 definition - confirm against JESD79-4 */
+#define MPR_READ_STAGGERED (2 << 11)
+
+void sdrcal(void) /* Re-run sdrlevel() (when a DDR PHY is present), then switch back to hardware control. */
+{
+#ifdef CSR_DDRPHY_BASE
+#ifdef CSR_DDRPHY_EN_VTC_ADDR
+       ddrphy_en_vtc_write(0); /* en_vtc is written 0 before leveling and 1 again afterwards */
+#endif
+       sdrlevel(); /* return value (0 on write-leveling failure) is not checked here */
+#ifdef CSR_DDRPHY_EN_VTC_ADDR
+       ddrphy_en_vtc_write(1);
+#endif
+#endif
+       sdrhw(); /* hardware control */
+}
+
+void sdrmrwr(char reg, int value) { /* Issue one phase-0 command with `value` on the address field and `reg` on the bank-address field. */
+       sdram_dfii_pi0_address_write(value);
+       sdram_dfii_pi0_baddress_write(reg);
+       command_p0(DFII_COMMAND_RAS|DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS); /* NOTE(review): RAS|CAS|WE|CS is presumably the mode-register-set encoding - confirm */
+}
+
+static void sdrmpron(char mpr) /* Write register 3 with MPR_ENABLE, serial read format and the selector `mpr` (one of MPRn_SEL). */
+{
+       sdrmrwr(3, MPR_READ_SERIAL | MPR_ENABLE | mpr);
+}
+
+static void sdrmproff(void) /* Write 0 to register 3, clearing MPR_ENABLE and the other fields set by sdrmpron(). */
+{
+       sdrmrwr(3, 0);
+}
+
+void sdrmpr(void) /* Debug helper: enable MPR0, issue one read command and print two bits per phase for every module. */
+{
+       int module, phase;
+       unsigned char buf[DFII_PIX_DATA_BYTES]; /* holds the read-data bytes of one phase */
+       printf("Read SDRAM MPR...\n");
+
+       /* reset PHY delays and bitslip for every module */
+       for(module=0; module<NBMODULES; module++) {
+#ifdef CSR_DDRPHY_WLEVEL_EN_ADDR
+               write_delay_rst(module);
+#endif
+               read_delay_rst(module);
+               read_bitslip_rst(module);
+       }
+
+       /* software control */
+       sdrsw();
+
+       printf("Reads with MPR0 (0b01010101) enabled...\n");
+       sdrmpron(MPR0_SEL);
+       command_prd(DFII_COMMAND_CAS|DFII_COMMAND_CS|DFII_COMMAND_RDDATA);
+       cdelay(15); /* short busy-wait before the read data is fetched */
+       for (module=0; module < NBMODULES; module++) {
+               printf("m%d: ", module);
+               for(phase=0; phase<DFII_NPHASES; phase++) {
+                       csr_rd_buf_uint8(sdram_dfii_pix_rddata_addr[phase],
+                                        buf, DFII_PIX_DATA_BYTES);
+                       printf("%d", buf[  NBMODULES-module-1] & 0x1); /* bit 0 of this module's byte; NOTE(review): presumably first half of the phase word - confirm */
+                       printf("%d", buf[2*NBMODULES-module-1] & 0x1); /* bit 0 of the byte NBMODULES entries further on */
+               }
+               printf("\n");
+       }
+       sdrmproff(); /* leave MPR mode again before handing control back */
+
+       /* hardware control */
+       sdrhw();
+}
+
+#endif
+
+
 #endif