# a multiplier factor on sleep loops. this allows simulations to run
 # at much shorter intervals
-LONG_TIMER_MULT ?= 10000
-SHORT_TIMER_MULT ?= 100
-#LONG_TIMER_MULT ?= 1
-#SHORT_TIMER_MULT ?= 1
+LONG_TIMER_MULT = 10000
+SHORT_TIMER_MULT = 100
+#LONG_TIMER_MULT = 1
+#SHORT_TIMER_MULT = 1
 
 LIBGRAMDIR = ../libgram
 LIBGRAMINC = ../libgram/include
 
                .mode_registers = {
                        0x320, 0x6, 0x200, 0x0
                },
-               .rdly_p0 = 0,
-               .rdly_p1 = 0,
+               .rdly_p0 = 5,
+               .rdly_p1 = 5,
        };
        struct gramProfile profile2;
        gram_init(&ctx, &profile, (void*)DRAM_BASE, //0x10000000,
     return 0;
 #endif
 
-#if 0
+#if 1
        puts("Rdly\np0: ");
        for (size_t i = 0; i < 8; i++) {
                profile2.rdly_p0 = i;
                writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
        }
 
-       for (size_t i = 0; i < kNumIterations; i++) {
-               if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
-                       puts("fail : *(0x");
-                       uart_writeuint32(&ram[i]);
-                       puts(") = ");
-                       uart_writeuint32(ram[i]);
-                       puts("\n");
-                       failcnt++;
-
-                       if (failcnt > 10) {
-                               puts("Test canceled (more than 10 errors)\n");
-                               break;
-                       }
-               }
-       }
+       for (int dly = 0; dly < 8; dly++) {
+        failcnt = 0;
+        profile2.rdly_p0 = dly;
+        profile2.rdly_p1 = dly;
+        puts("p0 rdly:");
+        uart_writeuint32(profile2.rdly_p0);
+        puts(" p1 rdly:");
+        uart_writeuint32(profile2.rdly_p1);
+               gram_load_calibration(&ctx, &profile2);
+        for (size_t i = 0; i < kNumIterations; i++) {
+            if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
+                puts("fail : *(0x");
+                uart_writeuint32(&ram[i]);
+                puts(") = ");
+                uart_writeuint32(ram[i]);
+                puts("\n");
+                failcnt++;
+
+                if (failcnt > 10) {
+                    puts("Test canceled (more than 10 errors)\n");
+                    break;
+                }
+            }
+        }
+    }
        puts("done\n");
 
        return 0;
 
 #endif
 }
 
-void dfii_rst(const struct gramCtx *ctx) {
-               dfii_setcontrol(ctx, DFII_CONTROL_ODT| DFII_CONTROL_RESET);
+void dfii_reset(const struct gramCtx *ctx) { /* Replaces dfii_rst: calls the p0 address and bank-address setters with 0 before writing the control value. */
+        dfii_set_p0_address(ctx, 0);
+        dfii_set_p0_baddress(ctx, 0);
+               dfii_setcontrol(ctx, DFII_CONTROL_ODT|DFII_CONTROL_RESET); /* same ODT|RESET control flags the removed dfii_rst passed */
 }
 
 void dfii_setsw(const struct gramCtx *ctx, bool software_control) {
        dfii_p0_command(ctx, DFII_COMMAND_RAS|DFII_COMMAND_CAS|DFII_COMMAND_WE|DFII_COMMAND_CS);
 }
 
+// Uncomment these to speed up Icarus Verilog simulations dramatically
+//#define LONG_TIMER_MULT 1
+//#define SHORT_TIMER_MULT 1
+
 #define MR0_DLL_RESET (1 << 8)
 void dfii_initseq(const struct gramCtx *ctx, const struct gramProfile *profile) {
        /* Release reset */
        dfii_set_p0_address(ctx, 0x0);
        dfii_set_p0_baddress(ctx, 0);
        dfii_setcontrol(ctx, DFII_CONTROL_ODT|DFII_CONTROL_RESET);
-       cdelay(5*LONG_TIMER_MULT);
+       //cdelay(5*LONG_TIMER_MULT);
 
        /* Bring CKE high */
-       dfii_set_p0_address(ctx, 0x0);
-       dfii_set_p0_baddress(ctx, 0);
+       //dfii_set_p0_address(ctx, 0x0);
+       //dfii_set_p0_baddress(ctx, 0);
        dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET);
        cdelay(1*LONG_TIMER_MULT);
 
 
     //puts("bdet");
     //uart_writeuint32((unsigned long)&(ctx->phy->burstdet));
 
-       //dfii_reset(ctx);
-    //puts("reset\n");
-       dfii_setsw(ctx, true);
-    //puts("dfii_setsw\n");
        dfii_initseq(ctx, profile);
     puts("initseq\n");
        gram_load_calibration(ctx, profile);
 
         if ddr_pins is not None or fpga == 'sim':
             ddrmodule = dram_cls(clk_freq, "1:2") # match DDR3 ASIC P/N
 
-            drs = lambda x: x
-            #drs = DomainRenamer("dramsync")
+            #drs = lambda x: x
+            drs = DomainRenamer("dramsync")
 
             if fpga == 'sim':
                 self.ddrphy = FakePHY(module=ddrmodule,
     clk_freq = 70e6
     if fpga == 'sim':
         clk_freq = 100e6
+    if fpga == 'versa_ecp5':
+        clk_freq = 55e6
 
     # select a firmware file
     firmware = None