Cell *c = module->addCell(module->uniquify(stringf("%s.%d.%d.%d", cell->name.c_str(), grid_d, grid_a, dupidx)), bram.name);
                log("    Creating %s cell at grid position <%d %d %d>: %s\n", log_id(bram.name), grid_d, grid_a, dupidx, log_id(c));
 
-               dict<int, SigBit> clocks;
-
                for (auto &pi : portinfos)
                {
                        if (pi.dupidx != dupidx)
                        string prefix = stringf("%c%d", pi.group + 'A', pi.index + 1);
                        const char *pf = prefix.c_str();
 
-                       if (pi.clocks && (!clocks.count(pi.clocks) || pi.sig_clock.wire))
-                               clocks[pi.clocks] = pi.sig_clock;
+                       if (pi.clocks && (!c->hasPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1)) || pi.sig_clock.wire)) {
+                               c->setPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1), pi.sig_clock);
+                               if (pi.clkpol > 1 && pi.sig_clock.wire)
+                                       c->setParam(stringf("\\CLKPOL%d", (pi.clkpol-1) % clkpol_max + 1), clock_polarities.at(pi.clkpol));
+                       }
 
                        SigSpec addr_ok;
                        if (GetSize(pi.sig_addr) > bram.abits) {
                        sig_addr.extend_u0(bram.abits);
                        c->setPort(stringf("\\%sADDR", pf), sig_addr);
                }
-
-               for (auto &it : clocks)
-                       c->setPort(stringf("\\CLK%d", (it.first-1) % clocks_max + 1), it.second);
-
-               for (auto &it : clock_polarities)
-                       if (it.first > 1)
-                               c->setParam(stringf("\\CLKPOL%d", (it.first-1) % clkpol_max + 1), it.second);
        }
 
        for (auto &it : dout_cache)
 
         # XXX
         init = 0
         transp = [ 0 for i in range(groups) ]
-        clkpol = [ random.randrange(0, 2) for i in range(groups) ]
 
         for p1 in range(groups):
             if wrmode[p1] == 0:
                     states.add(("CPW", clocks[p1], clkpol[p1]))
                 always_hdr = "always @(posedge CLK%d_CLKPOL%d) begin" % (clocks[p1], clkpol[p1])
 
+            v_stmts.append("`ifndef SYNTHESIS")
+            v_stmts.append("event UPDATE_%s;" % pf)
+            v_stmts.append("`endif")
+
             v_stmts.append(always_hdr)
             if wrmode[p1]:
-                v_stmts.append("  `delay(%d)" % portindex);
+                v_stmts.append("  `ifndef SYNTHESIS");
+                v_stmts.append("    #%d;" % portindex);
+                v_stmts.append("    -> UPDATE_%s;" % pf)
+                v_stmts.append("  `endif")
                 for i in range(enable[p1]):
                     enrange = "[%d:%d]" % ((i+1)*dbits/enable[p1]-1, i*dbits/enable[p1])
                     v_stmts.append("  if (%sEN[%d]) memory[%sADDR]%s = %sDATA%s;" % (pf, i, pf, enrange, pf, enrange))
             else:
+                v_stmts.append("  `ifndef SYNTHESIS");
                 if transp[p1]:
-                    v_stmts.append("  `delay(%d)" % (sum(ports)+1))
+                    v_stmts.append("    #%d;" % sum(ports));
+                v_stmts.append("    -> UPDATE_%s;" % pf)
+                v_stmts.append("  `endif")
                 v_stmts.append("  %sDATA %s memory[%sADDR];" % (pf, assign_op, pf))
             v_stmts.append("end")
 
     if debug_mode:
         print("    $dumpfile(`vcd_file);", file=tb_f)
         print("    $dumpvars(0, bram_%02d_%02d_tb);" % (k1, k2), file=tb_f)
+    print("    #%d;" % (1000 + k2), file=tb_f)
 
     for p in (tb_clocks + tb_addr + tb_din):
         if p[-2:] == "EN":
             print("    %s <= ~0;" % p, file=tb_f)
         else:
             print("    %s <= 0;" % p, file=tb_f)
-    print("    #%d;" % (1000 + k2), file=tb_f)
+    print("    #1000;", file=tb_f)
 
     for v in [1, 0, 1, 0]:
         for p in tb_clocks:
     for f in [sim_f, ref_f, tb_f]:
         print("`timescale 1 ns / 1 ns", file=f)
 
-    for f in [sim_f, ref_f]:
-        print("`ifdef SYNTHESIS", file=f)
-        print("  `define delay(n)", file=f)
-        print("`else", file=f)
-        print("  `define delay(n) #n;", file=f)
-        print("`endif", file=f)
-
     for k2 in range(1 if debug_mode else 10):
         create_bram(dsc_f, sim_f, ref_f, tb_f, k1, k2)
 
 
 set -e
 ../../yosys -qq -p "proc; opt; memory -nomap; memory_bram -rules temp/brams_${2}.txt; opt -fast -full" \
                -l temp/synth_${1}_${2}.log -o temp/synth_${1}_${2}.v temp/brams_${1}.v
-iverilog -Dvcd_file=\"temp/tb_${1}_${2}.vcd\" -o temp/tb_${1}_${2}.tb temp/brams_${1}_tb.v temp/brams_${1}_ref.v \
-               temp/synth_${1}_${2}.v temp/brams_${2}.v ../../techlibs/common/simlib.v
+iverilog -Dvcd_file=\"temp/tb_${1}_${2}.vcd\" -DSIMLIB_MEMDELAY=1ns -o temp/tb_${1}_${2}.tb temp/brams_${1}_tb.v \
+               temp/brams_${1}_ref.v temp/synth_${1}_${2}.v temp/brams_${2}.v ../../techlibs/common/simlib.v
 temp/tb_${1}_${2}.tb > temp/tb_${1}_${2}.txt
 if grep -q ERROR temp/tb_${1}_${2}.txt; then
        grep -HC2 ERROR temp/tb_${1}_${2}.txt | head