kernel/mem: Introduce transparency masks.
author Marcelina Kościelnicka <mwk@0x04.net>
Sat, 31 Jul 2021 21:21:37 +0000 (23:21 +0200)
committer Marcelina Kościelnicka <mwk@0x04.net>
Tue, 10 Aug 2021 22:04:16 +0000 (00:04 +0200)
backends/cxxrtl/cxxrtl_backend.cc
backends/verilog/verilog_backend.cc
kernel/mem.cc
kernel/mem.h
passes/memory/memory_bram.cc
passes/memory/memory_dff.cc
passes/memory/memory_share.cc
passes/opt/opt_mem_feedback.cc

index 95ad6a86ea93427770d3fe06f92459b768be1606..40e61e5af6e1ec8c7f14ffc14010d356a7987611 100644 (file)
@@ -542,19 +542,21 @@ struct FlowGraph {
                        add_uses(node, port.arst);
                        add_uses(node, port.srst);
                        add_uses(node, port.addr);
-                       if (port.transparent && port.clk_enable) {
-                               // Our implementation of transparent read ports reads en, addr and data from every write port
-                               // in the same domain.
-                               for (auto &wrport : mem->wr_ports) {
-                                       if (wrport.clk_enable && wrport.clk == port.clk && wrport.clk_polarity == port.clk_polarity) {
-                                               add_uses(node, wrport.en);
-                                               add_uses(node, wrport.addr);
-                                               add_uses(node, wrport.data);
-                                       }
+                       bool transparent = false;
+                       for (int j = 0; j < GetSize(mem->wr_ports); j++) {
+                               auto &wrport = mem->wr_ports[j];
+                               if (port.transparency_mask[j]) {
+                                       // Our implementation of transparent read ports reads en, addr and data from every write port
+                                       // the read port is transparent with.
+                                       add_uses(node, wrport.en);
+                                       add_uses(node, wrport.addr);
+                                       add_uses(node, wrport.data);
+                                       transparent = true;
                                }
-                               // Also we read the address twice in this case (prevent inlining).
-                               add_uses(node, port.addr);
                        }
+                       // Also we read the read address twice in this case (prevent inlining).
+                       if (transparent)
+                               add_uses(node, port.addr);
                }
                if (!mem->wr_ports.empty()) {
                        Node *node = new Node;
@@ -1604,17 +1606,18 @@ struct CxxrtlWorker {
                                std::string lhs_temp = fresh_temporary();
                                f << indent << "value<" << mem->width << "> " << lhs_temp << " = "
                                            << mangle(mem) << "[" << valid_index_temp << ".index];\n";
-                               if (port.transparent && port.clk_enable) {
+                               bool transparent = false;
+                               for (auto bit : port.transparency_mask)
+                                       if (bit)
+                                               transparent = true;
+                               if (transparent) {
                                        std::string addr_temp = fresh_temporary();
                                        f << indent << "const value<" << port.addr.size() << "> &" << addr_temp << " = ";
                                        dump_sigspec_rhs(port.addr);
                                        f << ";\n";
-                                       for (auto &wrport : mem->wr_ports) {
-                                               if (!wrport.clk_enable)
-                                                       continue;
-                                               if (wrport.clk != port.clk)
-                                                       continue;
-                                               if (wrport.clk_polarity != port.clk_polarity)
+                                       for (int i = 0; i < GetSize(mem->wr_ports); i++) {
+                                               auto &wrport = mem->wr_ports[i];
+                                               if (!port.transparency_mask[i])
                                                        continue;
                                                f << indent << "if (" << addr_temp << " == ";
                                                dump_sigspec_rhs(wrport.addr);
index 8f96c3a589898d09d5e4c1133cbe0490191f481b..47b48a4603980870dac605ea240a816f24d6aea1 100644 (file)
@@ -555,7 +555,10 @@ void dump_memory(std::ostream &f, std::string indent, Mem &mem)
                        }
 
                        // Decide how to represent the transparency; same idea as Mem::extract_rdff.
-                       bool trans_use_addr = port.transparent;
+                       bool trans_use_addr = true;
+                       for (auto bit : port.transparency_mask)
+                               if (!bit)
+                                       trans_use_addr = false;
 
                        if (GetSize(mem.wr_ports) == 0)
                                trans_use_addr = false;
@@ -630,13 +633,7 @@ void dump_memory(std::ostream &f, std::string indent, Mem &mem)
 
                                for (int i = 0; i < GetSize(mem.wr_ports); i++) {
                                        auto &wport = mem.wr_ports[i];
-                                       if (!port.transparent)
-                                               continue;
-                                       if (!wport.clk_enable)
-                                               continue;
-                                       if (wport.clk != port.clk)
-                                               continue;
-                                       if (wport.clk_polarity != port.clk_polarity)
+                                       if (!port.transparency_mask[i] && !port.collision_x_mask[i])
                                                continue;
                                        int min_wide_log2 = std::min(port.wide_log2, wport.wide_log2);
                                        int max_wide_log2 = std::max(port.wide_log2, wport.wide_log2);
@@ -679,7 +676,10 @@ void dump_memory(std::ostream &f, std::string indent, Mem &mem)
                                                        if (epos-pos != GetSize(port.data))
                                                                os2 << stringf("[%d:%d]", rsub * mem.width + epos-1, rsub * mem.width + pos);
                                                        os2 << " <= ";
-                                                       dump_sigspec(os2, wport.data.extract(wsub * mem.width + pos, epos-pos));
+                                                       if (port.transparency_mask[i])
+                                                               dump_sigspec(os2, wport.data.extract(wsub * mem.width + pos, epos-pos));
+                                                       else
+                                                               dump_sigspec(os2, Const(State::Sx, epos - pos));
                                                        os2 << ";\n";
                                                        clk_to_lof_body[clk_domain_str].push_back(os2.str());
 
index a3b244eab8b0d5d804abb38cbb2bfc12fbd9419b..402ab55200f1a1d157e5d2d474057fa0b57b670d 100644 (file)
@@ -100,8 +100,14 @@ void Mem::emit() {
                        std::swap(inits[i], inits[init_left[i]]);
        inits.resize(GetSize(init_left));
 
-       // for future: handle transparency mask here
-
+       for (auto &port : rd_ports) {
+               for (int i = 0; i < GetSize(wr_left); i++) {
+                       port.transparency_mask[i] = port.transparency_mask[wr_left[i]];
+                       port.collision_x_mask[i] = port.collision_x_mask[wr_left[i]];
+               }
+               port.transparency_mask.resize(GetSize(wr_left));
+               port.collision_x_mask.resize(GetSize(wr_left));
+       }
        for (auto &port : wr_ports) {
                for (int i = 0; i < GetSize(wr_left); i++)
                        port.priority_mask[i] = port.priority_mask[wr_left[i]];
@@ -139,6 +145,20 @@ void Mem::emit() {
                        log_assert(port.arst == State::S0);
                        log_assert(port.srst == State::S0);
                        log_assert(port.init_value == Const(State::Sx, width << port.wide_log2));
+                       bool transparent = false;
+                       bool non_transparent = false;
+                       if (port.clk_enable) {
+                               for (int i = 0; i < GetSize(wr_ports); i++) {
+                                       auto &oport = wr_ports[i];
+                                       if (oport.clk_enable && oport.clk == port.clk && oport.clk_polarity == port.clk_polarity) {
+                                               if (port.transparency_mask[i])
+                                                       transparent = true;
+                                               else if (!port.collision_x_mask[i])
+                                                       non_transparent = true;
+                                       }
+                               }
+                               log_assert(!transparent || !non_transparent);
+                       }
                        if (port.cell) {
                                module->remove(port.cell);
                                port.cell = nullptr;
@@ -148,7 +168,7 @@ void Mem::emit() {
                                rd_wide_continuation.bits.push_back(State(sub != 0));
                                rd_clk_enable.bits.push_back(State(port.clk_enable));
                                rd_clk_polarity.bits.push_back(State(port.clk_polarity));
-                               rd_transparent.bits.push_back(State(port.transparent));
+                               rd_transparent.bits.push_back(State(transparent));
                                rd_clk.append(port.clk);
                                rd_en.append(port.en);
                                SigSpec addr = port.sub_addr(sub);
@@ -231,6 +251,20 @@ void Mem::emit() {
                        log_assert(port.arst == State::S0);
                        log_assert(port.srst == State::S0);
                        log_assert(port.init_value == Const(State::Sx, width << port.wide_log2));
+                       bool transparent = false;
+                       bool non_transparent = false;
+                       if (port.clk_enable) {
+                               for (int i = 0; i < GetSize(wr_ports); i++) {
+                                       auto &oport = wr_ports[i];
+                                       if (oport.clk_enable && oport.clk == port.clk && oport.clk_polarity == port.clk_polarity) {
+                                               if (port.transparency_mask[i])
+                                                       transparent = true;
+                                               else if (!port.collision_x_mask[i])
+                                                       non_transparent = true;
+                                       }
+                               }
+                               log_assert(!transparent || !non_transparent);
+                       }
                        if (!port.cell)
                                port.cell = module->addCell(NEW_ID, ID($memrd));
                        port.cell->attributes = port.attributes;
@@ -239,7 +273,7 @@ void Mem::emit() {
                        port.cell->parameters[ID::WIDTH] = width << port.wide_log2;
                        port.cell->parameters[ID::CLK_ENABLE] = port.clk_enable;
                        port.cell->parameters[ID::CLK_POLARITY] = port.clk_polarity;
-                       port.cell->parameters[ID::TRANSPARENT] = port.transparent;
+                       port.cell->parameters[ID::TRANSPARENT] = transparent;
                        port.cell->setPort(ID::CLK, port.clk);
                        port.cell->setPort(ID::EN, port.en);
                        port.cell->setPort(ID::ADDR, port.addr);
@@ -405,7 +439,6 @@ void Mem::check() {
                log_assert(GetSize(port.arst_value) == (width << port.wide_log2));
                log_assert(GetSize(port.srst_value) == (width << port.wide_log2));
                if (!port.clk_enable) {
-                       log_assert(!port.transparent);
                        log_assert(port.en == State::S1);
                        log_assert(port.arst == State::S0);
                        log_assert(port.srst == State::S0);
@@ -414,6 +447,18 @@ void Mem::check() {
                        log_assert(port.addr[j] == State::S0);
                }
                max_wide_log2 = std::max(max_wide_log2, port.wide_log2);
+               log_assert(GetSize(port.transparency_mask) == GetSize(wr_ports));
+               log_assert(GetSize(port.collision_x_mask) == GetSize(wr_ports));
+               for (int j = 0; j < GetSize(wr_ports); j++) {
+                       auto &wport = wr_ports[j];
+                       if ((port.transparency_mask[j] || port.collision_x_mask[j]) && !wport.removed) {
+                               log_assert(port.clk_enable);
+                               log_assert(wport.clk_enable);
+                               log_assert(port.clk == wport.clk);
+                               log_assert(port.clk_polarity == wport.clk_polarity);
+                       }
+                       log_assert(!port.transparency_mask[j] || !port.collision_x_mask[j]);
+               }
        }
        for (int i = 0; i < GetSize(wr_ports); i++) {
                auto &port = wr_ports[i];
@@ -467,6 +512,7 @@ namespace {
                res.packed = false;
                res.mem = mem;
                res.attributes = mem->attributes;
+               std::vector<bool> rd_transparent;
                if (index.rd_ports.count(mem->name)) {
                        for (auto cell : index.rd_ports.at(mem->name)) {
                                MemRd mrd;
@@ -474,7 +520,7 @@ namespace {
                                mrd.attributes = cell->attributes;
                                mrd.clk_enable = cell->parameters.at(ID::CLK_ENABLE).as_bool();
                                mrd.clk_polarity = cell->parameters.at(ID::CLK_POLARITY).as_bool();
-                               mrd.transparent = cell->parameters.at(ID::TRANSPARENT).as_bool();
+                               bool transparent = cell->parameters.at(ID::TRANSPARENT).as_bool();
                                mrd.clk = cell->getPort(ID::CLK);
                                mrd.en = cell->getPort(ID::EN);
                                mrd.addr = cell->getPort(ID::ADDR);
@@ -491,11 +537,12 @@ namespace {
                                        // but don't want to see moving forwards: async transparent
                                        // ports (inherently meaningless) and async ports without
                                        // const 1 tied to EN bit (which may mean a latch in the future).
-                                       mrd.transparent = false;
+                                       transparent = false;
                                        if (mrd.en == State::Sx)
                                                mrd.en = State::S1;
                                }
                                res.rd_ports.push_back(mrd);
+                               rd_transparent.push_back(transparent);
                        }
                }
                if (index.wr_ports.count(mem->name)) {
@@ -559,6 +606,25 @@ namespace {
                                port.priority_mask[j] = true;
                        }
                }
+               for (int i = 0; i < GetSize(res.rd_ports); i++) {
+                       auto &port = res.rd_ports[i];
+                       port.transparency_mask.resize(GetSize(res.wr_ports));
+                       port.collision_x_mask.resize(GetSize(res.wr_ports));
+                       if (!rd_transparent[i])
+                               continue;
+                       if (!port.clk_enable)
+                               continue;
+                       for (int j = 0; j < GetSize(res.wr_ports); j++) {
+                               auto &wport = res.wr_ports[j];
+                               if (!wport.clk_enable)
+                                       continue;
+                               if (port.clk != wport.clk)
+                                       continue;
+                               if (port.clk_polarity != wport.clk_polarity)
+                                       continue;
+                               port.transparency_mask[j] = true;
+                       }
+               }
                res.check();
                return res;
        }
@@ -601,7 +667,6 @@ namespace {
                        mrd.wide_log2 = 0;
                        mrd.clk_enable = cell->parameters.at(ID::RD_CLK_ENABLE).extract(i, 1).as_bool();
                        mrd.clk_polarity = cell->parameters.at(ID::RD_CLK_POLARITY).extract(i, 1).as_bool();
-                       mrd.transparent = cell->parameters.at(ID::RD_TRANSPARENT).extract(i, 1).as_bool();
                        mrd.clk = cell->getPort(ID::RD_CLK).extract(i, 1);
                        mrd.en = cell->getPort(ID::RD_EN).extract(i, 1);
                        mrd.addr = cell->getPort(ID::RD_ADDR).extract(i * abits, abits);
@@ -639,6 +704,25 @@ namespace {
                                port.priority_mask[j] = true;
                        }
                }
+               for (int i = 0; i < GetSize(res.rd_ports); i++) {
+                       auto &port = res.rd_ports[i];
+                       port.transparency_mask.resize(GetSize(res.wr_ports));
+                       port.collision_x_mask.resize(GetSize(res.wr_ports));
+                       if (!cell->parameters.at(ID::RD_TRANSPARENT).extract(i, 1).as_bool())
+                               continue;
+                       if (!port.clk_enable)
+                               continue;
+                       for (int j = 0; j < GetSize(res.wr_ports); j++) {
+                               auto &wport = res.wr_ports[j];
+                               if (!wport.clk_enable)
+                                       continue;
+                               if (port.clk != wport.clk)
+                                       continue;
+                               if (port.clk_polarity != wport.clk_polarity)
+                                       continue;
+                               port.transparency_mask[j] = true;
+                       }
+               }
                res.check();
                return res;
        }
@@ -690,7 +774,10 @@ Cell *Mem::extract_rdff(int idx, FfInitVals *initvals) {
        //
        // - otherwise, put the FF on the data output, and make bypass paths for
        //   all write ports wrt which this port is transparent
-       bool trans_use_addr = port.transparent;
+       bool trans_use_addr = true;
+       for (int i = 0; i < GetSize(wr_ports); i++)
+               if (!port.transparency_mask[i] && !wr_ports[i].removed)
+                       trans_use_addr = false;
 
        // If there are no write ports at all, we could possibly use either way; do data
        // FF in this case.
@@ -735,7 +822,9 @@ Cell *Mem::extract_rdff(int idx, FfInitVals *initvals) {
 
                for (int i = 0; i < GetSize(wr_ports); i++) {
                        auto &wport = wr_ports[i];
-                       if (port.transparent) {
+                       if (wport.removed)
+                               continue;
+                       if (port.transparency_mask[i] || port.collision_x_mask[i]) {
                                log_assert(wport.clk_enable);
                                log_assert(wport.clk == port.clk);
                                log_assert(wport.clk_enable == port.clk_enable);
@@ -761,7 +850,7 @@ Cell *Mem::extract_rdff(int idx, FfInitVals *initvals) {
                                                while (epos < ewidth && wport.en[epos + wsub * width] == wport.en[pos + wsub * width])
                                                        epos++;
                                                SigSpec cur = sig_d.extract(pos + rsub * width, epos-pos);
-                                               SigSpec other = wport.data.extract(pos + wsub * width, epos-pos);
+                                               SigSpec other = port.transparency_mask[i] ? wport.data.extract(pos + wsub * width, epos-pos) : Const(State::Sx, epos-pos);
                                                SigSpec cond;
                                                if (raddr != waddr)
                                                        cond = module->And(stringf("$%s$rdtransgate[%d][%d][%d][%d]$d", memid.c_str(), idx, i, sub, pos), wport.en[pos + wsub * width], addr_eq);
@@ -815,12 +904,16 @@ Cell *Mem::extract_rdff(int idx, FfInitVals *initvals) {
        port.srst = State::S0;
        port.clk_enable = false;
        port.clk_polarity = true;
-       port.transparent = false;
        port.ce_over_srst = false;
        port.arst_value = Const(State::Sx, GetSize(port.data));
        port.srst_value = Const(State::Sx, GetSize(port.data));
        port.init_value = Const(State::Sx, GetSize(port.data));
 
+       for (int i = 0; i < GetSize(wr_ports); i++) {
+               port.transparency_mask[i] = false;
+               port.collision_x_mask[i] = false;
+       }
+
        return c;
 }
 
@@ -857,6 +950,12 @@ void Mem::narrow() {
                        port.addr = port.sub_addr(it.second);
                        port.wide_log2 = 0;
                }
+               port.transparency_mask.clear();
+               port.collision_x_mask.clear();
+               for (auto &it2 : new_wr_map)
+                       port.transparency_mask.push_back(orig.transparency_mask[it2.first]);
+               for (auto &it2 : new_wr_map)
+                       port.collision_x_mask.push_back(orig.collision_x_mask[it2.first]);
                new_rd_ports.push_back(port);
        }
        for (auto &it : new_wr_map) {
@@ -879,12 +978,19 @@ void Mem::narrow() {
        std::swap(wr_ports, new_wr_ports);
 }
 
-void Mem::emulate_priority(int idx1, int idx2)
+void Mem::emulate_priority(int idx1, int idx2, FfInitVals *initvals)
 {
        auto &port1 = wr_ports[idx1];
        auto &port2 = wr_ports[idx2];
        if (!port2.priority_mask[idx1])
                return;
+       for (int i = 0; i < GetSize(rd_ports); i++) {
+               auto &rport = rd_ports[i];
+               if (rport.removed)
+                       continue;
+               if (rport.transparency_mask[idx1] && !(rport.transparency_mask[idx2] || rport.collision_x_mask[idx2]))
+                       emulate_transparency(idx1, i, initvals);
+       }
        int min_wide_log2 = std::min(port1.wide_log2, port2.wide_log2);
        int max_wide_log2 = std::max(port1.wide_log2, port2.wide_log2);
        bool wide1 = port1.wide_log2 > port2.wide_log2;
@@ -916,7 +1022,99 @@ void Mem::emulate_priority(int idx1, int idx2)
        port2.priority_mask[idx1] = false;
 }
 
-void Mem::prepare_wr_merge(int idx1, int idx2) {
+void Mem::emulate_transparency(int widx, int ridx, FfInitVals *initvals) { // widx indexes wr_ports, ridx indexes rd_ports
+       auto &wport = wr_ports[widx];
+       auto &rport = rd_ports[ridx];
+       log_assert(rport.transparency_mask[widx]); // precondition: the read port must currently be transparent with this write port
+       // If other write ports have priority over this one, emulate their transparency too.
+       for (int i = GetSize(wr_ports) - 1; i > widx; i--) { // higher-numbered write ports only, highest index first
+               if (wr_ports[i].removed)
+                       continue;
+               if (rport.transparency_mask[i] && wr_ports[i].priority_mask[widx])
+                       emulate_transparency(i, ridx, initvals);
+       }
+       int min_wide_log2 = std::min(rport.wide_log2, wport.wide_log2);
+       int max_wide_log2 = std::max(rport.wide_log2, wport.wide_log2);
+       bool wide_write = wport.wide_log2 > rport.wide_log2;
+       // The write data FF doesn't need full reset/init behavior, as it'll be masked by
+       // the mux whenever this would be relevant.  It does, however, need to have the same
+       // clock enable signal as the read port.
+       SigSpec wdata_q = module->addWire(NEW_ID, GetSize(wport.data));
+       module->addDffe(NEW_ID, rport.clk, rport.en, wport.data, wdata_q, rport.clk_polarity, true);
+       for (int sub = 0; sub < (1 << max_wide_log2); sub += (1 << min_wide_log2)) { // step through the wider port in chunks the size of the narrower port
+               SigSpec raddr = rport.addr;
+               SigSpec waddr = wport.addr;
+               for (int j = min_wide_log2; j < max_wide_log2; j++) // body does not use j; it only runs when the two port widths differ
+                       if (wide_write)
+                               waddr = wport.sub_addr(sub);
+                       else
+                               raddr = rport.sub_addr(sub);
+               SigSpec addr_eq;
+               if (raddr != waddr)
+                       addr_eq = module->Eq(NEW_ID, raddr, waddr);
+               int pos = 0;
+               int ewidth = width << min_wide_log2;
+               int wsub = wide_write ? sub : 0;
+               int rsub = wide_write ? 0 : sub;
+               SigSpec rdata_a = module->addWire(NEW_ID, ewidth); // fresh wire that replaces this slice of rport.data below; feeds the bypass muxes
+               while (pos < ewidth) {
+                       int epos = pos;
+                       while (epos < ewidth && wport.en[epos + wsub * width] == wport.en[pos + wsub * width]) // extend the run while the write-enable bit stays the same signal
+                               epos++;
+                       SigSpec cond;
+                       if (raddr != waddr)
+                               cond = module->And(NEW_ID, wport.en[pos + wsub * width], addr_eq);
+                       else
+                               cond = wport.en[pos + wsub * width];
+                       SigSpec cond_q = module->addWire(NEW_ID);
+                       // The FF for storing the bypass enable signal must be carefully
+                       // constructed to preserve the overall init/reset/enable behavior
+                       // of the whole port.
+                       FfData ff(initvals);
+                       ff.width = 1;
+                       ff.sig_q = cond_q;
+                       ff.has_d = true;
+                       ff.sig_d = cond;
+                       ff.has_clk = true;
+                       ff.sig_clk = rport.clk;
+                       ff.pol_clk = rport.clk_polarity;
+                       if (rport.en != State::S1) {
+                               ff.has_en = true;
+                               ff.sig_en = rport.en;
+                               ff.pol_en = true;
+                       }
+                       if (rport.arst != State::S0) {
+                               ff.has_arst = true;
+                               ff.sig_arst = rport.arst;
+                               ff.pol_arst = true;
+                               ff.val_arst = State::S0; // reset clears the bypass select, so the mux passes rdata_a
+                       }
+                       if (rport.srst != State::S0) {
+                               ff.has_srst = true;
+                               ff.sig_srst = rport.srst;
+                               ff.pol_srst = true;
+                               ff.val_srst = State::S0;
+                               ff.ce_over_srst = rport.ce_over_srst;
+                       }
+                       if (!rport.init_value.is_fully_undef()) // a defined port init value means the bypass select must start at 0
+                               ff.val_init = State::S0;
+                       else
+                               ff.val_init = State::Sx;
+                       ff.emit(module, NEW_ID);
+                       // And the final bypass mux.
+                       SigSpec cur = rdata_a.extract(pos, epos-pos);
+                       SigSpec other = wdata_q.extract(pos + wsub * width, epos-pos);
+                       SigSpec dest = rport.data.extract(pos + rsub * width, epos-pos); // still the port's original data bits; rport.data is only rewired after this loop
+                       module->addMux(NEW_ID, cur, other, cond_q, dest);
+                       pos = epos;
+               }
+               rport.data.replace(rsub * width, rdata_a);
+       }
+       rport.transparency_mask[widx] = false; // transparency is now provided by the logic built above
+       rport.collision_x_mask[widx] = true; // callers that need a different collision behavior clear this afterwards
+}
+
+void Mem::prepare_wr_merge(int idx1, int idx2, FfInitVals *initvals) { // idx1/idx2 index wr_ports; idx1 must be the lower index
        log_assert(idx1 < idx2);
        auto &port1 = wr_ports[idx1];
        auto &port2 = wr_ports[idx2];
@@ -926,14 +1124,97 @@ void Mem::prepare_wr_merge(int idx1, int idx2) {
                        port1.priority_mask[i] = true;
        // If port 2 has priority over a port after port 1, emulate it.
        for (int i = idx1 + 1; i < idx2; i++)
-               if (port2.priority_mask[i])
-                       emulate_priority(i, idx2);
+               if (port2.priority_mask[i] && !wr_ports[i].removed)
+                       emulate_priority(i, idx2, initvals);
        // If some port had priority over port 2, make it have priority over the merged port too.
        for (int i = idx2 + 1; i < GetSize(wr_ports); i++) {
                auto &oport = wr_ports[i];
                if (oport.priority_mask[idx2])
                        oport.priority_mask[idx1] = true;
        }
+       // Make sure all read ports have identical collision/transparency behavior wrt both
+       // ports.
+       for (int i = 0; i < GetSize(rd_ports); i++) { // here i is a READ port index
+               auto &rport = rd_ports[i];
+               if (rport.removed)
+                       continue;
+               // If collision already undefined with both ports, it's fine.
+               if (rport.collision_x_mask[idx1] && rport.collision_x_mask[idx2])
+                       continue;
+               // If one port has undefined collision, change it to the behavior
+               // of the other port.
+               if (rport.collision_x_mask[idx1]) {
+                       rport.collision_x_mask[idx1] = false;
+                       rport.transparency_mask[idx1] = rport.transparency_mask[idx2];
+                       continue;
+               }
+               if (rport.collision_x_mask[idx2]) {
+                       rport.collision_x_mask[idx2] = false;
+                       rport.transparency_mask[idx2] = rport.transparency_mask[idx1];
+                       continue;
+               }
+               // If transparent with both ports, also fine.
+               if (rport.transparency_mask[idx1] && rport.transparency_mask[idx2])
+                       continue;
+               // If transparent with only one, emulate it, and remove the collision-X
+               // flag that emulate_transparency will set (to align with the other port).
+               if (rport.transparency_mask[idx1]) {
+                       emulate_transparency(idx1, i, initvals); // argument order is (write port index, read port index)
+                       rport.collision_x_mask[idx1] = false;
+                       continue;
+               }
+               if (rport.transparency_mask[idx2]) {
+                       emulate_transparency(idx2, i, initvals); // argument order is (write port index, read port index)
+                       rport.collision_x_mask[idx2] = false;
+                       continue;
+               }
+               // If we got here, it's transparent with neither port, which is fine.
+       }
+}
+
+void Mem::prepare_rd_merge(int idx1, int idx2, FfInitVals *initvals) { // idx1/idx2 index rd_ports
+       auto &port1 = rd_ports[idx1];
+       auto &port2 = rd_ports[idx2];
+       // Note that going through write ports in order is important, since
+       // emulating transparency of a write port can change transparency
+       // mask for higher-numbered ports (due to transitive transparency
+       // emulation needed because of write port priority).
+       for (int i = 0; i < GetSize(wr_ports); i++) { // here i is a WRITE port index
+               if (wr_ports[i].removed)
+                       continue;
+               // Both ports undefined, OK.
+               if (port1.collision_x_mask[i] && port2.collision_x_mask[i])
+                       continue;
+               // Only one port undefined — change its behavior
+               // to align with the other port.
+               if (port1.collision_x_mask[i]) {
+                       port1.collision_x_mask[i] = false;
+                       port1.transparency_mask[i] = port2.transparency_mask[i];
+                       continue;
+               }
+               if (port2.collision_x_mask[i]) {
+                       port2.collision_x_mask[i] = false;
+                       port2.transparency_mask[i] = port1.transparency_mask[i];
+                       continue;
+               }
+               // Both ports transparent, OK.
+               if (port1.transparency_mask[i] && port2.transparency_mask[i])
+                       continue;
+               // Only one port transparent — emulate transparency
+               // on that port, so that both end up non-transparent.
+               if (port1.transparency_mask[i]) {
+                       emulate_transparency(i, idx1, initvals); // arguments are (write port index, read port index)
+                       port1.collision_x_mask[i] = false; // clear the collision-X flag emulate_transparency sets, to match port2
+                       continue;
+               }
+               if (port2.transparency_mask[i]) {
+                       emulate_transparency(i, idx2, initvals); // arguments are (write port index, read port index)
+                       port2.collision_x_mask[i] = false; // clear the collision-X flag emulate_transparency sets, to match port1
+                       continue;
+               }
+               // No ports transparent, OK.
+       }
+
+}
 
 void Mem::widen_prep(int wide_log2) {
index 24c2d64c8dc69f7ae446932f3f2d47efd2ba15a5..87a148bebdda5d20c0eac2e275b8bdf580486375 100644 (file)
@@ -31,7 +31,19 @@ struct MemRd : RTLIL::AttrObject {
        int wide_log2;
        bool clk_enable, clk_polarity, ce_over_srst;
        Const arst_value, srst_value, init_value;
-       bool transparent;
+       // One bit for every write port, true iff simultaneous read on this
+       // port and write on the other port will bypass the written data
+       // to this port's output (default behavior is to read old value).
+       // Can only be set for write ports that have the same clock domain.
+       std::vector<bool> transparency_mask;
+       // One bit for every write port, true iff simultaneous read on this
+       // port and write on the other port will return an all-X (don't care)
+       // value.  Mutually exclusive with transparency_mask.
+       // Can only be set for write ports that have the same clock domain.
+       // For optimization purposes, this will also be set if we can
+       // determine that the two ports can never be active simultaneously
+       // (making the above vacuously true).
+       std::vector<bool> collision_x_mask;
        SigSpec clk, en, arst, srst, addr, data;
 
        MemRd() : removed(false), cell(nullptr) {}
@@ -139,15 +151,34 @@ struct Mem : RTLIL::AttrObject {
        // If write port idx2 currently has priority over write port idx1,
        // inserts extra logic on idx1's enable signal to disable writes
        // when idx2 is writing to the same address, then removes the priority
-       // from the priority mask.
-       void emulate_priority(int idx1, int idx2);
+       // from the priority mask.  If there is a memory port that is
+       // transparent with idx1, but not with idx2, that port is converted
+       // to use soft transparency logic.
+       void emulate_priority(int idx1, int idx2, FfInitVals *initvals);
+
+       // Creates soft-transparency logic on read port ridx, bypassing the
+       // data from write port widx.  Should only be called when ridx is
+       // transparent wrt widx in the first place.  Once we're done, the
+       // transparency_mask bit will be cleared, and the collision_x_mask
+       // bit will be set instead (since whatever value is read will be
+       // replaced by the soft transparency logic).
+       void emulate_transparency(int widx, int ridx, FfInitVals *initvals);
 
        // Prepares for merging write port idx2 into idx1 (where idx1 < idx2).
        // Specifically, takes care of priority masks: any priority relations
        // that idx2 had are replicated onto idx1, unless they conflict with
        // priorities already present on idx1, in which case emulate_priority
-       // is called.
-       void prepare_wr_merge(int idx1, int idx2);
+       // is called.  Likewise, ensures transparency and undefined collision
+       // masks of all read ports have the same values for both ports,
+       // calling emulate_transparency if necessary.
+       void prepare_wr_merge(int idx1, int idx2, FfInitVals *initvals);
+
+       // Prepares for merging read port idx2 into idx1.
+       // Specifically, makes sure the transparency and undefined collision
+       // masks of both ports are equal, by changing undefined behavior
+       // of one port to the other's defined behavior, or by calling
+       // emulate_transparency if necessary.
+       void prepare_rd_merge(int idx1, int idx2, FfInitVals *initvals);
 
        // Prepares the memory for widening a port to a given width.  This
        // involves ensuring that start_offset and size are aligned to the
index af8137adac103c9751d39a55fee17b560b130948..fed9d60c0ffa4b7c8ee9f10619fe748d6bdd9547 100644 (file)
@@ -405,10 +405,6 @@ bool replace_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals, const
        auto portinfos = bram.make_portinfos();
        int dup_count = 1;
 
-       pair<SigBit, bool> make_transp_clk;
-       bool enable_make_transp = false;
-       int make_transp_enbits = 0;
-
        dict<int, pair<SigBit, bool>> clock_domains;
        dict<int, bool> clock_polarities;
        dict<int, bool> read_transp;
@@ -496,8 +492,6 @@ bool replace_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals, const
                for (; bram_port_i < GetSize(portinfos); bram_port_i++)
                {
                        auto &pi = portinfos[bram_port_i];
-                       make_transp_enbits = pi.enable ? pi.enable : 1;
-                       make_transp_clk = clkdom;
 
                        if (pi.wrmode != 1)
                skip_bram_wport:
@@ -606,10 +600,16 @@ grow_read_ports:;
        for (int cell_port_i = 0; cell_port_i < GetSize(mem.rd_ports); cell_port_i++)
        {
                auto &port = mem.rd_ports[cell_port_i];
-               bool transp = port.transparent;
-
-               if (mem.wr_ports.empty())
-                       transp = false;
+               bool transp = false;
+               bool non_transp = false;
+
+               if (port.clk_enable) {
+                       for (int i = 0; i < GetSize(mem.wr_ports); i++)
+                               if (port.transparency_mask[i])
+                                       transp = true;
+                               else if (!port.collision_x_mask[i])
+                                       non_transp = true;
+               }
 
                pair<SigBit, bool> clkdom(port.clk, port.clk_polarity);
                if (!port.clk_enable)
@@ -660,16 +660,13 @@ grow_read_ports:;
                                        log("        Bram port %c%d.%d has no initial value support.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
                                        goto skip_bram_rport;
                                }
-                               if (read_transp.count(pi.transp) && read_transp.at(pi.transp) != transp) {
-                                       if (match.make_transp && GetSize(mem.wr_ports) <= 1) {
+                               if (non_transp && read_transp.count(pi.transp) && read_transp.at(pi.transp)) {
+                                       log("        Bram port %c%d.%d has incompatible read transparency.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
+                                       goto skip_bram_rport;
+                               }
+                               if (transp && (non_transp || (read_transp.count(pi.transp) && !read_transp.at(pi.transp)))) {
+                                       if (match.make_transp) {
                                                pi.make_transp = true;
-                                               if (pi.clocks != 0) {
-                                                       if (GetSize(mem.wr_ports) == 1 && wr_clkdom != clkdom) {
-                                                               log("        Bram port %c%d.%d cannot have soft transparency logic added as read and write clock domains differ.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
-                                                               goto skip_bram_rport;
-                                                       }
-                                                       enable_make_transp = true;
-                                               }
                                        } else {
                                                log("        Bram port %c%d.%d has incompatible read transparency.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
                                                goto skip_bram_rport;
@@ -689,8 +686,10 @@ grow_read_ports:;
                        if (pi.clocks) {
                                clock_domains[pi.clocks] = clkdom;
                                clock_polarities[pi.clkpol] = clkdom.second;
-                               if (!pi.make_transp)
-                                       read_transp[pi.transp] = transp;
+                               if (non_transp)
+                                       read_transp[pi.transp] = false;
+                               if (transp && !pi.make_transp)
+                                       read_transp[pi.transp] = true;
                        }
 
                        if (grow_read_ports_cursor < cell_port_i) {
@@ -793,10 +792,22 @@ grow_read_ports:;
 
        // At this point we are commited to replacing the RAM, and can mutate mem.
 
+       // Apply make_outreg and make_transp where necessary.
+       for (auto &pi : portinfos) {
+               if (pi.make_outreg)
+                       mem.extract_rdff(pi.mapped_port, initvals);
+               if (pi.make_transp) {
+                       auto &port = mem.rd_ports[pi.mapped_port];
+                       for (int i = 0; i < GetSize(mem.wr_ports); i++)
+                               if (port.transparency_mask[i])
+                                       mem.emulate_transparency(i, pi.mapped_port, initvals);
+               }
+       }
+
        // We don't really support priorities, emulate them.
        for (int i = 0; i < GetSize(mem.wr_ports); i++)
                for (int j = 0; j < i; j++)
-                       mem.emulate_priority(j, i);
+                       mem.emulate_priority(j, i, initvals);
 
        // Swizzle the init data.  Do this before changing mem.width, so that get_init_data works.
        bool cell_init = !mem.inits.empty();
@@ -861,29 +872,12 @@ grow_read_ports:;
        for (auto &other_bram : rules.brams.at(bram.name))
                bram.find_variant_params(variant_params, other_bram);
 
-       // Apply make_outreg where necessary.
-       for (auto &pi : portinfos)
-               if (pi.make_outreg)
-                       mem.extract_rdff(pi.mapped_port, initvals);
-
        // actually replace that memory cell
 
        dict<SigSpec, pair<SigSpec, SigSpec>> dout_cache;
 
        for (int grid_d = 0; grid_d < dcells; grid_d++)
        {
-               SigSpec mktr_wraddr, mktr_wrdata, mktr_wrdata_q;
-               vector<SigSpec> mktr_wren;
-
-               if (enable_make_transp) {
-                       mktr_wraddr = module->addWire(NEW_ID, bram.abits);
-                       mktr_wrdata = module->addWire(NEW_ID, bram.dbits);
-                       mktr_wrdata_q = module->addWire(NEW_ID, bram.dbits);
-                       module->addDff(NEW_ID, make_transp_clk.first, mktr_wrdata, mktr_wrdata_q, make_transp_clk.second);
-                       for (int grid_a = 0; grid_a < acells; grid_a++)
-                               mktr_wren.push_back(module->addWire(NEW_ID, make_transp_enbits));
-               }
-
                for (int grid_a = 0; grid_a < acells; grid_a++)
                for (int dupidx = 0; dupidx < dup_count; dupidx++)
                {
@@ -964,15 +958,6 @@ grow_read_ports:;
 
                                                c->setPort(stringf("\\%sEN", pf), sig_en);
 
-                                               if (enable_make_transp)
-                                                       module->connect(mktr_wren[grid_a], sig_en);
-                                       }
-                                       else if (enable_make_transp)
-                                               module->connect(mktr_wren[grid_a], addr_ok);
-
-                                       if (enable_make_transp && grid_a == 0) {
-                                               module->connect(mktr_wraddr, sig_addr);
-                                               module->connect(mktr_wrdata, sig_data);
                                        }
                                } else {
                                        if (pi.mapped_port == -1)
@@ -986,22 +971,6 @@ grow_read_ports:;
 
                                        SigSpec bram_dout = module->addWire(NEW_ID, bram.dbits);
                                        c->setPort(stringf("\\%sDATA", pf), bram_dout);
-                                       if (pi.make_transp) {
-                                               log("        Adding extra logic for transparent port %c%d.%d.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
-
-                                               SigSpec transp_en_d = module->Mux(NEW_ID, SigSpec(0, make_transp_enbits),
-                                                               mktr_wren[grid_a], module->Eq(NEW_ID, mktr_wraddr, sig_addr));
-
-                                               SigSpec transp_en_q = module->addWire(NEW_ID, make_transp_enbits);
-                                               module->addDff(NEW_ID, make_transp_clk.first, transp_en_d, transp_en_q, make_transp_clk.second);
-
-                                               for (int i = 0; i < make_transp_enbits; i++) {
-                                                       int en_width = bram.dbits / make_transp_enbits;
-                                                       SigSpec orig_bram_dout = bram_dout.extract(i * en_width, en_width);
-                                                       SigSpec bypass_dout = mktr_wrdata_q.extract(i * en_width, en_width);
-                                                       bram_dout.replace(i * en_width, module->Mux(NEW_ID, orig_bram_dout, bypass_dout, transp_en_q[i]));
-                                               }
-                                       }
 
                                        SigSpec addr_ok_q = addr_ok;
                                        if (port.clk_enable && !addr_ok.empty()) {
index 5cfb3f48a4ee3c9b5338589e1acde02af62f1268..e6b4b2400dc73b965165b593546bbd082a80eeff 100644 (file)
@@ -143,7 +143,8 @@ struct MemoryDffWorker
                port.addr = ff.sig_d;
                port.clk_enable = true;
                port.clk_polarity = ff.pol_clk;
-               port.transparent = true;
+               for (int i = 0; i < GetSize(mem.wr_ports); i++)
+                       port.transparency_mask[i] = true;
                mem.emit();
                log("merged address FF to cell.\n");
        }
index 91f36ce05cb164d69ff54394e6c49f1c5e4ff682..8499b46d8d6b2989ad04c50644d3afcf37a321f3 100644 (file)
@@ -22,6 +22,7 @@
 #include "kernel/sigtools.h"
 #include "kernel/modtools.h"
 #include "kernel/mem.h"
+#include "kernel/ffinit.h"
 
 USING_YOSYS_NAMESPACE
 PRIVATE_NAMESPACE_BEGIN
@@ -32,6 +33,7 @@ struct MemoryShareWorker
        RTLIL::Module *module;
        SigMap sigmap, sigmap_xmux;
        ModWalker modwalker;
+       FfInitVals initvals;
        bool flag_widen;
 
 
@@ -106,8 +108,6 @@ struct MemoryShareWorker
                                        continue;
                                if (port1.ce_over_srst != port2.ce_over_srst)
                                        continue;
-                               if (port1.transparent != port2.transparent)
-                                       continue;
                                // If the width of the ports doesn't match, they can still be
                                // merged by widening the narrow one.  Check if the conditions
                                // hold for that.
@@ -147,8 +147,10 @@ struct MemoryShareWorker
                                        continue;
                                if (!merge_rst_value(mem, srst_value, wide_log2, port1.srst_value, sub1, port2.srst_value, sub2))
                                        continue;
+                               // At this point we are committed to the merge.
                                {
                                        log("  Merging ports %d, %d (address %s).\n", i, j, log_signal(port1.addr));
+                                       mem.prepare_rd_merge(i, j, &initvals);
                                        mem.widen_prep(wide_log2);
                                        SigSpec new_data = module->addWire(NEW_ID, mem.width << wide_log2);
                                        module->connect(port1.data, new_data.extract(sub1 * mem.width, mem.width << port1.wide_log2));
@@ -231,7 +233,7 @@ struct MemoryShareWorker
                                                continue;
                                }
                                log("  Merging ports %d, %d (address %s).\n", i, j, log_signal(port1.addr));
-                               mem.prepare_wr_merge(i, j);
+                               mem.prepare_wr_merge(i, j, &initvals);
                                port1.addr = sigmap_xmux(port1.addr);
                                port2.addr = sigmap_xmux(port2.addr);
                                mem.widen_wr_port(i, wide_log2);
@@ -391,7 +393,7 @@ struct MemoryShareWorker
                                        }
 
                                        log("  Merging port %d into port %d.\n", idx2, idx1);
-                                       mem.prepare_wr_merge(idx1, idx2);
+                                       mem.prepare_wr_merge(idx1, idx2, &initvals);
                                        port_to_sat_variable.at(idx1) = qcsat.ez->OR(port_to_sat_variable.at(idx1), port_to_sat_variable.at(idx2));
 
                                        RTLIL::SigSpec last_addr = port1.addr;
@@ -453,6 +455,7 @@ struct MemoryShareWorker
 
                this->module = module;
                sigmap.set(module);
+               initvals.set(&sigmap, module);
 
                sigmap_xmux = sigmap;
                for (auto cell : module->cells())
index 9e04772b4e3001f7f6d2bdbcbb3d4878e4d35516..20a2a79ed845505b7aef00bb1c2d990ca7d61d93 100644 (file)
@@ -43,6 +43,7 @@ struct OptMemFeedbackWorker
        RTLIL::Design *design;
        RTLIL::Module *module;
        SigMap sigmap, sigmap_xmux;
+       FfInitVals initvals;
 
        dict<RTLIL::SigBit, std::pair<RTLIL::Cell*, int>> sig_to_mux;
        dict<RTLIL::SigBit, int> sig_users_count;
@@ -245,7 +246,7 @@ struct OptMemFeedbackWorker
 
                        for (int i = 0; i < wrport_idx; i++)
                                if (port.priority_mask[i])
-                                       mem.emulate_priority(i, wrport_idx);
+                                       mem.emulate_priority(i, wrport_idx, &initvals);
                }
 
                for (auto &it : portbit_conds)
@@ -278,6 +279,7 @@ struct OptMemFeedbackWorker
 
                this->module = module;
                sigmap.set(module);
+               initvals.set(&sigmap, module);
                sig_to_mux.clear();
                conditions_logic_cache.clear();