From e6f3d1c225abecf736782f43af4f36526c63f4c5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marcelina=20Ko=C5=9Bcielnicka?= Date: Sat, 31 Jul 2021 23:21:37 +0200 Subject: [PATCH] kernel/mem: Introduce transparency masks. --- backends/cxxrtl/cxxrtl_backend.cc | 39 ++-- backends/verilog/verilog_backend.cc | 18 +- kernel/mem.cc | 313 ++++++++++++++++++++++++++-- kernel/mem.h | 41 +++- passes/memory/memory_bram.cc | 97 +++------ passes/memory/memory_dff.cc | 3 +- passes/memory/memory_share.cc | 11 +- passes/opt/opt_mem_feedback.cc | 4 +- 8 files changed, 408 insertions(+), 118 deletions(-) diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index 95ad6a86e..40e61e5af 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -542,19 +542,21 @@ struct FlowGraph { add_uses(node, port.arst); add_uses(node, port.srst); add_uses(node, port.addr); - if (port.transparent && port.clk_enable) { - // Our implementation of transparent read ports reads en, addr and data from every write port - // in the same domain. - for (auto &wrport : mem->wr_ports) { - if (wrport.clk_enable && wrport.clk == port.clk && wrport.clk_polarity == port.clk_polarity) { - add_uses(node, wrport.en); - add_uses(node, wrport.addr); - add_uses(node, wrport.data); - } + bool transparent = false; + for (int j = 0; j < GetSize(mem->wr_ports); j++) { + auto &wrport = mem->wr_ports[j]; + if (port.transparency_mask[j]) { + // Our implementation of transparent read ports reads en, addr and data from every write port + // the read port is transparent with. + add_uses(node, wrport.en); + add_uses(node, wrport.addr); + add_uses(node, wrport.data); + transparent = true; } - // Also we read the address twice in this case (prevent inlining). - add_uses(node, port.addr); } + // Also we read the read address twice in this case (prevent inlining). + if (transparent) + add_uses(node, port.addr); } if (!mem->wr_ports.empty()) { Node *node = new Node; @@ -1604,17 +1606,18 @@ struct CxxrtlWorker { std::string lhs_temp = fresh_temporary(); f << indent << "value<" << mem->width << "> " << lhs_temp << " = " << mangle(mem) << "[" << valid_index_temp << ".index];\n"; - if (port.transparent && port.clk_enable) { + bool transparent = false; + for (auto bit : port.transparency_mask) + if (bit) + transparent = true; + if (transparent) { std::string addr_temp = fresh_temporary(); f << indent << "const value<" << port.addr.size() << "> &" << addr_temp << " = "; dump_sigspec_rhs(port.addr); f << ";\n"; - for (auto &wrport : mem->wr_ports) { - if (!wrport.clk_enable) - continue; - if (wrport.clk != port.clk) - continue; - if (wrport.clk_polarity != port.clk_polarity) + for (int i = 0; i < GetSize(mem->wr_ports); i++) { + auto &wrport = mem->wr_ports[i]; + if (!port.transparency_mask[i]) continue; f << indent << "if (" << addr_temp << " == "; dump_sigspec_rhs(wrport.addr); diff --git a/backends/verilog/verilog_backend.cc b/backends/verilog/verilog_backend.cc index 8f96c3a58..47b48a460 100644 --- a/backends/verilog/verilog_backend.cc +++ b/backends/verilog/verilog_backend.cc @@ -555,7 +555,10 @@ void dump_memory(std::ostream &f, std::string indent, Mem &mem) } // Decide how to represent the transparency; same idea as Mem::extract_rdff. - bool trans_use_addr = port.transparent; + bool trans_use_addr = true; + for (auto bit : port.transparency_mask) + if (!bit) + trans_use_addr = false; if (GetSize(mem.wr_ports) == 0) trans_use_addr = false; @@ -630,13 +633,7 @@ void dump_memory(std::ostream &f, std::string indent, Mem &mem) for (int i = 0; i < GetSize(mem.wr_ports); i++) { auto &wport = mem.wr_ports[i]; - if (!port.transparent) - continue; - if (!wport.clk_enable) - continue; - if (wport.clk != port.clk) - continue; - if (wport.clk_polarity != port.clk_polarity) + if (!port.transparency_mask[i] && !port.collision_x_mask[i]) continue; int min_wide_log2 = std::min(port.wide_log2, wport.wide_log2); int max_wide_log2 = std::max(port.wide_log2, wport.wide_log2); @@ -679,7 +676,10 @@ void dump_memory(std::ostream &f, std::string indent, Mem &mem) if (epos-pos != GetSize(port.data)) os2 << stringf("[%d:%d]", rsub * mem.width + epos-1, rsub * mem.width + pos); os2 << " <= "; - dump_sigspec(os2, wport.data.extract(wsub * mem.width + pos, epos-pos)); + if (port.transparency_mask[i]) + dump_sigspec(os2, wport.data.extract(wsub * mem.width + pos, epos-pos)); + else + dump_sigspec(os2, Const(State::Sx, epos - pos)); os2 << ";\n"; clk_to_lof_body[clk_domain_str].push_back(os2.str()); diff --git a/kernel/mem.cc b/kernel/mem.cc index a3b244eab..402ab5520 100644 --- a/kernel/mem.cc +++ b/kernel/mem.cc @@ -100,8 +100,14 @@ void Mem::emit() { std::swap(inits[i], inits[init_left[i]]); inits.resize(GetSize(init_left)); - // for future: handle transparency mask here - + for (auto &port : rd_ports) { + for (int i = 0; i < GetSize(wr_left); i++) { + port.transparency_mask[i] = port.transparency_mask[wr_left[i]]; + port.collision_x_mask[i] = port.collision_x_mask[wr_left[i]]; + } + port.transparency_mask.resize(GetSize(wr_left)); + port.collision_x_mask.resize(GetSize(wr_left)); + } for (auto &port : wr_ports) { for (int i = 0; i < GetSize(wr_left); i++) port.priority_mask[i] = port.priority_mask[wr_left[i]]; @@ -139,6 +145,20 @@ void Mem::emit() { log_assert(port.arst == State::S0); log_assert(port.srst == State::S0); log_assert(port.init_value == Const(State::Sx, width << port.wide_log2)); + bool transparent = false; + bool non_transparent = false; + if (port.clk_enable) { + for (int i = 0; i < GetSize(wr_ports); i++) { + auto &oport = wr_ports[i]; + if (oport.clk_enable && oport.clk == port.clk && oport.clk_polarity == port.clk_polarity) { + if (port.transparency_mask[i]) + transparent = true; + else if (!port.collision_x_mask[i]) + non_transparent = true; + } + } + log_assert(!transparent || !non_transparent); + } if (port.cell) { module->remove(port.cell); port.cell = nullptr; @@ -148,7 +168,7 @@ void Mem::emit() { rd_wide_continuation.bits.push_back(State(sub != 0)); rd_clk_enable.bits.push_back(State(port.clk_enable)); rd_clk_polarity.bits.push_back(State(port.clk_polarity)); - rd_transparent.bits.push_back(State(port.transparent)); + rd_transparent.bits.push_back(State(transparent)); rd_clk.append(port.clk); rd_en.append(port.en); SigSpec addr = port.sub_addr(sub); @@ -231,6 +251,20 @@ void Mem::emit() { log_assert(port.arst == State::S0); log_assert(port.srst == State::S0); log_assert(port.init_value == Const(State::Sx, width << port.wide_log2)); + bool transparent = false; + bool non_transparent = false; + if (port.clk_enable) { + for (int i = 0; i < GetSize(wr_ports); i++) { + auto &oport = wr_ports[i]; + if (oport.clk_enable && oport.clk == port.clk && oport.clk_polarity == port.clk_polarity) { + if (port.transparency_mask[i]) + transparent = true; + else if (!port.collision_x_mask[i]) + non_transparent = true; + } + } + log_assert(!transparent || !non_transparent); + } if (!port.cell) port.cell = module->addCell(NEW_ID, ID($memrd)); port.cell->attributes = port.attributes; @@ -239,7 +273,7 @@ void Mem::emit() { port.cell->parameters[ID::WIDTH] = width << port.wide_log2; port.cell->parameters[ID::CLK_ENABLE] = port.clk_enable; port.cell->parameters[ID::CLK_POLARITY] = port.clk_polarity; - port.cell->parameters[ID::TRANSPARENT] = port.transparent; + port.cell->parameters[ID::TRANSPARENT] = transparent; port.cell->setPort(ID::CLK, port.clk); port.cell->setPort(ID::EN, port.en); port.cell->setPort(ID::ADDR, port.addr); @@ -405,7 +439,6 @@ void Mem::check() { log_assert(GetSize(port.arst_value) == (width << port.wide_log2)); log_assert(GetSize(port.srst_value) == (width << port.wide_log2)); if (!port.clk_enable) { - log_assert(!port.transparent); log_assert(port.en == State::S1); log_assert(port.arst == State::S0); log_assert(port.srst == State::S0); @@ -414,6 +447,18 @@ void Mem::check() { log_assert(port.addr[j] == State::S0); } max_wide_log2 = std::max(max_wide_log2, port.wide_log2); + log_assert(GetSize(port.transparency_mask) == GetSize(wr_ports)); + log_assert(GetSize(port.collision_x_mask) == GetSize(wr_ports)); + for (int j = 0; j < GetSize(wr_ports); j++) { + auto &wport = wr_ports[j]; + if ((port.transparency_mask[j] || port.collision_x_mask[j]) && !wport.removed) { + log_assert(port.clk_enable); + log_assert(wport.clk_enable); + log_assert(port.clk == wport.clk); + log_assert(port.clk_polarity == wport.clk_polarity); + } + log_assert(!port.transparency_mask[j] || !port.collision_x_mask[j]); + } } for (int i = 0; i < GetSize(wr_ports); i++) { auto &port = wr_ports[i]; @@ -467,6 +512,7 @@ namespace { res.packed = false; res.mem = mem; res.attributes = mem->attributes; + std::vector rd_transparent; if (index.rd_ports.count(mem->name)) { for (auto cell : index.rd_ports.at(mem->name)) { MemRd mrd; @@ -474,7 +520,7 @@ namespace { mrd.attributes = cell->attributes; mrd.clk_enable = cell->parameters.at(ID::CLK_ENABLE).as_bool(); mrd.clk_polarity = cell->parameters.at(ID::CLK_POLARITY).as_bool(); - mrd.transparent = cell->parameters.at(ID::TRANSPARENT).as_bool(); + bool transparent = cell->parameters.at(ID::TRANSPARENT).as_bool(); mrd.clk = cell->getPort(ID::CLK); mrd.en = cell->getPort(ID::EN); mrd.addr = cell->getPort(ID::ADDR); @@ -491,11 +537,12 @@ namespace { // but don't want to see moving forwards: async transparent // ports (inherently meaningless) and async ports without // const 1 tied to EN bit (which may mean a latch in the future). - mrd.transparent = false; + transparent = false; if (mrd.en == State::Sx) mrd.en = State::S1; } res.rd_ports.push_back(mrd); + rd_transparent.push_back(transparent); } } if (index.wr_ports.count(mem->name)) { @@ -559,6 +606,25 @@ namespace { port.priority_mask[j] = true; } } + for (int i = 0; i < GetSize(res.rd_ports); i++) { + auto &port = res.rd_ports[i]; + port.transparency_mask.resize(GetSize(res.wr_ports)); + port.collision_x_mask.resize(GetSize(res.wr_ports)); + if (!rd_transparent[i]) + continue; + if (!port.clk_enable) + continue; + for (int j = 0; j < GetSize(res.wr_ports); j++) { + auto &wport = res.wr_ports[j]; + if (!wport.clk_enable) + continue; + if (port.clk != wport.clk) + continue; + if (port.clk_polarity != wport.clk_polarity) + continue; + port.transparency_mask[j] = true; + } + } res.check(); return res; } @@ -601,7 +667,6 @@ namespace { mrd.wide_log2 = 0; mrd.clk_enable = cell->parameters.at(ID::RD_CLK_ENABLE).extract(i, 1).as_bool(); mrd.clk_polarity = cell->parameters.at(ID::RD_CLK_POLARITY).extract(i, 1).as_bool(); - mrd.transparent = cell->parameters.at(ID::RD_TRANSPARENT).extract(i, 1).as_bool(); mrd.clk = cell->getPort(ID::RD_CLK).extract(i, 1); mrd.en = cell->getPort(ID::RD_EN).extract(i, 1); mrd.addr = cell->getPort(ID::RD_ADDR).extract(i * abits, abits); @@ -639,6 +704,25 @@ namespace { port.priority_mask[j] = true; } } + for (int i = 0; i < GetSize(res.rd_ports); i++) { + auto &port = res.rd_ports[i]; + port.transparency_mask.resize(GetSize(res.wr_ports)); + port.collision_x_mask.resize(GetSize(res.wr_ports)); + if (!cell->parameters.at(ID::RD_TRANSPARENT).extract(i, 1).as_bool()) + continue; + if (!port.clk_enable) + continue; + for (int j = 0; j < GetSize(res.wr_ports); j++) { + auto &wport = res.wr_ports[j]; + if (!wport.clk_enable) + continue; + if (port.clk != wport.clk) + continue; + if (port.clk_polarity != wport.clk_polarity) + continue; + port.transparency_mask[j] = true; + } + } res.check(); return res; } @@ -690,7 +774,10 @@ Cell *Mem::extract_rdff(int idx, FfInitVals *initvals) { // // - otherwise, put the FF on the data output, and make bypass paths for // all write ports wrt which this port is transparent - bool trans_use_addr = port.transparent; + bool trans_use_addr = true; + for (int i = 0; i < GetSize(wr_ports); i++) + if (!port.transparency_mask[i] && !wr_ports[i].removed) + trans_use_addr = false; // If there are no write ports at all, we could possibly use either way; do data // FF in this case. @@ -735,7 +822,9 @@ Cell *Mem::extract_rdff(int idx, FfInitVals *initvals) { for (int i = 0; i < GetSize(wr_ports); i++) { auto &wport = wr_ports[i]; - if (port.transparent) { + if (wport.removed) + continue; + if (port.transparency_mask[i] || port.collision_x_mask[i]) { log_assert(wport.clk_enable); log_assert(wport.clk == port.clk); log_assert(wport.clk_enable == port.clk_enable); @@ -761,7 +850,7 @@ Cell *Mem::extract_rdff(int idx, FfInitVals *initvals) { while (epos < ewidth && wport.en[epos + wsub * width] == wport.en[pos + wsub * width]) epos++; SigSpec cur = sig_d.extract(pos + rsub * width, epos-pos); - SigSpec other = wport.data.extract(pos + wsub * width, epos-pos); + SigSpec other = port.transparency_mask[i] ? wport.data.extract(pos + wsub * width, epos-pos) : Const(State::Sx, epos-pos); SigSpec cond; if (raddr != waddr) cond = module->And(stringf("$%s$rdtransgate[%d][%d][%d][%d]$d", memid.c_str(), idx, i, sub, pos), wport.en[pos + wsub * width], addr_eq); @@ -815,12 +904,16 @@ Cell *Mem::extract_rdff(int idx, FfInitVals *initvals) { port.srst = State::S0; port.clk_enable = false; port.clk_polarity = true; - port.transparent = false; port.ce_over_srst = false; port.arst_value = Const(State::Sx, GetSize(port.data)); port.srst_value = Const(State::Sx, GetSize(port.data)); port.init_value = Const(State::Sx, GetSize(port.data)); + for (int i = 0; i < GetSize(wr_ports); i++) { + port.transparency_mask[i] = false; + port.collision_x_mask[i] = false; + } + return c; } @@ -857,6 +950,12 @@ void Mem::narrow() { port.addr = port.sub_addr(it.second); port.wide_log2 = 0; } + port.transparency_mask.clear(); + port.collision_x_mask.clear(); + for (auto &it2 : new_wr_map) + port.transparency_mask.push_back(orig.transparency_mask[it2.first]); + for (auto &it2 : new_wr_map) + port.collision_x_mask.push_back(orig.collision_x_mask[it2.first]); new_rd_ports.push_back(port); } for (auto &it : new_wr_map) { @@ -879,12 +978,19 @@ void Mem::narrow() { std::swap(wr_ports, new_wr_ports); } -void Mem::emulate_priority(int idx1, int idx2) +void Mem::emulate_priority(int idx1, int idx2, FfInitVals *initvals) { auto &port1 = wr_ports[idx1]; auto &port2 = wr_ports[idx2]; if (!port2.priority_mask[idx1]) return; + for (int i = 0; i < GetSize(rd_ports); i++) { + auto &rport = rd_ports[i]; + if (rport.removed) + continue; + if (rport.transparency_mask[idx1] && !(rport.transparency_mask[idx2] || rport.collision_x_mask[idx2])) + emulate_transparency(idx1, i, initvals); + } int min_wide_log2 = std::min(port1.wide_log2, port2.wide_log2); int max_wide_log2 = std::max(port1.wide_log2, port2.wide_log2); bool wide1 = port1.wide_log2 > port2.wide_log2; @@ -916,7 +1022,99 @@ void Mem::emulate_priority(int idx1, int idx2) port2.priority_mask[idx1] = false; } -void Mem::prepare_wr_merge(int idx1, int idx2) { +void Mem::emulate_transparency(int widx, int ridx, FfInitVals *initvals) { + auto &wport = wr_ports[widx]; + auto &rport = rd_ports[ridx]; + log_assert(rport.transparency_mask[widx]); + // If other write ports have priority over this one, emulate their transparency too. + for (int i = GetSize(wr_ports) - 1; i > widx; i--) { + if (wr_ports[i].removed) + continue; + if (rport.transparency_mask[i] && wr_ports[i].priority_mask[widx]) + emulate_transparency(i, ridx, initvals); + } + int min_wide_log2 = std::min(rport.wide_log2, wport.wide_log2); + int max_wide_log2 = std::max(rport.wide_log2, wport.wide_log2); + bool wide_write = wport.wide_log2 > rport.wide_log2; + // The write data FF doesn't need full reset/init behavior, as it'll be masked by + // the mux whenever this would be relevant. It does, however, need to have the same + // clock enable signal as the read port. + SigSpec wdata_q = module->addWire(NEW_ID, GetSize(wport.data)); + module->addDffe(NEW_ID, rport.clk, rport.en, wport.data, wdata_q, rport.clk_polarity, true); + for (int sub = 0; sub < (1 << max_wide_log2); sub += (1 << min_wide_log2)) { + SigSpec raddr = rport.addr; + SigSpec waddr = wport.addr; + for (int j = min_wide_log2; j < max_wide_log2; j++) + if (wide_write) + waddr = wport.sub_addr(sub); + else + raddr = rport.sub_addr(sub); + SigSpec addr_eq; + if (raddr != waddr) + addr_eq = module->Eq(NEW_ID, raddr, waddr); + int pos = 0; + int ewidth = width << min_wide_log2; + int wsub = wide_write ? sub : 0; + int rsub = wide_write ? 0 : sub; + SigSpec rdata_a = module->addWire(NEW_ID, ewidth); + while (pos < ewidth) { + int epos = pos; + while (epos < ewidth && wport.en[epos + wsub * width] == wport.en[pos + wsub * width]) + epos++; + SigSpec cond; + if (raddr != waddr) + cond = module->And(NEW_ID, wport.en[pos + wsub * width], addr_eq); + else + cond = wport.en[pos + wsub * width]; + SigSpec cond_q = module->addWire(NEW_ID); + // The FF for storing the bypass enable signal must be carefully + // constructed to preserve the overall init/reset/enable behavior + // of the whole port. + FfData ff(initvals); + ff.width = 1; + ff.sig_q = cond_q; + ff.has_d = true; + ff.sig_d = cond; + ff.has_clk = true; + ff.sig_clk = rport.clk; + ff.pol_clk = rport.clk_polarity; + if (rport.en != State::S1) { + ff.has_en = true; + ff.sig_en = rport.en; + ff.pol_en = true; + } + if (rport.arst != State::S0) { + ff.has_arst = true; + ff.sig_arst = rport.arst; + ff.pol_arst = true; + ff.val_arst = State::S0; + } + if (rport.srst != State::S0) { + ff.has_srst = true; + ff.sig_srst = rport.srst; + ff.pol_srst = true; + ff.val_srst = State::S0; + ff.ce_over_srst = rport.ce_over_srst; + } + if (!rport.init_value.is_fully_undef()) + ff.val_init = State::S0; + else + ff.val_init = State::Sx; + ff.emit(module, NEW_ID); + // And the final bypass mux. + SigSpec cur = rdata_a.extract(pos, epos-pos); + SigSpec other = wdata_q.extract(pos + wsub * width, epos-pos); + SigSpec dest = rport.data.extract(pos + rsub * width, epos-pos); + module->addMux(NEW_ID, cur, other, cond_q, dest); + pos = epos; + } + rport.data.replace(rsub * width, rdata_a); + } + rport.transparency_mask[widx] = false; + rport.collision_x_mask[widx] = true; +} + +void Mem::prepare_wr_merge(int idx1, int idx2, FfInitVals *initvals) { log_assert(idx1 < idx2); auto &port1 = wr_ports[idx1]; auto &port2 = wr_ports[idx2]; @@ -926,14 +1124,97 @@ void Mem::prepare_wr_merge(int idx1, int idx2) { port1.priority_mask[i] = true; // If port 2 has priority over a port after port 1, emulate it. for (int i = idx1 + 1; i < idx2; i++) - if (port2.priority_mask[i]) - emulate_priority(i, idx2); + if (port2.priority_mask[i] && !wr_ports[i].removed) + emulate_priority(i, idx2, initvals); // If some port had priority over port 2, make it have priority over the merged port too. for (int i = idx2 + 1; i < GetSize(wr_ports); i++) { auto &oport = wr_ports[i]; if (oport.priority_mask[idx2]) oport.priority_mask[idx1] = true; } + // Make sure all read ports have identical collision/transparency behavior wrt both + // ports. + for (int i = 0; i < GetSize(rd_ports); i++) { + auto &rport = rd_ports[i]; + if (rport.removed) + continue; + // If collision already undefined with both ports, it's fine. + if (rport.collision_x_mask[idx1] && rport.collision_x_mask[idx2]) + continue; + // If one port has undefined collision, change it to the behavior + // of the other port. + if (rport.collision_x_mask[idx1]) { + rport.collision_x_mask[idx1] = false; + rport.transparency_mask[idx1] = rport.transparency_mask[idx2]; + continue; + } + if (rport.collision_x_mask[idx2]) { + rport.collision_x_mask[idx2] = false; + rport.transparency_mask[idx2] = rport.transparency_mask[idx1]; + continue; + } + // If transparent with both ports, also fine. + if (rport.transparency_mask[idx1] && rport.transparency_mask[idx2]) + continue; + // If transparent with only one, emulate it, and remove the collision-X + // flag that emulate_transparency will set (to align with the other port). + if (rport.transparency_mask[idx1]) { + emulate_transparency(i, idx1, initvals); + rport.collision_x_mask[idx1] = false; + continue; + } + if (rport.transparency_mask[idx2]) { + emulate_transparency(i, idx2, initvals); + rport.collision_x_mask[idx2] = false; + continue; + } + // If we got here, it's transparent with neither port, which is fine. + } +} + +void Mem::prepare_rd_merge(int idx1, int idx2, FfInitVals *initvals) { + auto &port1 = rd_ports[idx1]; + auto &port2 = rd_ports[idx2]; + // Note that going through write ports in order is important, since + // emulating transparency of a write port can change transparency + // mask for higher-numbered ports (due to transitive transparency + // emulation needed because of write port priority). + for (int i = 0; i < GetSize(wr_ports); i++) { + if (wr_ports[i].removed) + continue; + // Both ports undefined, OK. + if (port1.collision_x_mask[i] && port2.collision_x_mask[i]) + continue; + // Only one port undefined — change its behavior + // to align with the other port. + if (port1.collision_x_mask[i]) { + port1.collision_x_mask[i] = false; + port1.transparency_mask[i] = port2.transparency_mask[i]; + continue; + } + if (port2.collision_x_mask[i]) { + port2.collision_x_mask[i] = false; + port2.transparency_mask[i] = port1.transparency_mask[i]; + continue; + } + // Both ports transparent, OK. + if (port1.transparency_mask[i] && port2.transparency_mask[i]) + continue; + // Only one port transparent — emulate transparency + // on the other. + if (port1.transparency_mask[i]) { + emulate_transparency(i, idx1, initvals); + port1.collision_x_mask[i] = false; + continue; + } + if (port2.transparency_mask[i]) { + emulate_transparency(i, idx2, initvals); + port2.collision_x_mask[i] = false; + continue; + } + // No ports transparent, OK. + } + } void Mem::widen_prep(int wide_log2) { diff --git a/kernel/mem.h b/kernel/mem.h index 24c2d64c8..87a148beb 100644 --- a/kernel/mem.h +++ b/kernel/mem.h @@ -31,7 +31,19 @@ struct MemRd : RTLIL::AttrObject { int wide_log2; bool clk_enable, clk_polarity, ce_over_srst; Const arst_value, srst_value, init_value; - bool transparent; + // One bit for every write port, true iff simultanous read on this + // port and write on the other port will bypass the written data + // to this port's output (default behavior is to read old value). + // Can only be set for write ports that have the same clock domain. + std::vector transparency_mask; + // One bit for every write port, true iff simultanous read on this + // port and write on the other port will return an all-X (don't care) + // value. Mutually exclusive with transparency_mask. + // Can only be set for write ports that have the same clock domain. + // For optimization purposes, this will also be set if we can + // determine that the two ports can never be active simultanously + // (making the above vacuously true). + std::vector collision_x_mask; SigSpec clk, en, arst, srst, addr, data; MemRd() : removed(false), cell(nullptr) {} @@ -139,15 +151,34 @@ struct Mem : RTLIL::AttrObject { // If write port idx2 currently has priority over write port idx1, // inserts extra logic on idx1's enable signal to disable writes // when idx2 is writing to the same address, then removes the priority - // from the priority mask. - void emulate_priority(int idx1, int idx2); + // from the priority mask. If there is a memory port that is + // transparent with idx1, but not with idx2, that port is converted + // to use soft transparency logic. + void emulate_priority(int idx1, int idx2, FfInitVals *initvals); + + // Creates soft-transparency logic on read port ridx, bypassing the + // data from write port widx. Should only be called when ridx is + // transparent wrt widx in the first place. Once we're done, the + // transparency_mask bit will be cleared, and the collision_x_mask + // bit will be set instead (since whatever value is read will be + // replaced by the soft transparency logic). + void emulate_transparency(int widx, int ridx, FfInitVals *initvals); // Prepares for merging write port idx2 into idx1 (where idx1 < idx2). // Specifically, takes care of priority masks: any priority relations // that idx2 had are replicated onto idx1, unless they conflict with // priorities already present on idx1, in which case emulate_priority - // is called. - void prepare_wr_merge(int idx1, int idx2); + // is called. Likewise, ensures transparency and undefined collision + // masks of all read ports have the same values for both ports, + // calling emulate_transparency if necessary. + void prepare_wr_merge(int idx1, int idx2, FfInitVals *initvals); + + // Prepares for merging read port idx2 into idx1. + // Specifically, makes sure the transparency and undefined collision + // masks of both ports are equal, by changing undefined behavior + // of one port to the other's defined behavior, or by calling + // emulate_transparency if necessary. + void prepare_rd_merge(int idx1, int idx2, FfInitVals *initvals); // Prepares the memory for widening a port to a given width. This // involves ensuring that start_offset and size are aligned to the diff --git a/passes/memory/memory_bram.cc b/passes/memory/memory_bram.cc index af8137ada..fed9d60c0 100644 --- a/passes/memory/memory_bram.cc +++ b/passes/memory/memory_bram.cc @@ -405,10 +405,6 @@ bool replace_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals, const auto portinfos = bram.make_portinfos(); int dup_count = 1; - pair make_transp_clk; - bool enable_make_transp = false; - int make_transp_enbits = 0; - dict> clock_domains; dict clock_polarities; dict read_transp; @@ -496,8 +492,6 @@ bool replace_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals, const for (; bram_port_i < GetSize(portinfos); bram_port_i++) { auto &pi = portinfos[bram_port_i]; - make_transp_enbits = pi.enable ? pi.enable : 1; - make_transp_clk = clkdom; if (pi.wrmode != 1) skip_bram_wport: @@ -606,10 +600,16 @@ grow_read_ports:; for (int cell_port_i = 0; cell_port_i < GetSize(mem.rd_ports); cell_port_i++) { auto &port = mem.rd_ports[cell_port_i]; - bool transp = port.transparent; - - if (mem.wr_ports.empty()) - transp = false; + bool transp = false; + bool non_transp = false; + + if (port.clk_enable) { + for (int i = 0; i < GetSize(mem.wr_ports); i++) + if (port.transparency_mask[i]) + transp = true; + else if (!port.collision_x_mask[i]) + non_transp = true; + } pair clkdom(port.clk, port.clk_polarity); if (!port.clk_enable) @@ -660,16 +660,13 @@ grow_read_ports:; log(" Bram port %c%d.%d has no initial value support.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1); goto skip_bram_rport; } - if (read_transp.count(pi.transp) && read_transp.at(pi.transp) != transp) { - if (match.make_transp && GetSize(mem.wr_ports) <= 1) { + if (non_transp && read_transp.count(pi.transp) && read_transp.at(pi.transp)) { + log(" Bram port %c%d.%d has incompatible read transparency.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1); + goto skip_bram_rport; + } + if (transp && (non_transp || (read_transp.count(pi.transp) && !read_transp.at(pi.transp)))) { + if (match.make_transp) { pi.make_transp = true; - if (pi.clocks != 0) { - if (GetSize(mem.wr_ports) == 1 && wr_clkdom != clkdom) { - log(" Bram port %c%d.%d cannot have soft transparency logic added as read and write clock domains differ.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1); - goto skip_bram_rport; - } - enable_make_transp = true; - } } else { log(" Bram port %c%d.%d has incompatible read transparency.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1); goto skip_bram_rport; @@ -689,8 +686,10 @@ grow_read_ports:; if (pi.clocks) { clock_domains[pi.clocks] = clkdom; clock_polarities[pi.clkpol] = clkdom.second; - if (!pi.make_transp) - read_transp[pi.transp] = transp; + if (non_transp) + read_transp[pi.transp] = false; + if (transp && !pi.make_transp) + read_transp[pi.transp] = true; } if (grow_read_ports_cursor < cell_port_i) { @@ -793,10 +792,22 @@ grow_read_ports:; // At this point we are commited to replacing the RAM, and can mutate mem. + // Apply make_outreg and make_transp where necessary. + for (auto &pi : portinfos) { + if (pi.make_outreg) + mem.extract_rdff(pi.mapped_port, initvals); + if (pi.make_transp) { + auto &port = mem.rd_ports[pi.mapped_port]; + for (int i = 0; i < GetSize(mem.wr_ports); i++) + if (port.transparency_mask[i]) + mem.emulate_transparency(i, pi.mapped_port, initvals); + } + } + // We don't really support priorities, emulate them. for (int i = 0; i < GetSize(mem.wr_ports); i++) for (int j = 0; j < i; j++) - mem.emulate_priority(j, i); + mem.emulate_priority(j, i, initvals); // Swizzle the init data. Do this before changing mem.width, so that get_init_data works. bool cell_init = !mem.inits.empty(); @@ -861,29 +872,12 @@ grow_read_ports:; for (auto &other_bram : rules.brams.at(bram.name)) bram.find_variant_params(variant_params, other_bram); - // Apply make_outreg where necessary. - for (auto &pi : portinfos) - if (pi.make_outreg) - mem.extract_rdff(pi.mapped_port, initvals); - // actually replace that memory cell dict> dout_cache; for (int grid_d = 0; grid_d < dcells; grid_d++) { - SigSpec mktr_wraddr, mktr_wrdata, mktr_wrdata_q; - vector mktr_wren; - - if (enable_make_transp) { - mktr_wraddr = module->addWire(NEW_ID, bram.abits); - mktr_wrdata = module->addWire(NEW_ID, bram.dbits); - mktr_wrdata_q = module->addWire(NEW_ID, bram.dbits); - module->addDff(NEW_ID, make_transp_clk.first, mktr_wrdata, mktr_wrdata_q, make_transp_clk.second); - for (int grid_a = 0; grid_a < acells; grid_a++) - mktr_wren.push_back(module->addWire(NEW_ID, make_transp_enbits)); - } - for (int grid_a = 0; grid_a < acells; grid_a++) for (int dupidx = 0; dupidx < dup_count; dupidx++) { @@ -964,15 +958,6 @@ grow_read_ports:; c->setPort(stringf("\\%sEN", pf), sig_en); - if (enable_make_transp) - module->connect(mktr_wren[grid_a], sig_en); - } - else if (enable_make_transp) - module->connect(mktr_wren[grid_a], addr_ok); - - if (enable_make_transp && grid_a == 0) { - module->connect(mktr_wraddr, sig_addr); - module->connect(mktr_wrdata, sig_data); } } else { if (pi.mapped_port == -1) @@ -986,22 +971,6 @@ grow_read_ports:; SigSpec bram_dout = module->addWire(NEW_ID, bram.dbits); c->setPort(stringf("\\%sDATA", pf), bram_dout); - if (pi.make_transp) { - log(" Adding extra logic for transparent port %c%d.%d.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1); - - SigSpec transp_en_d = module->Mux(NEW_ID, SigSpec(0, make_transp_enbits), - mktr_wren[grid_a], module->Eq(NEW_ID, mktr_wraddr, sig_addr)); - - SigSpec transp_en_q = module->addWire(NEW_ID, make_transp_enbits); - module->addDff(NEW_ID, make_transp_clk.first, transp_en_d, transp_en_q, make_transp_clk.second); - - for (int i = 0; i < make_transp_enbits; i++) { - int en_width = bram.dbits / make_transp_enbits; - SigSpec orig_bram_dout = bram_dout.extract(i * en_width, en_width); - SigSpec bypass_dout = mktr_wrdata_q.extract(i * en_width, en_width); - bram_dout.replace(i * en_width, module->Mux(NEW_ID, orig_bram_dout, bypass_dout, transp_en_q[i])); - } - } SigSpec addr_ok_q = addr_ok; if (port.clk_enable && !addr_ok.empty()) { diff --git a/passes/memory/memory_dff.cc b/passes/memory/memory_dff.cc index 5cfb3f48a..e6b4b2400 100644 --- a/passes/memory/memory_dff.cc +++ b/passes/memory/memory_dff.cc @@ -143,7 +143,8 @@ struct MemoryDffWorker port.addr = ff.sig_d; port.clk_enable = true; port.clk_polarity = ff.pol_clk; - port.transparent = true; + for (int i = 0; i < GetSize(mem.wr_ports); i++) + port.transparency_mask[i] = true; mem.emit(); log("merged address FF to cell.\n"); } diff --git a/passes/memory/memory_share.cc b/passes/memory/memory_share.cc index 91f36ce05..8499b46d8 100644 --- a/passes/memory/memory_share.cc +++ b/passes/memory/memory_share.cc @@ -22,6 +22,7 @@ #include "kernel/sigtools.h" #include "kernel/modtools.h" #include "kernel/mem.h" +#include "kernel/ffinit.h" USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN @@ -32,6 +33,7 @@ struct MemoryShareWorker RTLIL::Module *module; SigMap sigmap, sigmap_xmux; ModWalker modwalker; + FfInitVals initvals; bool flag_widen; @@ -106,8 +108,6 @@ struct MemoryShareWorker continue; if (port1.ce_over_srst != port2.ce_over_srst) continue; - if (port1.transparent != port2.transparent) - continue; // If the width of the ports doesn't match, they can still be // merged by widening the narrow one. Check if the conditions // hold for that. @@ -147,8 +147,10 @@ struct MemoryShareWorker continue; if (!merge_rst_value(mem, srst_value, wide_log2, port1.srst_value, sub1, port2.srst_value, sub2)) continue; + // At this point we are committed to the merge. { log(" Merging ports %d, %d (address %s).\n", i, j, log_signal(port1.addr)); + mem.prepare_rd_merge(i, j, &initvals); mem.widen_prep(wide_log2); SigSpec new_data = module->addWire(NEW_ID, mem.width << wide_log2); module->connect(port1.data, new_data.extract(sub1 * mem.width, mem.width << port1.wide_log2)); @@ -231,7 +233,7 @@ struct MemoryShareWorker continue; } log(" Merging ports %d, %d (address %s).\n", i, j, log_signal(port1.addr)); - mem.prepare_wr_merge(i, j); + mem.prepare_wr_merge(i, j, &initvals); port1.addr = sigmap_xmux(port1.addr); port2.addr = sigmap_xmux(port2.addr); mem.widen_wr_port(i, wide_log2); @@ -391,7 +393,7 @@ struct MemoryShareWorker } log(" Merging port %d into port %d.\n", idx2, idx1); - mem.prepare_wr_merge(idx1, idx2); + mem.prepare_wr_merge(idx1, idx2, &initvals); port_to_sat_variable.at(idx1) = qcsat.ez->OR(port_to_sat_variable.at(idx1), port_to_sat_variable.at(idx2)); RTLIL::SigSpec last_addr = port1.addr; @@ -453,6 +455,7 @@ struct MemoryShareWorker this->module = module; sigmap.set(module); + initvals.set(&sigmap, module); sigmap_xmux = sigmap; for (auto cell : module->cells()) diff --git a/passes/opt/opt_mem_feedback.cc b/passes/opt/opt_mem_feedback.cc index 9e04772b4..20a2a79ed 100644 --- a/passes/opt/opt_mem_feedback.cc +++ b/passes/opt/opt_mem_feedback.cc @@ -43,6 +43,7 @@ struct OptMemFeedbackWorker RTLIL::Design *design; RTLIL::Module *module; SigMap sigmap, sigmap_xmux; + FfInitVals initvals; dict> sig_to_mux; dict sig_users_count; @@ -245,7 +246,7 @@ struct OptMemFeedbackWorker for (int i = 0; i < wrport_idx; i++) if (port.priority_mask[i]) - mem.emulate_priority(i, wrport_idx); + mem.emulate_priority(i, wrport_idx, &initvals); } for (auto &it : portbit_conds) @@ -278,6 +279,7 @@ struct OptMemFeedbackWorker this->module = module; sigmap.set(module); + initvals.set(&sigmap, module); sig_to_mux.clear(); conditions_logic_cache.clear(); -- 2.30.2