From 4451f7f5e9b451a7885259554cd3e9562bbf8f88 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marcelina=20Ko=C5=9Bcielnicka?= Date: Sun, 1 Aug 2021 01:29:49 +0200 Subject: [PATCH] memory_bram: Some refactoring This will make more sense when the new transparency masks land. Fixes #2902. --- passes/memory/memory_bram.cc | 370 ++++++++++++++++------------------- 1 file changed, 174 insertions(+), 196 deletions(-) diff --git a/passes/memory/memory_bram.cc b/passes/memory/memory_bram.cc index 7b3df8eec..31fd769b0 100644 --- a/passes/memory/memory_bram.cc +++ b/passes/memory/memory_bram.cc @@ -30,9 +30,6 @@ struct rules_t int group, index, dupidx; int wrmode, enable, transp, clocks, clkpol; - SigBit sig_clock; - SigSpec sig_addr, sig_data, sig_en; - bool effective_clkpol; bool make_transp; bool make_outreg; int mapped_port; @@ -93,7 +90,6 @@ struct rules_t pi.mapped_port = -1; pi.make_transp = false; pi.make_outreg = false; - pi.effective_clkpol = false; portinfos.push_back(pi); } return portinfos; @@ -402,10 +398,8 @@ struct rules_t } }; -bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, const rules_t::bram_t &bram, const rules_t::match_t &match, dict &match_properties, int mode) +bool replace_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals, const rules_t::bram_t &bram, const rules_t::match_t &match, dict &match_properties, int mode) { - // We will modify ports — make a copy of the structure. - Mem mem(orig_mem); Module *module = mem.module; auto portinfos = bram.make_portinfos(); @@ -441,35 +435,12 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c log(" Mapping to bram type %s (variant %d):\n", log_id(bram.name), bram.variant); // bram.dump_config(); - bool cell_init = !mem.inits.empty(); - vector initdata; - - if (cell_init) { - Const initparam = mem.get_init_data(); - initdata.reserve(mem.size); - for (int i=0; i < mem.size; i++) - initdata.push_back(initparam.extract(mem.width*i, mem.width, State::Sx)); - } - + std::vector shuffle_map; if (match.shuffle_enable && bram.dbits >= portinfos.at(match.shuffle_enable - 'A').enable*2 && portinfos.at(match.shuffle_enable - 'A').enable > 0 && !mem.wr_ports.empty()) { int bucket_size = bram.dbits / portinfos.at(match.shuffle_enable - 'A').enable; log(" Shuffle bit order to accommodate enable buckets of size %d..\n", bucket_size); - // extract unshuffled data/enable bits - - std::vector old_wr_en; - std::vector old_wr_data; - std::vector old_rd_data; - - for (auto &port : mem.wr_ports) { - old_wr_en.push_back(port.en); - old_wr_data.push_back(port.data); - } - - for (auto &port : mem.rd_ports) - old_rd_data.push_back(port.data); - // analyze enable structure std::vector en_order; @@ -484,52 +455,13 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c bits_wr_en[sig].push_back(i); } - // re-create memory ports - - std::vector new_wr_en(GetSize(old_wr_en)); - std::vector new_wr_data(GetSize(old_wr_data)); - std::vector new_rd_data(GetSize(old_rd_data)); - std::vector> new_initdata; - std::vector shuffle_map; - - if (cell_init) - new_initdata.resize(mem.size); - for (auto &it : en_order) { - auto &bits = bits_wr_en.at(it); - int buckets = (GetSize(bits) + bucket_size - 1) / bucket_size; - int fillbits = buckets*bucket_size - GetSize(bits); - SigBit fillbit; - - for (int i = 0; i < GetSize(bits); i++) { - for (int j = 0; j < GetSize(mem.wr_ports); j++) { - new_wr_en[j].append(old_wr_en[j][bits[i]]); - new_wr_data[j].append(old_wr_data[j][bits[i]]); - fillbit = old_wr_en[j][bits[i]]; - } - for (int j = 0; j < GetSize(mem.rd_ports); j++) - new_rd_data[j].append(old_rd_data[j][bits[i]]); - if (cell_init) { - for (int j = 0; j < mem.size; j++) - new_initdata[j].push_back(initdata[j][bits[i]]); - } - shuffle_map.push_back(bits[i]); - } + for (auto bit : bits_wr_en.at(it)) + shuffle_map.push_back(bit); - for (int i = 0; i < fillbits; i++) { - for (int j = 0; j < GetSize(mem.wr_ports); j++) { - new_wr_en[j].append(fillbit); - new_wr_data[j].append(State::S0); - } - for (int j = 0; j < GetSize(mem.rd_ports); j++) - new_rd_data[j].append(State::Sx); - if (cell_init) { - for (int j = 0; j < mem.size; j++) - new_initdata[j].push_back(State::Sx); - } + while (GetSize(shuffle_map) % bucket_size) shuffle_map.push_back(-1); - } } log(" Results of bit order shuffling:"); @@ -538,26 +470,15 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c log("\n"); // update mem_*, wr_*, and rd_* variables - - mem.width = GetSize(new_wr_en.front()); - - for (int i = 0; i < GetSize(mem.wr_ports); i++) { - auto &port = mem.wr_ports[i]; - port.en = new_wr_en[i]; - port.data = new_wr_data[i]; - } - - for (int i = 0; i < GetSize(mem.rd_ports); i++) { - auto &port = mem.rd_ports[i]; - port.data = new_rd_data[i]; - } - - if (cell_init) { - for (int i = 0; i < mem.size; i++) - initdata[i] = Const(new_initdata[i]); - } + } else { + for (int i = 0; i < mem.width; i++) + shuffle_map.push_back(i); } + // Align width up to dbits. + while (GetSize(shuffle_map) % bram.dbits) + shuffle_map.push_back(-1); + // assign write ports pair wr_clkdom; for (int cell_port_i = 0, bram_port_i = 0; cell_port_i < GetSize(mem.wr_ports); cell_port_i++) @@ -575,7 +496,7 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c for (; bram_port_i < GetSize(portinfos); bram_port_i++) { auto &pi = portinfos[bram_port_i]; - make_transp_enbits = pi.enable; + make_transp_enbits = pi.enable ? pi.enable : 1; make_transp_clk = clkdom; if (pi.wrmode != 1) @@ -602,16 +523,25 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c } } - SigSpec sig_en; - SigBit last_en_bit = State::S1; - for (int i = 0; i < mem.width; i++) { - if (pi.enable && i % (bram.dbits / pi.enable) == 0) { - last_en_bit = port.en[i]; - sig_en.append(last_en_bit); - } - if (last_en_bit != port.en[i]) { - log(" Bram port %c%d has incompatible enable structure.\n", pi.group + 'A', pi.index + 1); - goto skip_bram_wport; + // We need to check enable compatibility of this port, as well as all + // ports that have priority over this one (because they will be involved + // in emulate_priority logic). + + for (int i = 0; i < GetSize(mem.wr_ports); i++) { + auto &oport = mem.wr_ports[i]; + if (i != cell_port_i && !oport.priority_mask[cell_port_i]) + continue; + SigBit last_en_bit = State::S1; + for (int j = 0; j < GetSize(shuffle_map); j++) { + if (shuffle_map[j] == -1) + continue; + SigBit en_bit = oport.en[shuffle_map[j]]; + if (pi.enable && j % (bram.dbits / pi.enable) == 0) + last_en_bit = en_bit; + if (last_en_bit != en_bit) { + log(" Bram port %c%d has incompatible enable structure.\n", pi.group + 'A', pi.index + 1); + goto skip_bram_wport; + } } } @@ -621,14 +551,8 @@ bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, c if (port.clk_enable) { clock_domains[pi.clocks] = clkdom; clock_polarities[pi.clkpol] = clkdom.second; - pi.sig_clock = clkdom.first; - pi.effective_clkpol = clkdom.second; } - pi.sig_en = sig_en; - pi.sig_addr = port.addr; - pi.sig_data = port.data; - bram_port_i++; goto mapped_wr_port; } @@ -651,10 +575,6 @@ grow_read_ports:; for (auto &pi : portinfos) { if (pi.wrmode == 0) { pi.mapped_port = -1; - pi.sig_clock = SigBit(); - pi.sig_addr = SigSpec(); - pi.sig_data = SigSpec(); - pi.sig_en = SigSpec(); pi.make_outreg = false; pi.make_transp = false; } @@ -766,19 +686,13 @@ grow_read_ports:; log(" Mapped to bram port %c%d.%d.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1); pi.mapped_port = cell_port_i; - if (port.clk_enable && !pi.make_outreg) { + if (pi.clocks) { clock_domains[pi.clocks] = clkdom; clock_polarities[pi.clkpol] = clkdom.second; if (!pi.make_transp) read_transp[pi.transp] = transp; - pi.sig_clock = clkdom.first; - pi.sig_en = port.en; - pi.effective_clkpol = clkdom.second; } - pi.sig_addr = port.addr; - pi.sig_data = port.data; - if (grow_read_ports_cursor < cell_port_i) { grow_read_ports_cursor = cell_port_i; try_growing_more_read_ports = true; @@ -798,17 +712,19 @@ grow_read_ports:; // update properties and re-check conditions + int dcells = GetSize(shuffle_map) / bram.dbits; + int acells = (mem.size + (1 << bram.abits) - 1) / (1 << bram.abits); if (mode <= 1) { match_properties["dups"] = dup_count; match_properties["waste"] = match_properties["dups"] * match_properties["bwaste"]; - int cells = ((mem.width + bram.dbits - 1) / bram.dbits) * ((mem.size + (1 << bram.abits) - 1) / (1 << bram.abits)); + int cells = dcells * acells; match_properties["efficiency"] = (100 * match_properties["bits"]) / (dup_count * cells * bram.dbits * (1 << bram.abits)); - match_properties["dcells"] = ((mem.width + bram.dbits - 1) / bram.dbits); - match_properties["acells"] = ((mem.size + (1 << bram.abits) - 1) / (1 << bram.abits)); - match_properties["cells"] = match_properties["dcells"] * match_properties["acells"] * match_properties["dups"]; + match_properties["dcells"] = dcells; + match_properties["acells"] = acells; + match_properties["cells"] = cells * dup_count; log(" Updated properties: dups=%d waste=%d efficiency=%d\n", match_properties["dups"], match_properties["waste"], match_properties["efficiency"]); @@ -875,6 +791,70 @@ grow_read_ports:; return true; } + // At this point we are commited to replacing the RAM, and can mutate mem. + + // We don't really support priorities, emulate them. + for (int i = 0; i < GetSize(mem.wr_ports); i++) + for (int j = 0; j < i; j++) + mem.emulate_priority(j, i); + + // Now the big swizzle. + mem.width = GetSize(shuffle_map); + + // Swizzle write ports. + for (auto &port : mem.wr_ports) { + SigSpec new_en, new_data; + SigBit en_bit = State::S1; + for (auto idx : shuffle_map) { + if (idx == -1) { + new_data.append(State::Sx); + } else { + new_data.append(port.data[idx]); + en_bit = port.en[idx]; + } + new_en.append(en_bit); + } + port.en = new_en; + port.data = new_data; + } + + // Swizzle read ports. + for (auto &port : mem.rd_ports) { + SigSpec new_data = module->addWire(NEW_ID, mem.width); + Const new_init_value = Const(State::Sx, mem.width); + Const new_arst_value = Const(State::Sx, mem.width); + Const new_srst_value = Const(State::Sx, mem.width); + for (int i = 0; i < mem.width; i++) + if (shuffle_map[i] != -1) { + module->connect(port.data[shuffle_map[i]], new_data[i]); + new_init_value[i] = port.init_value[shuffle_map[i]]; + new_arst_value[i] = port.arst_value[shuffle_map[i]]; + new_srst_value[i] = port.srst_value[shuffle_map[i]]; + } + port.data = new_data; + port.init_value = new_init_value; + port.arst_value = new_arst_value; + port.srst_value = new_srst_value; + } + + // Swizzle the init data. + bool cell_init = !mem.inits.empty(); + vector initdata; + if (cell_init) { + Const initparam = mem.get_init_data(); + initdata.reserve(mem.size); + for (int i = 0; i < mem.size; i++) { + std::vector val; + for (auto idx : shuffle_map) { + if (idx == -1) + val.push_back(State::Sx); + else + val.push_back(initparam[mem.width * i + idx]); + } + initdata.push_back(Const(val)); + } + } + // prepare variant parameters dict variant_params; @@ -882,20 +862,15 @@ grow_read_ports:; bram.find_variant_params(variant_params, other_bram); // Apply make_outreg where necessary. - for (auto &pi : portinfos) { - if (pi.make_outreg) { + for (auto &pi : portinfos) + if (pi.make_outreg) mem.extract_rdff(pi.mapped_port, initvals); - auto &port = mem.rd_ports[pi.mapped_port]; - pi.sig_addr = port.addr; - pi.sig_data = port.data; - } - } // actually replace that memory cell dict> dout_cache; - for (int grid_d = 0; grid_d*bram.dbits < mem.width; grid_d++) + for (int grid_d = 0; grid_d < dcells; grid_d++) { SigSpec mktr_wraddr, mktr_wrdata, mktr_wrdata_q; vector mktr_wren; @@ -905,11 +880,11 @@ grow_read_ports:; mktr_wrdata = module->addWire(NEW_ID, bram.dbits); mktr_wrdata_q = module->addWire(NEW_ID, bram.dbits); module->addDff(NEW_ID, make_transp_clk.first, mktr_wrdata, mktr_wrdata_q, make_transp_clk.second); - for (int grid_a = 0; grid_a*(1 << bram.abits) < mem.size; grid_a++) + for (int grid_a = 0; grid_a < acells; grid_a++) mktr_wren.push_back(module->addWire(NEW_ID, make_transp_enbits)); } - for (int grid_a = 0; grid_a*(1 << bram.abits) < mem.size; grid_a++) + for (int grid_a = 0; grid_a < acells; grid_a++) for (int dupidx = 0; dupidx < dup_count; dupidx++) { Cell *c = module->addCell(module->uniquify(stringf("%s.%d.%d.%d", mem.memid.c_str(), grid_d, grid_a, dupidx)), bram.name); @@ -919,18 +894,16 @@ grow_read_ports:; c->setParam(vp.first, vp.second); if (cell_init) { - int init_offset = grid_a*(1 << bram.abits); + int init_offset = grid_a*(1 << bram.abits) - mem.start_offset; int init_shift = grid_d*bram.dbits; int init_size = (1 << bram.abits); Const initparam(State::Sx, init_size*bram.dbits); - for (int i = 0; i < init_size; i++) { - State padding = State::Sx; + for (int i = 0; i < init_size; i++) for (int j = 0; j < bram.dbits; j++) - if (init_offset+i < GetSize(initdata) && init_shift+j < GetSize(initdata[init_offset+i])) + if (init_offset+i < GetSize(initdata) && init_offset+i >= 0) initparam[i*bram.dbits+j] = initdata[init_offset+i][init_shift+j]; else - initparam[i*bram.dbits+j] = padding; - } + initparam[i*bram.dbits+j] = State::Sx; c->setParam(ID::INIT, initparam); } @@ -942,55 +915,75 @@ grow_read_ports:; string prefix = stringf("%c%d", pi.group + 'A', pi.index + 1); const char *pf = prefix.c_str(); - if (pi.clocks && (!c->hasPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1)) || pi.sig_clock.wire)) { - c->setPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1), pi.sig_clock); - if (pi.clkpol > 1 && pi.sig_clock.wire) - c->setParam(stringf("\\CLKPOL%d", (pi.clkpol-1) % clkpol_max + 1), clock_polarities.at(pi.clkpol)); - if (pi.transp > 1 && pi.sig_clock.wire) - c->setParam(stringf("\\TRANSP%d", (pi.transp-1) % transp_max + 1), read_transp.at(pi.transp)); - } + if (pi.clocks && clock_domains.count(pi.clocks)) + c->setPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1), clock_domains.at(pi.clocks).first); + if (pi.clkpol > 1 && clock_polarities.count(pi.clkpol)) + c->setParam(stringf("\\CLKPOL%d", (pi.clkpol-1) % clkpol_max + 1), clock_polarities.at(pi.clkpol)); + if (pi.transp > 1 && read_transp.count(pi.transp)) + c->setParam(stringf("\\TRANSP%d", (pi.transp-1) % transp_max + 1), read_transp.at(pi.transp)); SigSpec addr_ok; - if (GetSize(pi.sig_addr) > bram.abits) { - SigSpec extra_addr = pi.sig_addr.extract(bram.abits, GetSize(pi.sig_addr) - bram.abits); + SigSpec sig_addr; + if (pi.mapped_port >= 0) { + if (pi.wrmode == 1) + sig_addr = mem.wr_ports[pi.mapped_port].addr; + else + sig_addr = mem.rd_ports[pi.mapped_port].addr; + } + + if (GetSize(sig_addr) > bram.abits) { + SigSpec extra_addr = sig_addr.extract(bram.abits, GetSize(sig_addr) - bram.abits); SigSpec extra_addr_sel = SigSpec(grid_a, GetSize(extra_addr)); addr_ok = module->Eq(NEW_ID, extra_addr, extra_addr_sel); } - if (pi.enable) - { - SigSpec sig_en = pi.sig_en; + sig_addr.extend_u0(bram.abits); + c->setPort(stringf("\\%sADDR", pf), sig_addr); - if (pi.wrmode == 1) { - sig_en.extend_u0((grid_d+1) * pi.enable); - sig_en = sig_en.extract(grid_d * pi.enable, pi.enable); + if (pi.wrmode == 1) { + if (pi.mapped_port == -1) + { + if (pi.enable) + c->setPort(stringf("\\%sEN", pf), Const(State::S0, pi.enable)); + continue; } - if (!addr_ok.empty()) - sig_en = module->Mux(NEW_ID, SigSpec(0, GetSize(sig_en)), sig_en, addr_ok); - - c->setPort(stringf("\\%sEN", pf), sig_en); + auto &port = mem.wr_ports[pi.mapped_port]; + SigSpec sig_data = port.data.extract(grid_d * bram.dbits, bram.dbits); + c->setPort(stringf("\\%sDATA", pf), sig_data); - if (pi.wrmode == 1 && enable_make_transp) - module->connect(mktr_wren[grid_a], sig_en); - } + if (pi.enable) + { + SigSpec sig_en; + int stride = bram.dbits / pi.enable; + for (int i = 0; i < pi.enable; i++) + sig_en.append(port.en[stride * i + grid_d * bram.dbits]); - SigSpec sig_addr = pi.sig_addr; - sig_addr.extend_u0(bram.abits); - c->setPort(stringf("\\%sADDR", pf), sig_addr); + if (!addr_ok.empty()) + sig_en = module->Mux(NEW_ID, SigSpec(0, GetSize(sig_en)), sig_en, addr_ok); - if (pi.wrmode == 1 && enable_make_transp && grid_a == 0) - module->connect(mktr_wraddr, sig_addr); + c->setPort(stringf("\\%sEN", pf), sig_en); - SigSpec sig_data = pi.sig_data; - sig_data.extend_u0((grid_d+1) * bram.dbits); - sig_data = sig_data.extract(grid_d * bram.dbits, bram.dbits); + if (enable_make_transp) + module->connect(mktr_wren[grid_a], sig_en); + } + else if (enable_make_transp) + module->connect(mktr_wren[grid_a], addr_ok); - if (pi.wrmode == 1) { - c->setPort(stringf("\\%sDATA", pf), sig_data); - if (enable_make_transp && grid_a == 0) + if (enable_make_transp && grid_a == 0) { + module->connect(mktr_wraddr, sig_addr); module->connect(mktr_wrdata, sig_data); + } } else { + if (pi.mapped_port == -1) + { + if (pi.enable) + c->setPort(stringf("\\%sEN", pf), State::S0); + continue; + } + auto &port = mem.rd_ports[pi.mapped_port]; + SigSpec sig_data = port.data.extract(grid_d * bram.dbits, bram.dbits); + SigSpec bram_dout = module->addWire(NEW_ID, bram.dbits); c->setPort(stringf("\\%sDATA", pf), bram_dout); if (pi.make_transp) { @@ -1010,22 +1003,21 @@ grow_read_ports:; } } - for (int i = bram.dbits-1; i >= 0; i--) - if (sig_data[i].wire == nullptr) { - sig_data.remove(i); - bram_dout.remove(i); - } - SigSpec addr_ok_q = addr_ok; - if (pi.clocks && !addr_ok.empty()) { + if (port.clk_enable && !addr_ok.empty()) { addr_ok_q = module->addWire(NEW_ID); - if (!pi.sig_en.empty()) - addr_ok = module->Mux(NEW_ID, addr_ok_q, addr_ok, pi.sig_en); - module->addDff(NEW_ID, pi.sig_clock, addr_ok, addr_ok_q, pi.effective_clkpol); + module->addDffe(NEW_ID, port.clk, port.en, addr_ok, addr_ok_q, port.clk_polarity); } dout_cache[sig_data].first.append(addr_ok_q); dout_cache[sig_data].second.append(bram_dout); + + if (pi.enable) { + SigSpec sig_en = port.en; + if (!addr_ok.empty()) + sig_en = module->And(NEW_ID, sig_en, addr_ok); + c->setPort(stringf("\\%sEN", pf), sig_en); + } } } } @@ -1070,20 +1062,6 @@ void handle_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals) log(" %s=%d", it.first.c_str(), it.second); log("\n"); - // This pass cannot deal with write port priority — we need to emulate it, - // if present. Since priority emulation will change the enable signals, - // which in turn may change enable grouping and mapping eligibility in - // pathological cases, we need to do this before checking mapping - // eligibility. This will create priority emulation logic for all - // memories in the design regardless of whether we end up mapping them - // or not, but since we never call Mem::emit(), the new priority masks - // and enables won't be commited to the design, and this logic will be - // unused (and removed by subsequent opt_clean) for unmapped memories. - - for (int i = 0; i < GetSize(mem.wr_ports); i++) - for (int j = 0; j < i; j++) - mem.emulate_priority(j, i); - pool> failed_brams; dict, tuple> best_rule_cache; -- 2.30.2