int group, index, dupidx;
int wrmode, enable, transp, clocks, clkpol;
- SigBit sig_clock;
- SigSpec sig_addr, sig_data, sig_en;
- bool effective_clkpol;
bool make_transp;
bool make_outreg;
int mapped_port;
pi.mapped_port = -1;
pi.make_transp = false;
pi.make_outreg = false;
- pi.effective_clkpol = false;
portinfos.push_back(pi);
}
return portinfos;
}
};
-bool replace_memory(Mem &orig_mem, const rules_t &rules, FfInitVals *initvals, const rules_t::bram_t &bram, const rules_t::match_t &match, dict<string, int> &match_properties, int mode)
+bool replace_memory(Mem &mem, const rules_t &rules, FfInitVals *initvals, const rules_t::bram_t &bram, const rules_t::match_t &match, dict<string, int> &match_properties, int mode)
{
- // We will modify ports — make a copy of the structure.
- Mem mem(orig_mem);
Module *module = mem.module;
auto portinfos = bram.make_portinfos();
log(" Mapping to bram type %s (variant %d):\n", log_id(bram.name), bram.variant);
// bram.dump_config();
- bool cell_init = !mem.inits.empty();
- vector<Const> initdata;
-
- if (cell_init) {
- Const initparam = mem.get_init_data();
- initdata.reserve(mem.size);
- for (int i=0; i < mem.size; i++)
- initdata.push_back(initparam.extract(mem.width*i, mem.width, State::Sx));
- }
-
+ std::vector<int> shuffle_map;
if (match.shuffle_enable && bram.dbits >= portinfos.at(match.shuffle_enable - 'A').enable*2 && portinfos.at(match.shuffle_enable - 'A').enable > 0 && !mem.wr_ports.empty())
{
int bucket_size = bram.dbits / portinfos.at(match.shuffle_enable - 'A').enable;
log(" Shuffle bit order to accommodate enable buckets of size %d..\n", bucket_size);
- // extract unshuffled data/enable bits
-
- std::vector<SigSpec> old_wr_en;
- std::vector<SigSpec> old_wr_data;
- std::vector<SigSpec> old_rd_data;
-
- for (auto &port : mem.wr_ports) {
- old_wr_en.push_back(port.en);
- old_wr_data.push_back(port.data);
- }
-
- for (auto &port : mem.rd_ports)
- old_rd_data.push_back(port.data);
-
// analyze enable structure
std::vector<SigSpec> en_order;
bits_wr_en[sig].push_back(i);
}
- // re-create memory ports
-
- std::vector<SigSpec> new_wr_en(GetSize(old_wr_en));
- std::vector<SigSpec> new_wr_data(GetSize(old_wr_data));
- std::vector<SigSpec> new_rd_data(GetSize(old_rd_data));
- std::vector<std::vector<State>> new_initdata;
- std::vector<int> shuffle_map;
-
- if (cell_init)
- new_initdata.resize(mem.size);
-
for (auto &it : en_order)
{
- auto &bits = bits_wr_en.at(it);
- int buckets = (GetSize(bits) + bucket_size - 1) / bucket_size;
- int fillbits = buckets*bucket_size - GetSize(bits);
- SigBit fillbit;
-
- for (int i = 0; i < GetSize(bits); i++) {
- for (int j = 0; j < GetSize(mem.wr_ports); j++) {
- new_wr_en[j].append(old_wr_en[j][bits[i]]);
- new_wr_data[j].append(old_wr_data[j][bits[i]]);
- fillbit = old_wr_en[j][bits[i]];
- }
- for (int j = 0; j < GetSize(mem.rd_ports); j++)
- new_rd_data[j].append(old_rd_data[j][bits[i]]);
- if (cell_init) {
- for (int j = 0; j < mem.size; j++)
- new_initdata[j].push_back(initdata[j][bits[i]]);
- }
- shuffle_map.push_back(bits[i]);
- }
+ for (auto bit : bits_wr_en.at(it))
+ shuffle_map.push_back(bit);
- for (int i = 0; i < fillbits; i++) {
- for (int j = 0; j < GetSize(mem.wr_ports); j++) {
- new_wr_en[j].append(fillbit);
- new_wr_data[j].append(State::S0);
- }
- for (int j = 0; j < GetSize(mem.rd_ports); j++)
- new_rd_data[j].append(State::Sx);
- if (cell_init) {
- for (int j = 0; j < mem.size; j++)
- new_initdata[j].push_back(State::Sx);
- }
+ while (GetSize(shuffle_map) % bucket_size)
shuffle_map.push_back(-1);
- }
}
log(" Results of bit order shuffling:");
log("\n");
// update mem_*, wr_*, and rd_* variables
-
- mem.width = GetSize(new_wr_en.front());
-
- for (int i = 0; i < GetSize(mem.wr_ports); i++) {
- auto &port = mem.wr_ports[i];
- port.en = new_wr_en[i];
- port.data = new_wr_data[i];
- }
-
- for (int i = 0; i < GetSize(mem.rd_ports); i++) {
- auto &port = mem.rd_ports[i];
- port.data = new_rd_data[i];
- }
-
- if (cell_init) {
- for (int i = 0; i < mem.size; i++)
- initdata[i] = Const(new_initdata[i]);
- }
+ } else {
+ for (int i = 0; i < mem.width; i++)
+ shuffle_map.push_back(i);
}
+ // Align width up to dbits.
+ while (GetSize(shuffle_map) % bram.dbits)
+ shuffle_map.push_back(-1);
+
// assign write ports
pair<SigBit, bool> wr_clkdom;
for (int cell_port_i = 0, bram_port_i = 0; cell_port_i < GetSize(mem.wr_ports); cell_port_i++)
for (; bram_port_i < GetSize(portinfos); bram_port_i++)
{
auto &pi = portinfos[bram_port_i];
- make_transp_enbits = pi.enable;
+ make_transp_enbits = pi.enable ? pi.enable : 1;
make_transp_clk = clkdom;
if (pi.wrmode != 1)
}
}
- SigSpec sig_en;
- SigBit last_en_bit = State::S1;
- for (int i = 0; i < mem.width; i++) {
- if (pi.enable && i % (bram.dbits / pi.enable) == 0) {
- last_en_bit = port.en[i];
- sig_en.append(last_en_bit);
- }
- if (last_en_bit != port.en[i]) {
- log(" Bram port %c%d has incompatible enable structure.\n", pi.group + 'A', pi.index + 1);
- goto skip_bram_wport;
+ // We need to check enable compatibility of this port, as well as all
+ // ports that have priority over this one (because they will be involved
+ // in emulate_priority logic).
+
+ for (int i = 0; i < GetSize(mem.wr_ports); i++) {
+ auto &oport = mem.wr_ports[i];
+ if (i != cell_port_i && !oport.priority_mask[cell_port_i])
+ continue;
+ SigBit last_en_bit = State::S1;
+ for (int j = 0; j < GetSize(shuffle_map); j++) {
+ if (shuffle_map[j] == -1)
+ continue;
+ SigBit en_bit = oport.en[shuffle_map[j]];
+ if (pi.enable && j % (bram.dbits / pi.enable) == 0)
+ last_en_bit = en_bit;
+ if (last_en_bit != en_bit) {
+ log(" Bram port %c%d has incompatible enable structure.\n", pi.group + 'A', pi.index + 1);
+ goto skip_bram_wport;
+ }
}
}
if (port.clk_enable) {
clock_domains[pi.clocks] = clkdom;
clock_polarities[pi.clkpol] = clkdom.second;
- pi.sig_clock = clkdom.first;
- pi.effective_clkpol = clkdom.second;
}
- pi.sig_en = sig_en;
- pi.sig_addr = port.addr;
- pi.sig_data = port.data;
-
bram_port_i++;
goto mapped_wr_port;
}
for (auto &pi : portinfos) {
if (pi.wrmode == 0) {
pi.mapped_port = -1;
- pi.sig_clock = SigBit();
- pi.sig_addr = SigSpec();
- pi.sig_data = SigSpec();
- pi.sig_en = SigSpec();
pi.make_outreg = false;
pi.make_transp = false;
}
log(" Mapped to bram port %c%d.%d.\n", pi.group + 'A', pi.index + 1, pi.dupidx + 1);
pi.mapped_port = cell_port_i;
- if (port.clk_enable && !pi.make_outreg) {
+ if (pi.clocks) {
clock_domains[pi.clocks] = clkdom;
clock_polarities[pi.clkpol] = clkdom.second;
if (!pi.make_transp)
read_transp[pi.transp] = transp;
- pi.sig_clock = clkdom.first;
- pi.sig_en = port.en;
- pi.effective_clkpol = clkdom.second;
}
- pi.sig_addr = port.addr;
- pi.sig_data = port.data;
-
if (grow_read_ports_cursor < cell_port_i) {
grow_read_ports_cursor = cell_port_i;
try_growing_more_read_ports = true;
// update properties and re-check conditions
+ int dcells = GetSize(shuffle_map) / bram.dbits;
+ int acells = (mem.size + (1 << bram.abits) - 1) / (1 << bram.abits);
if (mode <= 1)
{
match_properties["dups"] = dup_count;
match_properties["waste"] = match_properties["dups"] * match_properties["bwaste"];
- int cells = ((mem.width + bram.dbits - 1) / bram.dbits) * ((mem.size + (1 << bram.abits) - 1) / (1 << bram.abits));
+ int cells = dcells * acells;
match_properties["efficiency"] = (100 * match_properties["bits"]) / (dup_count * cells * bram.dbits * (1 << bram.abits));
- match_properties["dcells"] = ((mem.width + bram.dbits - 1) / bram.dbits);
- match_properties["acells"] = ((mem.size + (1 << bram.abits) - 1) / (1 << bram.abits));
- match_properties["cells"] = match_properties["dcells"] * match_properties["acells"] * match_properties["dups"];
+ match_properties["dcells"] = dcells;
+ match_properties["acells"] = acells;
+ match_properties["cells"] = cells * dup_count;
log(" Updated properties: dups=%d waste=%d efficiency=%d\n",
match_properties["dups"], match_properties["waste"], match_properties["efficiency"]);
return true;
}
+ // At this point we are commited to replacing the RAM, and can mutate mem.
+
+ // We don't really support priorities, emulate them.
+ for (int i = 0; i < GetSize(mem.wr_ports); i++)
+ for (int j = 0; j < i; j++)
+ mem.emulate_priority(j, i);
+
+ // Now the big swizzle.
+ mem.width = GetSize(shuffle_map);
+
+ // Swizzle write ports.
+ for (auto &port : mem.wr_ports) {
+ SigSpec new_en, new_data;
+ SigBit en_bit = State::S1;
+ for (auto idx : shuffle_map) {
+ if (idx == -1) {
+ new_data.append(State::Sx);
+ } else {
+ new_data.append(port.data[idx]);
+ en_bit = port.en[idx];
+ }
+ new_en.append(en_bit);
+ }
+ port.en = new_en;
+ port.data = new_data;
+ }
+
+ // Swizzle read ports.
+ for (auto &port : mem.rd_ports) {
+ SigSpec new_data = module->addWire(NEW_ID, mem.width);
+ Const new_init_value = Const(State::Sx, mem.width);
+ Const new_arst_value = Const(State::Sx, mem.width);
+ Const new_srst_value = Const(State::Sx, mem.width);
+ for (int i = 0; i < mem.width; i++)
+ if (shuffle_map[i] != -1) {
+ module->connect(port.data[shuffle_map[i]], new_data[i]);
+ new_init_value[i] = port.init_value[shuffle_map[i]];
+ new_arst_value[i] = port.arst_value[shuffle_map[i]];
+ new_srst_value[i] = port.srst_value[shuffle_map[i]];
+ }
+ port.data = new_data;
+ port.init_value = new_init_value;
+ port.arst_value = new_arst_value;
+ port.srst_value = new_srst_value;
+ }
+
+ // Swizzle the init data.
+ bool cell_init = !mem.inits.empty();
+ vector<Const> initdata;
+ if (cell_init) {
+ Const initparam = mem.get_init_data();
+ initdata.reserve(mem.size);
+ for (int i = 0; i < mem.size; i++) {
+ std::vector<State> val;
+ for (auto idx : shuffle_map) {
+ if (idx == -1)
+ val.push_back(State::Sx);
+ else
+ val.push_back(initparam[mem.width * i + idx]);
+ }
+ initdata.push_back(Const(val));
+ }
+ }
+
// prepare variant parameters
dict<IdString, Const> variant_params;
bram.find_variant_params(variant_params, other_bram);
// Apply make_outreg where necessary.
- for (auto &pi : portinfos) {
- if (pi.make_outreg) {
+ for (auto &pi : portinfos)
+ if (pi.make_outreg)
mem.extract_rdff(pi.mapped_port, initvals);
- auto &port = mem.rd_ports[pi.mapped_port];
- pi.sig_addr = port.addr;
- pi.sig_data = port.data;
- }
- }
// actually replace that memory cell
dict<SigSpec, pair<SigSpec, SigSpec>> dout_cache;
- for (int grid_d = 0; grid_d*bram.dbits < mem.width; grid_d++)
+ for (int grid_d = 0; grid_d < dcells; grid_d++)
{
SigSpec mktr_wraddr, mktr_wrdata, mktr_wrdata_q;
vector<SigSpec> mktr_wren;
mktr_wrdata = module->addWire(NEW_ID, bram.dbits);
mktr_wrdata_q = module->addWire(NEW_ID, bram.dbits);
module->addDff(NEW_ID, make_transp_clk.first, mktr_wrdata, mktr_wrdata_q, make_transp_clk.second);
- for (int grid_a = 0; grid_a*(1 << bram.abits) < mem.size; grid_a++)
+ for (int grid_a = 0; grid_a < acells; grid_a++)
mktr_wren.push_back(module->addWire(NEW_ID, make_transp_enbits));
}
- for (int grid_a = 0; grid_a*(1 << bram.abits) < mem.size; grid_a++)
+ for (int grid_a = 0; grid_a < acells; grid_a++)
for (int dupidx = 0; dupidx < dup_count; dupidx++)
{
Cell *c = module->addCell(module->uniquify(stringf("%s.%d.%d.%d", mem.memid.c_str(), grid_d, grid_a, dupidx)), bram.name);
c->setParam(vp.first, vp.second);
if (cell_init) {
- int init_offset = grid_a*(1 << bram.abits);
+ int init_offset = grid_a*(1 << bram.abits) - mem.start_offset;
int init_shift = grid_d*bram.dbits;
int init_size = (1 << bram.abits);
Const initparam(State::Sx, init_size*bram.dbits);
- for (int i = 0; i < init_size; i++) {
- State padding = State::Sx;
+ for (int i = 0; i < init_size; i++)
for (int j = 0; j < bram.dbits; j++)
- if (init_offset+i < GetSize(initdata) && init_shift+j < GetSize(initdata[init_offset+i]))
+ if (init_offset+i < GetSize(initdata) && init_offset+i >= 0)
initparam[i*bram.dbits+j] = initdata[init_offset+i][init_shift+j];
else
- initparam[i*bram.dbits+j] = padding;
- }
+ initparam[i*bram.dbits+j] = State::Sx;
c->setParam(ID::INIT, initparam);
}
string prefix = stringf("%c%d", pi.group + 'A', pi.index + 1);
const char *pf = prefix.c_str();
- if (pi.clocks && (!c->hasPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1)) || pi.sig_clock.wire)) {
- c->setPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1), pi.sig_clock);
- if (pi.clkpol > 1 && pi.sig_clock.wire)
- c->setParam(stringf("\\CLKPOL%d", (pi.clkpol-1) % clkpol_max + 1), clock_polarities.at(pi.clkpol));
- if (pi.transp > 1 && pi.sig_clock.wire)
- c->setParam(stringf("\\TRANSP%d", (pi.transp-1) % transp_max + 1), read_transp.at(pi.transp));
- }
+ if (pi.clocks && clock_domains.count(pi.clocks))
+ c->setPort(stringf("\\CLK%d", (pi.clocks-1) % clocks_max + 1), clock_domains.at(pi.clocks).first);
+ if (pi.clkpol > 1 && clock_polarities.count(pi.clkpol))
+ c->setParam(stringf("\\CLKPOL%d", (pi.clkpol-1) % clkpol_max + 1), clock_polarities.at(pi.clkpol));
+ if (pi.transp > 1 && read_transp.count(pi.transp))
+ c->setParam(stringf("\\TRANSP%d", (pi.transp-1) % transp_max + 1), read_transp.at(pi.transp));
SigSpec addr_ok;
- if (GetSize(pi.sig_addr) > bram.abits) {
- SigSpec extra_addr = pi.sig_addr.extract(bram.abits, GetSize(pi.sig_addr) - bram.abits);
+ SigSpec sig_addr;
+ if (pi.mapped_port >= 0) {
+ if (pi.wrmode == 1)
+ sig_addr = mem.wr_ports[pi.mapped_port].addr;
+ else
+ sig_addr = mem.rd_ports[pi.mapped_port].addr;
+ }
+
+ if (GetSize(sig_addr) > bram.abits) {
+ SigSpec extra_addr = sig_addr.extract(bram.abits, GetSize(sig_addr) - bram.abits);
SigSpec extra_addr_sel = SigSpec(grid_a, GetSize(extra_addr));
addr_ok = module->Eq(NEW_ID, extra_addr, extra_addr_sel);
}
- if (pi.enable)
- {
- SigSpec sig_en = pi.sig_en;
+ sig_addr.extend_u0(bram.abits);
+ c->setPort(stringf("\\%sADDR", pf), sig_addr);
- if (pi.wrmode == 1) {
- sig_en.extend_u0((grid_d+1) * pi.enable);
- sig_en = sig_en.extract(grid_d * pi.enable, pi.enable);
+ if (pi.wrmode == 1) {
+ if (pi.mapped_port == -1)
+ {
+ if (pi.enable)
+ c->setPort(stringf("\\%sEN", pf), Const(State::S0, pi.enable));
+ continue;
}
- if (!addr_ok.empty())
- sig_en = module->Mux(NEW_ID, SigSpec(0, GetSize(sig_en)), sig_en, addr_ok);
-
- c->setPort(stringf("\\%sEN", pf), sig_en);
+ auto &port = mem.wr_ports[pi.mapped_port];
+ SigSpec sig_data = port.data.extract(grid_d * bram.dbits, bram.dbits);
+ c->setPort(stringf("\\%sDATA", pf), sig_data);
- if (pi.wrmode == 1 && enable_make_transp)
- module->connect(mktr_wren[grid_a], sig_en);
- }
+ if (pi.enable)
+ {
+ SigSpec sig_en;
+ int stride = bram.dbits / pi.enable;
+ for (int i = 0; i < pi.enable; i++)
+ sig_en.append(port.en[stride * i + grid_d * bram.dbits]);
- SigSpec sig_addr = pi.sig_addr;
- sig_addr.extend_u0(bram.abits);
- c->setPort(stringf("\\%sADDR", pf), sig_addr);
+ if (!addr_ok.empty())
+ sig_en = module->Mux(NEW_ID, SigSpec(0, GetSize(sig_en)), sig_en, addr_ok);
- if (pi.wrmode == 1 && enable_make_transp && grid_a == 0)
- module->connect(mktr_wraddr, sig_addr);
+ c->setPort(stringf("\\%sEN", pf), sig_en);
- SigSpec sig_data = pi.sig_data;
- sig_data.extend_u0((grid_d+1) * bram.dbits);
- sig_data = sig_data.extract(grid_d * bram.dbits, bram.dbits);
+ if (enable_make_transp)
+ module->connect(mktr_wren[grid_a], sig_en);
+ }
+ else if (enable_make_transp)
+ module->connect(mktr_wren[grid_a], addr_ok);
- if (pi.wrmode == 1) {
- c->setPort(stringf("\\%sDATA", pf), sig_data);
- if (enable_make_transp && grid_a == 0)
+ if (enable_make_transp && grid_a == 0) {
+ module->connect(mktr_wraddr, sig_addr);
module->connect(mktr_wrdata, sig_data);
+ }
} else {
+ if (pi.mapped_port == -1)
+ {
+ if (pi.enable)
+ c->setPort(stringf("\\%sEN", pf), State::S0);
+ continue;
+ }
+ auto &port = mem.rd_ports[pi.mapped_port];
+ SigSpec sig_data = port.data.extract(grid_d * bram.dbits, bram.dbits);
+
SigSpec bram_dout = module->addWire(NEW_ID, bram.dbits);
c->setPort(stringf("\\%sDATA", pf), bram_dout);
if (pi.make_transp) {
}
}
- for (int i = bram.dbits-1; i >= 0; i--)
- if (sig_data[i].wire == nullptr) {
- sig_data.remove(i);
- bram_dout.remove(i);
- }
-
SigSpec addr_ok_q = addr_ok;
- if (pi.clocks && !addr_ok.empty()) {
+ if (port.clk_enable && !addr_ok.empty()) {
addr_ok_q = module->addWire(NEW_ID);
- if (!pi.sig_en.empty())
- addr_ok = module->Mux(NEW_ID, addr_ok_q, addr_ok, pi.sig_en);
- module->addDff(NEW_ID, pi.sig_clock, addr_ok, addr_ok_q, pi.effective_clkpol);
+ module->addDffe(NEW_ID, port.clk, port.en, addr_ok, addr_ok_q, port.clk_polarity);
}
dout_cache[sig_data].first.append(addr_ok_q);
dout_cache[sig_data].second.append(bram_dout);
+
+ if (pi.enable) {
+ SigSpec sig_en = port.en;
+ if (!addr_ok.empty())
+ sig_en = module->And(NEW_ID, sig_en, addr_ok);
+ c->setPort(stringf("\\%sEN", pf), sig_en);
+ }
}
}
}
log(" %s=%d", it.first.c_str(), it.second);
log("\n");
- // This pass cannot deal with write port priority — we need to emulate it,
- // if present. Since priority emulation will change the enable signals,
- // which in turn may change enable grouping and mapping eligibility in
- // pathological cases, we need to do this before checking mapping
- // eligibility. This will create priority emulation logic for all
- // memories in the design regardless of whether we end up mapping them
- // or not, but since we never call Mem::emit(), the new priority masks
- // and enables won't be commited to the design, and this logic will be
- // unused (and removed by subsequent opt_clean) for unmapped memories.
-
- for (int i = 0; i < GetSize(mem.wr_ports); i++)
- for (int j = 0; j < i; j++)
- mem.emulate_priority(j, i);
-
pool<pair<IdString, int>> failed_brams;
dict<pair<int, int>, tuple<int, int, int>> best_rule_cache;