Added memory_bram "shuffle_enable" feature
author Clifford Wolf <clifford@clifford.at>
Sun, 4 Jan 2015 12:14:30 +0000 (13:14 +0100)
committer Clifford Wolf <clifford@clifford.at>
Sun, 4 Jan 2015 12:14:30 +0000 (13:14 +0100)
passes/memory/memory_bram.cc
tests/bram/generate.py

index b63e5e7b9f4dff3b755cd7085893154871412058..9e61c7f3937ef828cf685fa1ab78634e71665eed 100644 (file)
@@ -64,6 +64,7 @@ struct rules_t
                IdString name;
                dict<string, int> min_limits, max_limits;
                bool or_next_if_better;
+               int shuffle_enable;
        };
 
        dict<IdString, bram_t> brams;
@@ -176,6 +177,7 @@ struct rules_t
                match_t data;
                data.name = RTLIL::escape_id(tokens[1]);
                data.or_next_if_better = false;
+               data.shuffle_enable = 0;
 
                while (next_line())
                {
@@ -194,6 +196,11 @@ struct rules_t
                                continue;
                        }
 
+                       if (GetSize(tokens) == 2 && tokens[0] == "shuffle_enable") {
+                               data.shuffle_enable = atoi(tokens[1].c_str());
+                               continue;
+                       }
+
                        if (GetSize(tokens) == 1 && tokens[0] == "or_next_if_better") {
                                data.or_next_if_better = true;
                                continue;
@@ -284,6 +291,98 @@ bool replace_cell(Cell *cell, const rules_t::bram_t &bram, const rules_t::match_
        SigSpec rd_data = cell->getPort("\\RD_DATA");
        SigSpec rd_addr = cell->getPort("\\RD_ADDR");
 
+       if (match.shuffle_enable)
+       {
+               int bucket_size = bram.dbits / match.shuffle_enable;
+               log("      Shuffle enable and data bit to accommodate enable buckets of size %d..\n", bucket_size);
+
+               // extract unshuffled data/enable bits
+
+               std::vector<SigSpec> old_wr_en;
+               std::vector<SigSpec> old_wr_data;
+               std::vector<SigSpec> old_rd_data;
+
+               for (int i = 0; i < wr_ports; i++) {
+                       old_wr_en.push_back(wr_en.extract(i*mem_width, mem_width));
+                       old_wr_data.push_back(wr_data.extract(i*mem_width, mem_width));
+               }
+
+               for (int i = 0; i < rd_ports; i++)
+                       old_rd_data.push_back(rd_data.extract(i*mem_width, mem_width));
+
+               // analyze enable structure
+
+               std::vector<SigSpec> en_order;
+               dict<SigSpec, vector<int>> bits_wr_en;
+
+               for (int i = 0; i < mem_width; i++) {
+                       SigSpec sig;
+                       for (int j = 0; j < wr_ports; j++)
+                               sig.append(old_wr_en[j][i]);
+                       if (bits_wr_en.count(sig) == 0)
+                               en_order.push_back(sig);
+                       bits_wr_en[sig].push_back(i);
+               }
+
+               // re-create memory ports
+
+               std::vector<SigSpec> new_wr_en(GetSize(old_wr_en));
+               std::vector<SigSpec> new_wr_data(GetSize(old_wr_data));
+               std::vector<SigSpec> new_rd_data(GetSize(old_rd_data));
+               std::vector<int> shuffle_map;
+
+               for (auto &it : en_order)
+               {
+                       auto &bits = bits_wr_en.at(it);
+                       int buckets = (GetSize(bits) + bucket_size - 1) / bucket_size;
+                       int fillbits = buckets*bucket_size - GetSize(bits);
+                       SigBit fillbit;
+
+                       for (int i = 0; i < GetSize(bits); i++) {
+                               for (int j = 0; j < wr_ports; j++) {
+                                       new_wr_en[j].append(old_wr_en[j][bits[i]]);
+                                       new_wr_data[j].append(old_wr_data[j][bits[i]]);
+                                       fillbit = old_wr_en[j][bits[i]];
+                               }
+                               for (int j = 0; j < rd_ports; j++)
+                                       new_rd_data[j].append(old_rd_data[j][bits[i]]);
+                               shuffle_map.push_back(bits[i]);
+                       }
+
+                       for (int i = 0; i < fillbits; i++) {
+                               for (int j = 0; j < wr_ports; j++) {
+                                       new_wr_en[j].append(fillbit);
+                                       new_wr_data[j].append(State::Sx);
+                               }
+                               for (int j = 0; j < rd_ports; j++)
+                                       new_rd_data[j].append(State::Sx);
+                               shuffle_map.push_back(-1);
+                       }
+               }
+
+               log("      Results of enable shuffling:");
+               for (int v : shuffle_map)
+                       log(" %d", v);
+               log("\n");
+
+               // update mem_*, wr_*, and rd_* variables
+
+               mem_width = GetSize(new_wr_en.front());
+               wr_en = SigSpec(0, wr_ports * mem_width);
+               wr_data = SigSpec(0, wr_ports * mem_width);
+               rd_data = SigSpec(0, rd_ports * mem_width);
+
+               for (int i = 0; i < wr_ports; i++) {
+                       wr_en.replace(i*mem_width, new_wr_en[i]);
+                       wr_data.replace(i*mem_width, new_wr_data[i]);
+               }
+
+               for (int i = 0; i < rd_ports; i++)
+                       rd_data.replace(i*mem_width, new_rd_data[i]);
+       }
+
+       // assign write ports
+
        for (int cell_port_i = 0, bram_port_i = 0; cell_port_i < wr_ports; cell_port_i++)
        {
                bool clken = wr_clken[cell_port_i] == State::S1;
@@ -334,6 +433,7 @@ bool replace_cell(Cell *cell, const rules_t::bram_t &bram, const rules_t::match_
                                        sig_en.append(last_en_bit);
                                }
                                if (last_en_bit != wr_en[i + cell_port_i*mem_width]) {
+                                       log_dump(last_en_bit, wr_en[i + cell_port_i*mem_width]);
                                        log("        Bram port %c%d has incompatible enable structure.\n", pi.group + 'A', pi.index + 1);
                                        goto skip_bram_wport;
                                }
@@ -362,6 +462,8 @@ bool replace_cell(Cell *cell, const rules_t::bram_t &bram, const rules_t::match_
        mapped_wr_port:;
        }
 
+       // housekeeping stuff for growing more read ports and restarting read port assignments
+
        int grow_read_ports_cursor = -1;
        bool try_growing_more_read_ports = false;
        auto backup_clock_domains = clock_domains;
@@ -401,6 +503,8 @@ grow_read_ports:;
        read_transp[0] = false;
        read_transp[1] = true;
 
+       // assign read ports
+
        for (int cell_port_i = 0; cell_port_i < rd_ports; cell_port_i++)
        {
                bool clken = rd_clken[cell_port_i] == State::S1;
@@ -479,12 +583,14 @@ grow_read_ports:;
        mapped_rd_port:;
        }
 
+       // update properties and re-check conditions
+
        if (mode <= 1)
        {
                match_properties["dups"] = dup_count;
                match_properties["waste"] = match_properties["dups"] * match_properties["bwaste"];
 
-               int cells = ((match_properties["dbits"] + bram.dbits - 1) / bram.dbits) * ((match_properties["words"] + (1 << bram.abits) - 1) / (1 << bram.abits));
+               int cells = ((mem_width + bram.dbits - 1) / bram.dbits) * ((mem_size + (1 << bram.abits) - 1) / (1 << bram.abits));
                match_properties["efficiency"] = (100 * match_properties["bits"]) / (dup_count * cells * bram.dbits * (1 << bram.abits));
 
                log("      Updated properties: dups=%d waste=%d efficiency=%d\n",
@@ -515,6 +621,8 @@ grow_read_ports:;
                        return true;
        }
 
+       // actually replace that memory cell
+
        dict<SigSpec, pair<SigSpec, SigSpec>> dout_cache;
 
        for (int grid_d = 0; grid_d*bram.dbits < mem_width; grid_d++)
@@ -825,6 +933,10 @@ struct MemoryBramPass : public Pass {
                log("has a higher efficiency than the next match (and the one after that if\n");
                log("the next also has 'or_next_if_better' set, and so forth).\n");
                log("\n");
+               log("A match containing the command 'shuffle_enable <N>' will re-organize\n");
+               log("the data bits to accommodate bram ports with <N> enable bits before\n");
+               log("mapping to the bram.\n");
+               log("\n");
        }
        virtual void execute(vector<string> args, Design *design)
        {
index 03f2555dc4427432cfba2715ce5e19b3abb29203..4bf3a9541bd0b107971de682fa23ed36e8255e7e 100644 (file)
@@ -68,6 +68,10 @@ def create_bram(dsc_f, sim_f, ref_f, tb_f, k1, k2, or_next):
     print("  clkpol %s" % " ".join(["%d" % i for i in clkpol]), file=dsc_f)
     print("endbram", file=dsc_f)
     print("match bram_%02d_%02d" % (k1, k2), file=dsc_f)
+    if random.randrange(2):
+        non_zero_enables = [i for i in enable if i]
+        if len(non_zero_enables):
+            print("  shuffle_enable %d" % random.choice(non_zero_enables), file=dsc_f)
     if or_next:
         print("  or_next_if_better", file=dsc_f)
     print("endmatch", file=dsc_f)