TimingInfo: index by (port_name,offset)
[yosys.git] / passes / techmap / abc9_ops.cc
index 816c0276a4d19b6196a9d8783672f2ba5ab818a6..e5de2bcc4cf21cb185d5e90e1348d70cc16d8c76 100644 (file)
@@ -22,8 +22,7 @@
 #include "kernel/sigtools.h"
 #include "kernel/utils.h"
 #include "kernel/celltypes.h"
-
-#define ABC9_DELAY_BASE_ID 9000
+#include "kernel/timinginfo.h"
 
 USING_YOSYS_NAMESPACE
 PRIVATE_NAMESPACE_BEGIN
@@ -39,20 +38,20 @@ void check(RTLIL::Design *design)
 {
        dict<IdString,IdString> box_lookup;
        for (auto m : design->modules()) {
+               if (m->name.begins_with("$paramod"))
+                       continue;
+
                auto flop = m->get_bool_attribute(ID(abc9_flop));
                auto it = m->attributes.find(ID(abc9_box_id));
-               if (it == m->attributes.end()) {
-                       if (flop)
-                               log_error("Module '%s' contains (* abc9_flop *) but not (* abc9_box_id=<int> *).\n", log_id(m));
-                       continue;
+               if (!flop) {
+                       if (it == m->attributes.end())
+                               continue;
+                       auto id = it->second.as_int();
+                       auto r = box_lookup.insert(std::make_pair(stringf("$__boxid%d", id), m->name));
+                       if (!r.second)
+                               log_error("Module '%s' has the same abc9_box_id = %d value as '%s'.\n",
+                                               log_id(m), id, log_id(r.first->second));
                }
-               if (m->name.begins_with("$paramod"))
-                       continue;
-               auto id = it->second.as_int();
-               auto r = box_lookup.insert(std::make_pair(stringf("$__boxid%d", id), m->name));
-               if (!r.second)
-                       log_error("Module '%s' has the same abc9_box_id = %d value as '%s'.\n",
-                                       log_id(m), id, log_id(r.first->second));
 
                // Make carry in the last PI, and carry out the last PO
                //   since ABC requires it this way
@@ -86,7 +85,7 @@ void check(RTLIL::Design *design)
                                if (wire->port_output) num_outputs++;
                        }
                        if (num_outputs != 1)
-                               log_error("Module '%s' with (* abc_flop *) has %d outputs (expect 1).\n", log_id(m), num_outputs);
+                               log_error("Module '%s' with (* abc9_flop *) has %d outputs (expect 1).\n", log_id(m), num_outputs);
                }
        }
 }
@@ -143,18 +142,9 @@ void prep_dff(RTLIL::Module *module)
                clkdomain_t key(abc9_clock);
 
                auto r = clk_to_mergeability.insert(std::make_pair(abc9_clock, clk_to_mergeability.size() + 1));
-               auto r2 YS_ATTRIBUTE(unused) = cell->attributes.insert(std::make_pair(ID(abc9_mergeability), r.first->second));
-               log_assert(r2.second);
-
-               Wire *abc9_init_wire = module->wire(stringf("%s.init", cell->name.c_str()));
-               if (abc9_init_wire == NULL)
-                       log_error("'%s.init' is not a wire present in module '%s'.\n", cell->name.c_str(), log_id(module));
-               log_assert(GetSize(abc9_init_wire) == 1);
-               SigSpec abc9_init = assign_map(abc9_init_wire);
-               if (!abc9_init.is_fully_const())
-                       log_error("'%s.init' is not a constant wire present in module '%s'.\n", cell->name.c_str(), log_id(module));
-               r2 = cell->attributes.insert(std::make_pair(ID(abc9_init), abc9_init.as_const()));
+               auto r2  = cell->attributes.insert(ID(abc9_mergeability));;
                log_assert(r2.second);
+               r2.first->second = r.first->second;
        }
 
        RTLIL::Module *holes_module = design->module(stringf("%s$holes", module->name.c_str()));
@@ -162,38 +152,44 @@ void prep_dff(RTLIL::Module *module)
                SigMap sigmap(holes_module);
 
                dict<SigSpec, SigSpec> replace;
-               for (auto it = holes_module->cells_.begin(); it != holes_module->cells_.end(); ) {
-                       auto cell = it->second;
-                       if (cell->type.in("$_DFF_N_", "$_DFF_NN0_", "$_DFF_NN1_", "$_DFF_NP0_", "$_DFF_NP1_",
-                                               "$_DFF_P_", "$_DFF_PN0_", "$_DFF_PN1", "$_DFF_PP0_", "$_DFF_PP1_")) {
-                               SigBit D = cell->getPort("\\D");
-                               SigBit Q = cell->getPort("\\Q");
-                               // Remove the $_DFF_* cell from what needs to be a combinatorial box
-                               it = holes_module->cells_.erase(it);
-                               Wire *port;
-                               if (GetSize(Q.wire) == 1)
-                                       port = holes_module->wire(stringf("$abc%s", Q.wire->name.c_str()));
-                               else
-                                       port = holes_module->wire(stringf("$abc%s[%d]", Q.wire->name.c_str(), Q.offset));
-                               log_assert(port);
-                               // Prepare to replace "assign <port> = $_DFF_*.Q;" with "assign <port> = $_DFF_*.D;"
-                               //   in order to extract just the combinatorial control logic that feeds the box
-                               //   (i.e. clock enable, synchronous reset, etc.)
-                               replace.insert(std::make_pair(Q,D));
-                               // Since `flatten` above would have created wires named "<cell>.Q",
-                               //   extract the pre-techmap cell name
-                               auto pos = Q.wire->name.str().rfind(".");
-                               log_assert(pos != std::string::npos);
-                               IdString driver = Q.wire->name.substr(0, pos);
-                               // And drive the signal that was previously driven by "DFF.Q" (typically
-                               //   used to implement clock-enable functionality) with the "<cell>.$abc9_currQ"
-                               //   wire (which itself is driven an by input port) we inserted above
-                               Wire *currQ = holes_module->wire(stringf("%s.abc9_ff.Q", driver.c_str()));
-                               log_assert(currQ);
-                               holes_module->connect(Q, currQ);
-                       }
+               for (auto cell : holes_module->cells().to_vector()) {
+                       if (!cell->type.in("$_DFF_N_", "$_DFF_NN0_", "$_DFF_NN1_", "$_DFF_NP0_", "$_DFF_NP1_",
+                                               "$_DFF_P_", "$_DFF_PN0_", "$_DFF_PN1_", "$_DFF_PP0_", "$_DFF_PP1_"))
+                               continue; // only $_DFF_* cells are handled below
+                       SigBit D = cell->getPort("\\D");
+                       SigBit Q = cell->getPort("\\Q");
+                       // Emulate async control embedded inside $_DFF_* cell with mux in front of D
+                       if (cell->type.in("$_DFF_NN0_", "$_DFF_PN0_"))
+                               D = holes_module->MuxGate(NEW_ID, State::S0, D, cell->getPort("\\R"));
+                       else if (cell->type.in("$_DFF_NN1_", "$_DFF_PN1_"))
+                               D = holes_module->MuxGate(NEW_ID, State::S1, D, cell->getPort("\\R"));
+                       else if (cell->type.in("$_DFF_NP0_", "$_DFF_PP0_"))
+                               D = holes_module->MuxGate(NEW_ID, D, State::S0, cell->getPort("\\R"));
+                       else if (cell->type.in("$_DFF_NP1_", "$_DFF_PP1_"))
+                               D = holes_module->MuxGate(NEW_ID, D, State::S1, cell->getPort("\\R"));
+                       // Remove the $_DFF_* cell from what needs to be a combinatorial box
+                       holes_module->remove(cell);
+                       Wire *port;
+                       if (GetSize(Q.wire) == 1)
+                               port = holes_module->wire(stringf("$abc%s", Q.wire->name.c_str()));
                        else
-                               ++it;
+                               port = holes_module->wire(stringf("$abc%s[%d]", Q.wire->name.c_str(), Q.offset));
+                       log_assert(port);
+                       // Prepare to replace "assign <port> = $_DFF_*.Q;" with "assign <port> = $_DFF_*.D;"
+                       //   in order to extract just the combinatorial control logic that feeds the box
+                       //   (i.e. clock enable, synchronous reset, etc.)
+                       replace.insert(std::make_pair(Q,D));
+                       // Since `flatten` above would have created wires named "<cell>.Q",
+                       //   extract the pre-techmap cell name
+                       auto pos = Q.wire->name.str().rfind(".");
+                       log_assert(pos != std::string::npos);
+                       IdString driver = Q.wire->name.substr(0, pos);
+                       // And drive the signal that was previously driven by "DFF.Q" (typically
+                       //   used to implement clock-enable functionality) with the "<cell>.abc9_ff.Q"
+                       //   wire (which itself is driven by an input port) we inserted above
+                       Wire *currQ = holes_module->wire(stringf("%s.abc9_ff.Q", driver.c_str()));
+                       log_assert(currQ);
+                       holes_module->connect(Q, currQ);
                }
 
                for (auto &conn : holes_module->connections_)
@@ -215,15 +211,15 @@ void prep_xaiger(RTLIL::Module *module, bool dff)
        for (auto cell : module->cells()) {
                if (cell->type == "$__ABC9_FF_")
                        continue;
+               if (cell->has_keep_attr())
+                       continue;
 
                auto inst_module = module->design->module(cell->type);
-               bool abc9_box = inst_module && inst_module->attributes.count("\\abc9_box_id");
-               bool abc9_flop = false;
-               if (abc9_box) {
-                       abc9_flop = inst_module->get_bool_attribute("\\abc9_flop");
-                       if (abc9_flop && !dff)
-                               continue;
+               bool abc9_flop = inst_module && inst_module->get_bool_attribute("\\abc9_flop");
+               if (abc9_flop && !dff)
+                       continue;
 
+               if ((inst_module && inst_module->get_bool_attribute("\\abc9_box")) || abc9_flop) {
                        auto r = box_ports.insert(cell->type);
                        if (r.second) {
                                // Make carry in the last PI, and carry out the last PO
@@ -251,6 +247,7 @@ void prep_xaiger(RTLIL::Module *module, bool dff)
                else if (!yosys_celltypes.cell_known(cell->type))
                        continue;
 
+               // TODO: Speed up toposort -- we care about box ordering only
                for (auto conn : cell->connections()) {
                        if (cell->input(conn.first))
                                for (auto bit : sigmap(conn.second))
@@ -260,7 +257,6 @@ void prep_xaiger(RTLIL::Module *module, bool dff)
                                for (auto bit : sigmap(conn.second))
                                        bit_drivers[bit].insert(cell->name);
                }
-
                toposort.node(cell->name);
        }
 
@@ -270,8 +266,8 @@ void prep_xaiger(RTLIL::Module *module, bool dff)
        for (auto &it : bit_users)
                if (bit_drivers.count(it.first))
                        for (auto driver_cell : bit_drivers.at(it.first))
-                       for (auto user_cell : it.second)
-                               toposort.edge(driver_cell, user_cell);
+                               for (auto user_cell : it.second)
+                                       toposort.edge(driver_cell, user_cell);
 
        if (ys_debug(1))
                toposort.analyze_loops = true;
@@ -297,6 +293,7 @@ void prep_xaiger(RTLIL::Module *module, bool dff)
        holes_module->set_bool_attribute("\\abc9_holes");
 
        dict<IdString, Cell*> cell_cache;
+       TimingInfo timing;
 
        int port_id = 1, box_count = 0;
        for (auto cell_name : toposort.sorted) {
@@ -304,22 +301,22 @@ void prep_xaiger(RTLIL::Module *module, bool dff)
                log_assert(cell);
 
                RTLIL::Module* box_module = design->module(cell->type);
-               if (!box_module || !box_module->attributes.count("\\abc9_box_id"))
+               if (!box_module || (!box_module->get_bool_attribute("\\abc9_box") && !box_module->get_bool_attribute("\\abc9_flop")))
                        continue;
 
                cell->attributes["\\abc9_box_seq"] = box_count++;
 
-               IdString derived_name = box_module->derive(design, cell->parameters);
-               box_module = design->module(derived_name);
+               IdString derived_type = box_module->derive(design, cell->parameters);
+               box_module = design->module(derived_type);
 
-               auto r = cell_cache.insert(derived_name);
+               auto r = cell_cache.insert(derived_type);
                auto &holes_cell = r.first->second;
                if (r.second) {
                        if (box_module->has_processes())
                                Pass::call_on_module(design, box_module, "proc");
 
                        if (box_module->get_bool_attribute("\\whitebox")) {
-                               holes_cell = holes_module->addCell(cell->name, derived_name);
+                               holes_cell = holes_module->addCell(cell->name, derived_type);
 
                                if (box_module->has_processes())
                                        Pass::call_on_module(design, box_module, "proc");
@@ -344,7 +341,7 @@ void prep_xaiger(RTLIL::Module *module, bool dff)
                                                }
                                        }
                                        else if (w->port_output)
-                                               conn = holes_module->addWire(stringf("%s.%s", derived_name.c_str(), log_id(port_name)), GetSize(w));
+                                               conn = holes_module->addWire(stringf("%s.%s", derived_type.c_str(), log_id(port_name)), GetSize(w));
                                }
 
                                // For flops only, create an extra 1-bit input that drives a new wire
@@ -383,19 +380,20 @@ void prep_xaiger(RTLIL::Module *module, bool dff)
        }
 }
 
-void prep_delays(RTLIL::Design *design)
+void prep_delays(RTLIL::Design *design, bool dff_mode)
 {
-       std::set<int> delays;
+       TimingInfo timing;
+
+       // Derive all Yosys blackbox modules that are not combinatorial abc9 boxes
+       //   (e.g. DSPs, RAMs, etc.) nor abc9 flops and collect all such instantiations
        pool<Module*> flops;
-       std::vector<Cell*> boxes;
-       std::map<int,std::vector<int>> requireds;
+       std::vector<Cell*> cells;
        for (auto module : design->selected_modules()) {
                if (module->processes.size() > 0) {
                        log("Skipping module %s as it contains processes.\n", log_id(module));
                        continue;
                }
 
-               boxes.clear();
                for (auto cell : module->cells()) {
                        if (cell->type.in(ID($_AND_), ID($_NOT_), ID($__ABC9_FF_), ID($__ABC9_DELAY)))
                                continue;
@@ -405,134 +403,311 @@ void prep_delays(RTLIL::Design *design)
                                continue;
                        if (!inst_module->get_blackbox_attribute())
                                continue;
-                       if (inst_module->get_bool_attribute(ID(abc9_flop))) {
-                               flops.insert(inst_module);
+                       if (inst_module->attributes.count(ID(abc9_box)))
                                continue;
+                       IdString derived_type = inst_module->derive(design, cell->parameters);
+                       inst_module = design->module(derived_type);
+                       log_assert(inst_module);
+
+                       if (dff_mode && inst_module->get_bool_attribute(ID(abc9_flop))) {
+                               flops.insert(inst_module);
+                               continue; // do not add $__ABC9_DELAY boxes to flops
+                               //   as delays will be captured in the flop box
                        }
-                       // All remaining boxes are combinatorial and cannot
-                       //   contain a required time
-                       if (inst_module->attributes.count(ID(abc9_box_id)))
-                               continue;
-                       boxes.emplace_back(cell);
+
+                       if (!timing.count(derived_type))
+                               timing.setup_module(inst_module);
+
+                       cells.emplace_back(cell);
                }
+       }
 
-               delays.clear();
-               requireds.clear();
-               for (auto cell : boxes) {
-                       RTLIL::Module* inst_module = module->design->module(cell->type);
-                       log_assert(inst_module);
-                       for (auto &conn : cell->connections_) {
-                               auto port_wire = inst_module->wire(conn.first);
-                               if (!port_wire->port_input)
-                                       continue;
+       // Insert $__ABC9_DELAY cells on all cells that instantiate blackboxes
+       //   with required times
+       for (auto cell : cells) {
+               auto module = cell->module;
+               RTLIL::Module* inst_module = module->design->module(cell->type);
+               log_assert(inst_module);
+               IdString derived_type = inst_module->derive(design, cell->parameters);
+               inst_module = design->module(derived_type);
+               log_assert(inst_module);
+
+               auto &t = timing.at(derived_type).required;
+               for (auto &conn : cell->connections_) {
+                       auto port_wire = inst_module->wire(conn.first);
+                       if (!port_wire->port_input)
+                               continue;
 
-                               auto it = port_wire->attributes.find("\\abc9_required");
-                               if (it == port_wire->attributes.end())
+                       SigSpec O = module->addWire(NEW_ID, GetSize(conn.second));
+                       for (int i = 0; i < GetSize(conn.second); i++) {
+                               auto d = t.at(SigBit(port_wire,i), 0);
+                               if (d == 0)
                                        continue;
 
-                               int count = 0;
-                               requireds.clear();
-                               if (it->second.flags == 0) {
-                                       count = 1;
-                                       requireds[it->second.as_int()].push_back(0);
-                               }
-                               else
-                                       for (const auto &tok : split_tokens(it->second.decode_string()))
-                                               requireds[atoi(tok.c_str())].push_back(count++);
-                               if (count > 1 && count != GetSize(port_wire))
-                                       log_error("%s.%s is %d bits wide but abc9_required = %s has %d value(s)!\n", log_id(cell->type), log_id(conn.first),
-                                                       GetSize(port_wire), log_signal(it->second), count);
-
-                               SigSpec O = module->addWire(NEW_ID, GetSize(conn.second));
-                               for (const auto &i : requireds) {
 #ifndef NDEBUG
-                                       if (ys_debug(1)) {
-                                               static std::set<std::pair<IdString,IdString>> seen;
-                                               if (seen.emplace(cell->type, conn.first).second) log("%s.%s abc9_required = %d\n", log_id(cell->type), log_id(conn.first), i.first);
-                                       }
-#endif
-                                       delays.insert(i.first);
-                                       for (auto offset : i.second) {
-                                               auto box = module->addCell(NEW_ID, ID($__ABC9_DELAY));
-                                               box->setPort(ID(I), conn.second[offset]);
-                                               box->setPort(ID(O), O[offset]);
-                                               box->setParam(ID(DELAY), i.first);
-                                               conn.second[offset] = O[offset];
-                                       }
+                               if (ys_debug(1)) {
+                                       static std::set<std::tuple<IdString,IdString,int>> seen;
+                                       if (seen.emplace(derived_type, conn.first, i).second) log("%s.%s[%d] abc9_required = %d\n",
+                                                       log_id(cell->type), log_id(conn.first), i, d);
                                }
+#endif
+                               auto box = module->addCell(NEW_ID, ID($__ABC9_DELAY));
+                               box->setPort(ID(I), conn.second[i]);
+                               box->setPort(ID(O), O[i]);
+                               box->setParam(ID(DELAY), d);
+                               conn.second[i] = O[i];
                        }
                }
+       }
+}
 
-               std::stringstream ss;
-               bool first = true;
-               for (auto d : delays) {
-                       if (first)
-                               first = false;
-                       else
-                               ss << " ";
-                       ss << d;
+void prep_lut(RTLIL::Design *design, int maxlut)
+{
+       TimingInfo timing;
+
+       std::vector<std::tuple<int, IdString, int, std::vector<int>>> table;
+       for (auto module : design->modules()) {
+               auto it = module->attributes.find(ID(abc9_lut));
+               if (it == module->attributes.end())
+                       continue;
+
+               auto &t = timing.setup_module(module);
+
+               TimingInfo::NameBit o;
+               std::vector<int> specify;
+               for (const auto &i : t.comb) {
+                       auto &d = i.first.second;
+                       if (o == TimingInfo::NameBit())
+                               o = d;
+                       else if (o != d)
+                               log_error("(* abc9_lut *) module '%s' with has more than one output.\n", log_id(module));
+                       specify.push_back(i.second);
                }
-               module->attributes[ID(abc9_delays)] = ss.str();
+
+               if (maxlut && GetSize(specify) > maxlut)
+                       continue;
+               // ABC requires non-decreasing LUT input delays
+               std::sort(specify.begin(), specify.end());
+               table.emplace_back(GetSize(specify), module->name, it->second.as_int(), std::move(specify));
        }
+       // ABC requires ascending size
+       std::sort(table.begin(), table.end());
 
        std::stringstream ss;
-       for (auto flop_module : flops) {
-               // Skip parameterised flop_modules for now (since we do not
-               //   dynamically generate the abc9_box_id)
-               if (flop_module->name.begins_with("$paramod"))
+       const int first_size = table.empty() ? 0 : std::get<0>(table.front()); // empty table: nothing to crop
+       // If the first entry does not start from a 1-input LUT,
+       //   (as ABC requires) crop the first entry to do so
+       for (int i = 1; i < first_size; i++) {
+               ss << "# $__ABC9_LUT" << i << std::endl;
+               ss << i << " " << std::get<2>(table.front());
+               for (int j = 0; j < i; j++)
+                       ss << " " << std::get<3>(table.front())[j];
+               ss << std::endl;
+       }
+       for (const auto &i : table) {
+               ss << "# " << log_id(std::get<1>(i)) << std::endl;
+               ss << std::get<0>(i) << " " << std::get<2>(i);
+               for (const auto &j : std::get<3>(i))
+                       ss << " " << j;
+               ss << std::endl;
+       }
+       design->scratchpad_set_string("abc9_ops.lut_library", ss.str());
+}
+
+void write_lut(RTLIL::Module *module, const std::string &dst) { // dump scratchpad "abc9_ops.lut_library" to file 'dst'
+       std::ofstream ofs(dst);
+       if (!ofs.is_open()) log_error("Can't open '%s' for writing.\n", dst.c_str()); // I/O failure, not an internal invariant
+       ofs << module->design->scratchpad_get_string("abc9_ops.lut_library");
+       ofs.close();
+}
+
+void prep_box(RTLIL::Design *design, bool dff_mode)
+{
+       TimingInfo timing;
+
+       std::stringstream ss;
+       int abc9_box_id = 1;
+       for (auto module : design->modules()) {
+               auto it = module->attributes.find(ID(abc9_box_id));
+               if (it == module->attributes.end())
+                       continue;
+               abc9_box_id = std::max(abc9_box_id, it->second.as_int() + 1); // next free id: one above every id already in use
+       }
+
+       dict<IdString,std::vector<IdString>> box_ports;
+       for (auto module : design->modules()) {
+               auto abc9_flop = module->get_bool_attribute(ID(abc9_flop));
+               if (abc9_flop) {
+                       auto r = module->attributes.insert(ID(abc9_box_id));
+                       if (!r.second)
+                               continue;
+                       r.first->second = abc9_box_id++;
+
+                       if (dff_mode) {
+                               int num_inputs = 0, num_outputs = 0;
+                               for (auto port_name : module->ports) {
+                                       auto wire = module->wire(port_name);
+                                       log_assert(GetSize(wire) == 1);
+                                       if (wire->port_input) num_inputs++;
+                                       if (wire->port_output) num_outputs++;
+                               }
+                               log_assert(num_outputs == 1);
+
+                               ss << log_id(module) << " " << r.first->second.as_int();
+                               ss << " " << (module->get_bool_attribute(ID::whitebox) ? "1" : "0");
+                               ss << " " << num_inputs+1 << " " << num_outputs << std::endl;
+
+                               ss << "#";
+                               bool first = true;
+                               for (auto port_name : module->ports) {
+                                       auto wire = module->wire(port_name);
+                                       if (!wire->port_input)
+                                               continue;
+                                       if (first)
+                                               first = false;
+                                       else
+                                               ss << " ";
+                                       ss << log_id(wire);
+                               }
+                               ss << " abc9_ff.Q" << std::endl;
+
+                               auto &t = timing.setup_module(module).required;
+                               first = true;
+                               for (auto port_name : module->ports) {
+                                       auto wire = module->wire(port_name);
+                                       if (!wire->port_input)
+                                               continue;
+                                       if (first)
+                                               first = false;
+                                       else
+                                               ss << " ";
+                                       log_assert(GetSize(wire) == 1);
+                                       auto it = t.find(SigBit(wire,0));
+                                       if (it == t.end())
+                                               // Assume that no setup time means zero
+                                               ss << 0;
+                                       else {
+                                               ss << it->second;
 
-               int num_inputs = 0, num_outputs = 0;
-               for (auto port_name : flop_module->ports) {
-                       auto wire = flop_module->wire(port_name);
-                       if (wire->port_input) num_inputs++;
-                       if (wire->port_output) num_outputs++;
+#ifndef NDEBUG
+                                               if (ys_debug(1)) {
+                                                       static std::set<std::pair<IdString,IdString>> seen;
+                                                       if (seen.emplace(module->name, port_name).second) log("%s.%s abc9_required = %d\n", log_id(module),
+                                                                       log_id(port_name), it->second);
+                                               }
+#endif
+                                       }
+
+                               }
+                               // Last input is 'abc9_ff.Q'
+                               ss << " 0" << std::endl << std::endl;
+                               continue;
+                       }
                }
-               log_assert(num_outputs == 1);
+               else {
+                       if (!module->attributes.erase(ID(abc9_box)))
+                               continue;
 
-               ss << log_id(flop_module) << " " << flop_module->attributes.at(ID(abc9_box_id)).as_int();
-               ss << " 1 " << num_inputs+1 << " " << num_outputs << std::endl;
-               bool first = true;
-               for (auto port_name : flop_module->ports) {
-                       auto wire = flop_module->wire(port_name);
-                       if (!wire->port_input)
+                       auto r = module->attributes.insert(ID(abc9_box_id));
+                       if (!r.second)
                                continue;
+                       r.first->second = abc9_box_id++;
+               }
+
+               auto r = box_ports.insert(module->name);
+               if (r.second) {
+                       // Make carry in the last PI, and carry out the last PO
+                       //   since ABC requires it this way
+                       IdString carry_in, carry_out;
+                       for (const auto &port_name : module->ports) {
+                               auto w = module->wire(port_name);
+                               log_assert(w);
+                               if (w->get_bool_attribute("\\abc9_carry")) {
+                                       log_assert(w->port_input != w->port_output);
+                                       if (w->port_input)
+                                               carry_in = port_name;
+                                       else if (w->port_output)
+                                               carry_out = port_name;
+                               }
+                               else
+                                       r.first->second.push_back(port_name);
+                       }
+
+                       if (carry_in != IdString()) {
+                               r.first->second.push_back(carry_in);
+                               r.first->second.push_back(carry_out);
+                       }
+               }
+
+               std::vector<SigBit> inputs;
+               std::vector<SigBit> outputs;
+               for (auto port_name : r.first->second) {
+                       auto wire = module->wire(port_name);
+                       if (wire->port_input)
+                               for (int i = 0; i < GetSize(wire); i++)
+                                       inputs.emplace_back(wire, i);
+                       if (wire->port_output)
+                               for (int i = 0; i < GetSize(wire); i++)
+                                       outputs.emplace_back(wire, i);
+               }
+
+               ss << log_id(module) << " " << module->attributes.at(ID(abc9_box_id)).as_int();
+               ss << " " << (module->get_bool_attribute(ID::whitebox) ? "1" : "0");
+               ss << " " << GetSize(inputs) << " " << GetSize(outputs) << std::endl;
+
+               bool first = true;
+               ss << "#";
+               for (const auto &i : inputs) {
                        if (first)
                                first = false;
                        else
                                ss << " ";
-                       ss << wire->attributes.at("\\abc9_required", 0).as_int();
+                       if (GetSize(i.wire) == 1)
+                               ss << log_id(i.wire);
+                       else
+                               ss << log_id(i.wire) << "[" << i.offset << "]";
                }
-               // Last input is 'abc9_ff.Q'
-               ss << " 0" << std::endl << std::endl;
-       }
-       design->scratchpad_set_string("abc9_ops.box.flops", ss.str());
-}
+               ss << std::endl;
 
-void write_box(RTLIL::Module *module, const std::string &src, const std::string &dst) {
-       std::ofstream ofs(dst);
-       log_assert(ofs.is_open());
+               auto &t = timing.setup_module(module).comb;
+               if (t.empty())
+                       log_warning("(* abc9_box *) module '%s' has no timing (and thus no connectivity) information.\n", log_id(module));
 
-       // Since ABC can only accept one box file, we have to copy
-       //   over the existing box file
-       if (src != "(null)") {
-               std::ifstream ifs(src);
-               ofs << ifs.rdbuf() << std::endl;
-               ifs.close();
-       }
-
-       ofs << module->design->scratchpad_get_string("abc9_ops.box.flops");
+               for (const auto &o : outputs) {
+                       first = true;
+                       for (const auto &i : inputs) {
+                               if (first)
+                                       first = false;
+                               else
+                                       ss << " ";
+                               auto jt = t.find(std::make_pair(i,o));
+                               if (jt == t.end())
+                                       ss << "-";
+                               else
+                                       ss << jt->second;
+                       }
+                       ss << " # ";
+                       if (GetSize(o.wire) == 1)
+                               ss << log_id(o.wire);
+                       else
+                               ss << log_id(o.wire) << "[" << o.offset << "]";
+                       ss << std::endl;
 
-       auto it = module->attributes.find(ID(abc9_delays));
-       if (it != module->attributes.end()) {
-               for (const auto &tok : split_tokens(it->second.decode_string())) {
-                       int d = atoi(tok.c_str());
-                       ofs << "$__ABC9_DELAY@" << d << " " << ABC9_DELAY_BASE_ID + d << " 0 1 1" << std::endl;
-                       ofs << d << std::endl;
                }
-               module->attributes.erase(it);
+               ss << std::endl;
        }
 
+       // ABC expects at least one box
+       if (ss.tellp() == 0)
+               ss << "(dummy) 1 0 0 0";
+
+       design->scratchpad_set_string("abc9_ops.box_library", ss.str());
+}
+
+void write_box(RTLIL::Module *module, const std::string &dst) { // dump scratchpad "abc9_ops.box_library" to file 'dst'
+       std::ofstream ofs(dst);
+       if (!ofs.is_open()) log_error("Can't open '%s' for writing.\n", dst.c_str()); // I/O failure, not an internal invariant
+       ofs << module->design->scratchpad_get_string("abc9_ops.box_library");
        ofs.close();
 }
 
@@ -550,23 +725,44 @@ void reintegrate(RTLIL::Module *module)
        for (auto w : mapped_mod->wires())
                module->addWire(remap_name(w->name), GetSize(w));
 
-       dict<IdString,IdString> box_lookup;
+       dict<IdString,std::vector<IdString>> box_ports;
+
        for (auto m : design->modules()) {
-               auto it = m->attributes.find(ID(abc9_box_id));
-               if (it == m->attributes.end())
+               if (!m->attributes.count(ID(abc9_box_id)))
                        continue;
-               if (m->name.begins_with("$paramod"))
+
+               auto r = box_ports.insert(m->name);
+               if (!r.second)
                        continue;
-               auto id = it->second.as_int();
-               auto r YS_ATTRIBUTE(unused) = box_lookup.insert(std::make_pair(stringf("$__boxid%d", id), m->name));
-               log_assert(r.second);
+
+               // Make carry in the last PI, and carry out the last PO
+               //   since ABC requires it this way
+               IdString carry_in, carry_out;
+               for (const auto &port_name : m->ports) {
+                       auto w = m->wire(port_name);
+                       log_assert(w);
+                       if (w->get_bool_attribute("\\abc9_carry")) {
+                               log_assert(w->port_input != w->port_output);
+                               if (w->port_input)
+                                       carry_in = port_name;
+                               else if (w->port_output)
+                                       carry_out = port_name;
+                       }
+                       else
+                               r.first->second.push_back(port_name);
+               }
+
+               if (carry_in != IdString()) {
+                       r.first->second.push_back(carry_in);
+                       r.first->second.push_back(carry_out);
+               }
        }
 
        std::vector<Cell*> boxes;
        for (auto cell : module->cells().to_vector()) {
                if (cell->has_keep_attr())
                        continue;
-               if (cell->type.in(ID($_AND_), ID($_NOT_), ID($__ABC9_FF_), ID($__ABC9_DELAY)))
+               if (cell->type.in(ID($_AND_), ID($_NOT_), ID($__ABC9_FF_)))
                        module->remove(cell);
                else if (cell->attributes.erase("\\abc9_box_seq"))
                        boxes.emplace_back(cell);
@@ -577,10 +773,10 @@ void reintegrate(RTLIL::Module *module)
        dict<RTLIL::Cell*,RTLIL::Cell*> not2drivers;
        dict<SigBit, std::vector<RTLIL::Cell*>> bit2sinks;
 
-       dict<IdString,std::vector<IdString>> box_ports;
        std::map<IdString, int> cell_stats;
        for (auto mapped_cell : mapped_mod->cells())
        {
+               // TODO: Speed up toposort -- we care about NOT ordering only
                toposort.node(mapped_cell->name);
 
                if (mapped_cell->type == ID($_NOT_)) {
@@ -659,42 +855,43 @@ void reintegrate(RTLIL::Module *module)
                                                        bit_drivers[i].insert(mapped_cell->name);
                        }
                }
-               else if (box_lookup.at(mapped_cell->type, IdString()) == ID($__ABC9_DELAY)) {
-                       SigBit I = mapped_cell->getPort(ID(i));
-                       SigBit O = mapped_cell->getPort(ID(o));
-                       if (I.wire)
-                               I.wire = module->wires_.at(remap_name(I.wire->name));
-                       log_assert(O.wire);
-                       O.wire = module->wires_.at(remap_name(O.wire->name));
-                       module->connect(O, I);
-                       continue;
-               }
                else {
                        RTLIL::Cell *existing_cell = module->cell(mapped_cell->name);
                        if (!existing_cell)
                                log_error("Cannot find existing box cell with name '%s' in original design.\n", log_id(mapped_cell));
-                       log_assert(mapped_cell->type.begins_with("$__boxid"));
 
-                       auto type = box_lookup.at(mapped_cell->type, IdString());
-                       if (type == IdString())
-                               log_error("No module with abc9_box_id = %s found.\n", mapped_cell->type.c_str() + strlen("$__boxid"));
-                       mapped_cell->type = type;
+                       if (existing_cell->type == ID($__ABC9_DELAY)) {
+                               SigBit I = mapped_cell->getPort(ID(i));
+                               SigBit O = mapped_cell->getPort(ID(o));
+                               if (I.wire)
+                                       I.wire = module->wires_.at(remap_name(I.wire->name));
+                               log_assert(O.wire);
+                               O.wire = module->wires_.at(remap_name(O.wire->name));
+                               module->connect(O, I);
+                               continue;
+                       }
+
+                       RTLIL::Module* box_module = design->module(existing_cell->type);
+                       IdString derived_type = box_module->derive(design, existing_cell->parameters);
+                       RTLIL::Module* derived_module = design->module(derived_type);
+                       log_assert(derived_module);
+                       log_assert(mapped_cell->type == stringf("$__boxid%d", derived_module->attributes.at("\\abc9_box_id").as_int()));
+                       mapped_cell->type = existing_cell->type;
 
                        RTLIL::Cell *cell = module->addCell(remap_name(mapped_cell->name), mapped_cell->type);
                        cell->parameters = existing_cell->parameters;
                        cell->attributes = existing_cell->attributes;
                        module->swap_names(cell, existing_cell);
 
-                       auto it = mapped_cell->connections_.find("\\i");
-                       log_assert(it != mapped_cell->connections_.end());
-                       SigSpec inputs = std::move(it->second);
-                       mapped_cell->connections_.erase(it);
-                       it = mapped_cell->connections_.find("\\o");
-                       log_assert(it != mapped_cell->connections_.end());
-                       SigSpec outputs = std::move(it->second);
-                       mapped_cell->connections_.erase(it);
+                       auto jt = mapped_cell->connections_.find("\\i");
+                       log_assert(jt != mapped_cell->connections_.end());
+                       SigSpec inputs = std::move(jt->second);
+                       mapped_cell->connections_.erase(jt);
+                       jt = mapped_cell->connections_.find("\\o");
+                       log_assert(jt != mapped_cell->connections_.end());
+                       SigSpec outputs = std::move(jt->second);
+                       mapped_cell->connections_.erase(jt);
 
-                       RTLIL::Module* box_module = design->module(mapped_cell->type);
                        auto abc9_flop = box_module->attributes.count("\\abc9_flop");
                        if (!abc9_flop) {
                                for (const auto &i : inputs)
@@ -705,33 +902,8 @@ void reintegrate(RTLIL::Module *module)
                                                bit_drivers[i].insert(mapped_cell->name);
                        }
 
-                       auto r2 = box_ports.insert(cell->type);
-                       if (r2.second) {
-                               // Make carry in the last PI, and carry out the last PO
-                               //   since ABC requires it this way
-                               IdString carry_in, carry_out;
-                               for (const auto &port_name : box_module->ports) {
-                                       auto w = box_module->wire(port_name);
-                                       log_assert(w);
-                                       if (w->get_bool_attribute("\\abc9_carry")) {
-                                               log_assert(w->port_input != w->port_output);
-                                               if (w->port_input)
-                                                       carry_in = port_name;
-                                               else if (w->port_output)
-                                                       carry_out = port_name;
-                                       }
-                                       else
-                                               r2.first->second.push_back(port_name);
-                               }
-
-                               if (carry_in != IdString()) {
-                                       r2.first->second.push_back(carry_in);
-                                       r2.first->second.push_back(carry_out);
-                               }
-                       }
-
                        int input_count = 0, output_count = 0;
-                       for (const auto &port_name : box_ports.at(cell->type)) {
+                       for (const auto &port_name : box_ports.at(derived_type)) {
                                RTLIL::Wire *w = box_module->wire(port_name);
                                log_assert(w);
 
@@ -754,15 +926,6 @@ void reintegrate(RTLIL::Module *module)
                                                c.wire = module->wires_.at(remap_name(c.wire->name));
                                        newsig.append(c);
                                }
-
-                               auto it = existing_cell->connections_.find(port_name);
-                               if (it == existing_cell->connections_.end())
-                                       continue;
-                               if (GetSize(newsig) > GetSize(it->second))
-                                       newsig = newsig.extract(0, GetSize(it->second));
-                               else
-                                       log_assert(GetSize(newsig) == GetSize(it->second));
-
                                cell->setPort(port_name, newsig);
 
                                if (w->port_input && !abc9_flop)
@@ -827,6 +990,17 @@ void reintegrate(RTLIL::Module *module)
                }
        }
 
+       // ABC9 will return $_NOT_ gates in its mapping (since they are
+       //   treated as being "free"), in particular driving primary
+       //   outputs (real primary outputs, or cells treated as blackboxes)
+       //   or driving box inputs.
+       // Instead of just mapping those $_NOT_ gates into 2-input $lut-s
+       //   at an area and delay cost, see if it is possible to push
+       //   this $_NOT_ into the driving LUT, or into all sink LUTs.
+       // When this is not possible, (i.e. this signal drives two primary
+       //   outputs, only one of which is complemented) and when the driver
+       //   is a LUT, then clone the LUT so that it can be inverted without
+       //   increasing depth/delay.
        for (auto &it : bit_users)
                if (bit_drivers.count(it.first))
                        for (auto driver_cell : bit_drivers.at(it.first))
@@ -925,7 +1099,7 @@ struct Abc9OpsPass : public Pass {
                log("\n");
                log("    -prep_delays\n");
                log("        insert `$__ABC9_DELAY' blackbox cells into the design to account for\n");
-               log("        certain delays, e.g. (* abc9_required *) values.\n");
+               log("        certain required times.\n");
                log("\n");
                log("    -mark_scc\n");
                log("        for an arbitrarily chosen cell in each unique SCC of each selected module\n");
@@ -940,16 +1114,26 @@ struct Abc9OpsPass : public Pass {
                log("        whiteboxes.\n");
                log("\n");
                log("    -dff\n");
-               log("        consider flop cells (those instantiating modules marked with (* abc9_flop *)\n");
-               log("        during -prep_xaiger.\n");
+               log("        consider flop cells (those instantiating modules marked with (* abc9_flop *))\n");
+               log("        during -prep_{delays,xaiger,box}.\n");
                log("\n");
                log("    -prep_dff\n");
                log("        compute the clock domain and initial value of each flop in the design.\n");
                log("        process the '$holes' module to support clock-enable functionality.\n");
                log("\n");
-               log("    -write_box (<src>|(null)) <dst>\n");
-               log("        copy the existing box file from <src> (skip if '(null)') and append any\n");
-               log("        new box definitions.\n");
+               log("    -prep_lut <maxlut>\n");
+               log("        pre-compute the lut library by analysing all modules marked with\n");
+               log("        (* abc9_lut=<area> *).\n");
+               log("\n");
+               log("    -write_lut <dst>\n");
+               log("        write the pre-computed lut library to <dst>.\n");
+               log("\n");
+               log("    -prep_box\n");
+               log("        pre-compute the box library by analysing all modules marked with\n");
+               log("        (* abc9_box *).\n");
+               log("\n");
+               log("    -write_box <dst>\n");
+               log("        write the pre-computed box library to <dst>.\n");
                log("\n");
                log("    -reintegrate\n");
                log("        for each selected module, re-intergrate the module '<module-name>$abc9'\n");
@@ -966,9 +1150,13 @@ struct Abc9OpsPass : public Pass {
                bool mark_scc_mode = false;
                bool prep_dff_mode = false;
                bool prep_xaiger_mode = false;
+               bool prep_lut_mode = false;
+               bool prep_box_mode = false;
                bool reintegrate_mode = false;
                bool dff_mode = false;
-               std::string write_box_src, write_box_dst;
+               std::string write_lut_dst;
+               int maxlut = 0;
+               std::string write_box_dst;
 
                size_t argidx;
                for (argidx = 1; argidx < args.size(); argidx++) {
@@ -993,10 +1181,25 @@ struct Abc9OpsPass : public Pass {
                                prep_delays_mode = true;
                                continue;
                        }
-                       if (arg == "-write_box" && argidx+2 < args.size()) {
-                               write_box_src = args[++argidx];
+                       if (arg == "-prep_lut" && argidx+1 < args.size()) {
+                               prep_lut_mode = true;
+                               maxlut = atoi(args[++argidx].c_str());
+                               continue;
+                       }
+                       if (arg == "-maxlut" && argidx+1 < args.size()) {
+                               continue;
+                       }
+                       if (arg == "-write_lut" && argidx+1 < args.size()) {
+                               write_lut_dst = args[++argidx];
+                               rewrite_filename(write_lut_dst);
+                               continue;
+                       }
+                       if (arg == "-prep_box") {
+                               prep_box_mode = true;
+                               continue;
+                       }
+                       if (arg == "-write_box" && argidx+1 < args.size()) {
                                write_box_dst = args[++argidx];
-                               rewrite_filename(write_box_src);
                                rewrite_filename(write_box_dst);
                                continue;
                        }
@@ -1012,16 +1215,20 @@ struct Abc9OpsPass : public Pass {
                }
                extra_args(args, argidx, design);
 
-               if (!(check_mode || mark_scc_mode || prep_delays_mode || prep_xaiger_mode || prep_dff_mode || !write_box_src.empty() || reintegrate_mode))
-                       log_cmd_error("At least one of -check, -mark_scc, -prep_{delays,xaiger,dff}, -write_box, -reintegrate must be specified.\n");
+               if (!(check_mode || mark_scc_mode || prep_delays_mode || prep_xaiger_mode || prep_dff_mode || prep_lut_mode || prep_box_mode || !write_lut_dst.empty() || !write_box_dst.empty() || reintegrate_mode))
+                       log_cmd_error("At least one of -check, -mark_scc, -prep_{delays,xaiger,dff,lut,box}, -write_{lut,box}, -reintegrate must be specified.\n");
 
-               if (dff_mode && !prep_xaiger_mode)
-                       log_cmd_error("'-dff' option is only relevant for -prep_xaiger.\n");
+               if (dff_mode && !prep_delays_mode && !prep_xaiger_mode && !prep_box_mode) // -dff is rejected unless one of the modes checked here is selected
+                       log_cmd_error("'-dff' option is only relevant for -prep_{delays,xaiger,box}.\n");
 
                if (check_mode)
                        check(design);
                if (prep_delays_mode)
-                       prep_delays(design);
+                       prep_delays(design, dff_mode);
+               if (prep_lut_mode)
+                       prep_lut(design, maxlut);
+               if (prep_box_mode)
+                       prep_box(design, dff_mode);
 
                for (auto mod : design->selected_modules()) {
                        if (mod->get_bool_attribute("\\abc9_holes"))
@@ -1035,8 +1242,10 @@ struct Abc9OpsPass : public Pass {
                        if (!design->selected_whole_module(mod))
                                log_error("Can't handle partially selected module %s!\n", log_id(mod));
 
-                       if (!write_box_src.empty())
-                               write_box(mod, write_box_src, write_box_dst);
+                       if (!write_lut_dst.empty())
+                               write_lut(mod, write_lut_dst);
+                       if (!write_box_dst.empty())
+                               write_box(mod, write_box_dst);
                        if (mark_scc_mode)
                                mark_scc(mod);
                        if (prep_dff_mode)