From: whitequark Date: Tue, 21 Apr 2020 13:33:42 +0000 (+0000) Subject: cxxrtl: localize wires with multiple comb drivers, too. X-Git-Tag: working-ls180~602^2~9 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=757cbb3c809091bf20a2f3b3a8b621010de01a8d;p=yosys.git cxxrtl: localize wires with multiple comb drivers, too. Before this commit, any wire that was not driven by an output port of exactly one comb cell would not be localized, even if there were no feedback arcs through that wire. This would cause the wire to become buffered and require (often quite a few) extraneous delta cycles during evaluation. To alleviate this problem, -O5 was running `splitnets -driver`. However, this solution was mistaken. Because `splitnets -driver` followed by `opt_clean -purge` would produce more nets with multiple drivers, it would have to be iterated to fixpoint. Moreover, even if this was done, it would not be sufficient because `opt_clean -purge` does not currently remove wires with the `\init` attribute (and it is not desirable to remove such wires, since they correspond to registers and may be useful for debugging). The proper solution is to consider the condition in which a wire may be localized. Specifically, if there are no feedback arcs through this wire, and no part of the wire is driven by an output of a sync cell, then the wire holds no state and is localizable. After this commit, the original condition for not localizing a wire is replaced by a check for any sync cell driving it. This makes it unnecessary to run `splitnets -driver` in the majority of cases to get a design with no buffered wires, and -O5 no longer includes that pass. As a result, Minerva SRAM SoC no longer has any buffered wires, and runs ~27% faster. In addition, this commit prepares the flow graph for introduction of sync outputs of black boxes. Co-authored-by: Jean-François Nguyen --- diff --git a/backends/cxxrtl/cxxrtl.cc b/backends/cxxrtl/cxxrtl.cc index 196a27524..8f2bf6836 100644 --- a/backends/cxxrtl/cxxrtl.cc +++ b/backends/cxxrtl/cxxrtl.cc @@ -225,7 +225,7 @@ struct FlowGraph { }; std::vector nodes; - dict> wire_defs, wire_uses; + dict> wire_comb_defs, wire_sync_defs, wire_uses; dict wire_def_elidable, wire_use_elidable; ~FlowGraph() @@ -234,13 +234,17 @@ struct FlowGraph { delete node; } - void add_defs(Node *node, const RTLIL::SigSpec &sig, bool elidable) + void add_defs(Node *node, const RTLIL::SigSpec &sig, bool is_sync, bool elidable) { for (auto chunk : sig.chunks()) - if (chunk.wire) - wire_defs[chunk.wire].insert(node); - // Only defs of an entire wire in the right order can be elided. - if (sig.is_wire()) + if (chunk.wire) { + if (is_sync) + wire_sync_defs[chunk.wire].insert(node); + else + wire_comb_defs[chunk.wire].insert(node); + } + // Only comb defs of an entire wire in the right order can be elided. + if (!is_sync && sig.is_wire()) wire_def_elidable[sig.as_wire()] = elidable; } @@ -268,7 +272,7 @@ struct FlowGraph { // Connections void add_connect_defs_uses(Node *node, const RTLIL::SigSig &conn) { - add_defs(node, conn.first, /*elidable=*/true); + add_defs(node, conn.first, /*is_sync=*/false, /*elidable=*/true); add_uses(node, conn.second); } @@ -288,16 +292,16 @@ struct FlowGraph { log_assert(cell->known()); for (auto conn : cell->connections()) { if (cell->output(conn.first)) { - if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool())) - /* non-combinatorial outputs do not introduce defs */; - else if (is_elidable_cell(cell->type)) - add_defs(node, conn.second, /*elidable=*/true); + if (is_elidable_cell(cell->type)) + add_defs(node, conn.second, /*is_sync=*/false, /*elidable=*/true); + else if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool())) + add_defs(node, conn.second, /*is_sync=*/true, /*elidable=*/false); else if (is_internal_cell(cell->type)) - add_defs(node, conn.second, /*elidable=*/false); + add_defs(node, conn.second, /*is_sync=*/false, /*elidable=*/false); else { // Unlike outputs of internal cells (which generate code that depends on the ability to set the output // wire bits), outputs of user cells are normal wires, and the wires connected to them can be elided. - add_defs(node, conn.second, /*elidable=*/true); + add_defs(node, conn.second, /*is_sync=*/false, /*elidable=*/true); } } if (cell->input(conn.first)) @@ -319,7 +323,7 @@ struct FlowGraph { void add_case_defs_uses(Node *node, const RTLIL::CaseRule *case_) { for (auto &action : case_->actions) { - add_defs(node, action.first, /*elidable=*/false); + add_defs(node, action.first, /*is_sync=*/false, /*elidable=*/false); add_uses(node, action.second); } for (auto sub_switch : case_->switches) { @@ -338,9 +342,9 @@ struct FlowGraph { for (auto sync : process->syncs) for (auto action : sync->actions) { if (sync->type == RTLIL::STp || sync->type == RTLIL::STn || sync->type == RTLIL::STe) - /* sync actions do not introduce feedback */; + add_defs(node, action.first, /*is_sync=*/true, /*elidable=*/false); else - add_defs(node, action.first, /*elidable=*/false); + add_defs(node, action.first, /*is_sync=*/false, /*elidable=*/false); add_uses(node, action.second); } } @@ -414,7 +418,7 @@ struct CxxrtlWorker { bool elide_public = false; bool localize_internal = false; bool localize_public = false; - bool run_splitnets = false; + bool run_opt_clean_purge = false; bool max_opt_level = false; std::ostringstream f; @@ -1792,8 +1796,8 @@ struct CxxrtlWorker { if (wire->name.begins_with("$") && !elide_internal) continue; if (wire->name.begins_with("\\") && !elide_public) continue; if (sync_wires[wire]) continue; - log_assert(flow.wire_defs[wire].size() == 1); - elided_wires[wire] = **flow.wire_defs[wire].begin(); + log_assert(flow.wire_comb_defs[wire].size() == 1); + elided_wires[wire] = **flow.wire_comb_defs[wire].begin(); } // Elided wires that are outputs of internal cells are always connected to a well known port (Y). @@ -1805,9 +1809,9 @@ struct CxxrtlWorker { cell_wire_defs[cell][conn.second.as_wire()] = conn.first; dict, hash_ptr_ops> node_defs; - for (auto wire_def : flow.wire_defs) - for (auto node : wire_def.second) - node_defs[node].insert(wire_def.first); + for (auto wire_comb_def : flow.wire_comb_defs) + for (auto node : wire_comb_def.second) + node_defs[node].insert(wire_comb_def.first); Scheduler scheduler; dict::Vertex*, hash_ptr_ops> node_map; @@ -1858,8 +1862,7 @@ struct CxxrtlWorker { if (wire->name.begins_with("$") && !localize_internal) continue; if (wire->name.begins_with("\\") && !localize_public) continue; if (sync_wires[wire]) continue; - // Wires connected to synchronous outputs do not introduce defs. - if (flow.wire_defs[wire].size() != 1) continue; + if (flow.wire_sync_defs.count(wire) > 0) continue; localized_wires.insert(wire); } @@ -1871,8 +1874,7 @@ struct CxxrtlWorker { // also require more than one delta cycle to converge. pool buffered_wires; for (auto wire : module->wires()) { - // Only wires connected to combinatorial outputs introduce defs. - if (flow.wire_defs[wire].size() > 0 && !elided_wires.count(wire) && !localized_wires[wire]) { + if (flow.wire_comb_defs[wire].size() > 0 && !elided_wires.count(wire) && !localized_wires[wire]) { if (!feedback_wires[wire]) buffered_wires.insert(wire); } @@ -1942,11 +1944,8 @@ struct CxxrtlWorker { if (has_sync_init || has_packed_mem) check_design(design, has_sync_init, has_packed_mem); log_assert(!(has_sync_init || has_packed_mem)); - - if (run_splitnets) { - Pass::call(design, "splitnets -driver"); + if (run_opt_clean_purge) Pass::call(design, "opt_clean -purge"); - } log("\n"); analyze_design(design); } @@ -2134,7 +2133,7 @@ struct CxxrtlBackend : public Backend { log(" like -O3, and localize public wires not marked (*keep*) if possible.\n"); log("\n"); log(" -O5\n"); - log(" like -O4, and run `splitnets -driver; opt_clean -purge` first.\n"); + log(" like -O4, and run `opt_clean -purge` first.\n"); log("\n"); } void execute(std::ostream *&f, std::string filename, std::vector args, RTLIL::Design *design) YS_OVERRIDE @@ -2170,7 +2169,7 @@ struct CxxrtlBackend : public Backend { switch (opt_level) { case 5: worker.max_opt_level = true; - worker.run_splitnets = true; + worker.run_opt_clean_purge = true; case 4: worker.localize_public = true; case 3: