From d6d727342112eb89451407bd1d9954b8279bd015 Mon Sep 17 00:00:00 2001 From: whitequark Date: Mon, 9 Dec 2019 19:05:52 +0000 Subject: [PATCH] write_cxxrtl: elide wires for results of comb cells used once. This results in massive gains in performance, equally massive reduction in compile time, and improved readability. --- backends/cxxrtl/cxxrtl.cc | 394 ++++++++++++++++++++++++++++++++++---- 1 file changed, 359 insertions(+), 35 deletions(-) diff --git a/backends/cxxrtl/cxxrtl.cc b/backends/cxxrtl/cxxrtl.cc index 2dc7b3d36..43d973ade 100644 --- a/backends/cxxrtl/cxxrtl.cc +++ b/backends/cxxrtl/cxxrtl.cc @@ -26,7 +26,175 @@ USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN +static bool is_unary_cell(RTLIL::IdString type) +{ + return type.in( + ID($not), ID($logic_not), ID($reduce_and), ID($reduce_or), ID($reduce_xor), ID($reduce_xnor), ID($reduce_bool), + ID($pos), ID($neg)); +} + +static bool is_binary_cell(RTLIL::IdString type) +{ + return type.in( + ID($and), ID($or), ID($xor), ID($xnor), ID($logic_and), ID($logic_or), + ID($shl), ID($sshl), ID($shr), ID($sshr), ID($shift), ID($shiftx), + ID($eq), ID($ne), ID($eqx), ID($nex), ID($gt), ID($ge), ID($lt), ID($le), + ID($add), ID($sub), ID($mul), ID($div), ID($mod)); +} + +static bool is_elidable_cell(RTLIL::IdString type) +{ + return is_unary_cell(type) || is_binary_cell(type) || type == ID($mux); +} + +static bool is_ff_cell(RTLIL::IdString type) +{ + return type.in( + ID($dff), ID($dffe), ID($adff), ID($dffsr)); +} + +struct FlowGraph { + struct Node { + enum class Type { + CONNECT, + CELL, + PROCESS + }; + + Type type; + RTLIL::SigSig connect = {}; + const RTLIL::Cell *cell = NULL; + const RTLIL::Process *process = NULL; + }; + + std::vector nodes; + dict> wire_defs, wire_uses; + dict wire_def_elidable, wire_use_elidable; + + ~FlowGraph() + { + for (auto node : nodes) + delete node; + } + + void add_defs(Node *node, const RTLIL::SigSpec &sig, bool elidable) + { + for (auto chunk : sig.chunks()) + if (chunk.wire) + wire_defs[chunk.wire].insert(node); + // Only defs of an entire wire in the right order can be elided. + if (sig.is_wire()) + wire_def_elidable[sig.as_wire()] = elidable; + } + + void add_uses(Node *node, const RTLIL::SigSpec &sig) + { + for (auto chunk : sig.chunks()) + if (chunk.wire) { + wire_uses[chunk.wire].insert(node); + // Only a single use of an entire wire in the right order can be elided. + // (But the use can include other chunks.) + if (!wire_use_elidable.count(chunk.wire)) + wire_use_elidable[chunk.wire] = true; + else + wire_use_elidable[chunk.wire] = false; + } + } + + bool is_elidable(const RTLIL::Wire *wire) const + { + if (wire_def_elidable.count(wire) && wire_use_elidable.count(wire)) + return wire_def_elidable.at(wire) && wire_use_elidable.at(wire); + return false; + } + + // Connections + void add_connect_defs_uses(Node *node, const RTLIL::SigSig &conn) + { + add_defs(node, conn.first, /*elidable=*/true); + add_uses(node, conn.second); + } + + void add_node(const RTLIL::SigSig &conn) + { + Node *node = new Node; + node->type = Node::Type::CONNECT; + node->connect = conn; + nodes.push_back(node); + add_connect_defs_uses(node, conn); + } + + // Cells + void add_cell_defs_uses(Node *node, const RTLIL::Cell *cell) + { + log_assert(cell->known()); + for (auto conn : cell->connections()) { + if (cell->output(conn.first)) { + if (is_ff_cell(cell->type)) + /* non-combinatorial outputs do not introduce defs */; + else if (is_elidable_cell(cell->type)) + add_defs(node, conn.second, /*elidable=*/true); + else + add_defs(node, conn.second, /*elidable=*/false); + } + if (cell->input(conn.first)) + add_uses(node, conn.second); + } + } + + void add_node(const RTLIL::Cell *cell) + { + Node *node = new Node; + node->type = Node::Type::CELL; + node->cell = cell; + nodes.push_back(node); + add_cell_defs_uses(node, cell); + } + + // Processes + void add_case_defs_uses(Node *node, const RTLIL::CaseRule *case_) + { + for (auto &action : case_->actions) { + add_defs(node, action.first, /*elidable=*/false); + add_uses(node, action.second); + } + for (auto sub_switch : case_->switches) { + add_uses(node, sub_switch->signal); + for (auto sub_case : sub_switch->cases) { + for (auto &compare : sub_case->compare) + add_uses(node, compare); + add_case_defs_uses(node, sub_case); + } + } + } + + void add_process_defs_uses(Node *node, const RTLIL::Process *process) + { + add_case_defs_uses(node, &process->root_case); + for (auto sync : process->syncs) + for (auto action : sync->actions) { + if (sync->type == RTLIL::STp || sync->type == RTLIL::STn || sync->type == RTLIL::STe) + /* sync actions do not introduce feedback */; + else + add_defs(node, action.first, /*elidable=*/false); + add_uses(node, action.second); + } + } + + void add_node(const RTLIL::Process *process) + { + Node *node = new Node; + node->type = Node::Type::PROCESS; + node->process = process; + nodes.push_back(node); + add_process_defs_uses(node, process); + } +}; + struct CxxrtlWorker { + bool elide_internal = false; + bool elide_public = false; + std::ostream &f; std::string indent; int temporary = 0; @@ -35,6 +203,7 @@ struct CxxrtlWorker { pool sync_wires; dict sync_types; pool writable_memories; + dict elided_wires; CxxrtlWorker(std::ostream &f) : f(f) {} @@ -183,7 +352,21 @@ struct CxxrtlWorker { dump_const(chunk.data, chunk.width, chunk.offset); return false; } else { - f << mangle(chunk.wire) << (is_lhs ? ".next" : ".curr"); + if (!is_lhs && elided_wires.count(chunk.wire)) { + const FlowGraph::Node &node = elided_wires[chunk.wire]; + switch (node.type) { + case FlowGraph::Node::Type::CONNECT: + dump_connect_elided(node.connect); + break; + case FlowGraph::Node::Type::CELL: + dump_cell_elided(node.cell); + break; + default: + log_assert(false); + } + } else { + f << mangle(chunk.wire) << (is_lhs ? ".next" : ".curr"); + } if (chunk.width == chunk.wire->width && chunk.offset == 0) return false; else if (chunk.width == 1) @@ -228,56 +411,134 @@ struct CxxrtlWorker { f << ".val()"; } - void dump_assign(const RTLIL::SigSig &sigsig) + void collect_sigspec_rhs(const RTLIL::SigSpec &sig, std::vector &cells) + { + for (auto chunk : sig.chunks()) { + if (!chunk.wire || !elided_wires.count(chunk.wire)) + continue; + + const FlowGraph::Node &node = elided_wires[chunk.wire]; + switch (node.type) { + case FlowGraph::Node::Type::CONNECT: + collect_connect(node.connect, cells); + break; + case FlowGraph::Node::Type::CELL: + collect_cell(node.cell, cells); + break; + default: + log_assert(false); + } + } + } + + void dump_connect_elided(const RTLIL::SigSig &conn) + { + dump_sigspec_rhs(conn.second); + } + + bool is_connect_elided(const RTLIL::SigSig &conn) + { + return conn.first.is_wire() && elided_wires.count(conn.first.as_wire()); + } + + void collect_connect(const RTLIL::SigSig &conn, std::vector &cells) { + if (!is_connect_elided(conn)) + return; + + collect_sigspec_rhs(conn.second, cells); + } + + void dump_connect(const RTLIL::SigSig &conn) + { + if (is_connect_elided(conn)) + return; + f << indent; - dump_sigspec_lhs(sigsig.first); + dump_sigspec_lhs(conn.first); f << " = "; - dump_sigspec_rhs(sigsig.second); + dump_connect_elided(conn); f << ";\n"; } - void dump_cell(const RTLIL::Cell *cell) + void dump_cell_elided(const RTLIL::Cell *cell) { - dump_attrs(cell); - f << indent << "// cell " << cell->name.str() << "\n"; // Unary cells - if (cell->type.in( - ID($not), ID($logic_not), ID($reduce_and), ID($reduce_or), ID($reduce_xor), ID($reduce_xnor), ID($reduce_bool), - ID($pos), ID($neg))) { - f << indent; - dump_sigspec_lhs(cell->getPort(ID(Y))); - f << " = " << cell->type.substr(1) << '_' << + if (is_unary_cell(cell->type)) { + f << cell->type.substr(1) << '_' << (cell->getParam(ID(A_SIGNED)).as_bool() ? 's' : 'u') << "<" << cell->getParam(ID(Y_WIDTH)).as_int() << ">("; dump_sigspec_rhs(cell->getPort(ID(A))); - f << ");\n"; + f << ")"; // Binary cells - } else if (cell->type.in( - ID($and), ID($or), ID($xor), ID($xnor), ID($logic_and), ID($logic_or), - ID($shl), ID($sshl), ID($shr), ID($sshr), ID($shift), ID($shiftx), - ID($eq), ID($ne), ID($eqx), ID($nex), ID($gt), ID($ge), ID($lt), ID($le), - ID($add), ID($sub), ID($mul), ID($div), ID($mod))) { - f << indent; - dump_sigspec_lhs(cell->getPort(ID(Y))); - f << " = " << cell->type.substr(1) << '_' << + } else if (is_binary_cell(cell->type)) { + f << cell->type.substr(1) << '_' << (cell->getParam(ID(A_SIGNED)).as_bool() ? 's' : 'u') << (cell->getParam(ID(B_SIGNED)).as_bool() ? 's' : 'u') << "<" << cell->getParam(ID(Y_WIDTH)).as_int() << ">("; dump_sigspec_rhs(cell->getPort(ID(A))); f << ", "; dump_sigspec_rhs(cell->getPort(ID(B))); - f << ");\n"; + f << ")"; // Muxes } else if (cell->type == ID($mux)) { - f << indent; - dump_sigspec_lhs(cell->getPort(ID(Y))); - f << " = "; + f << "("; dump_sigspec_rhs(cell->getPort(ID(S))); f << " ? "; dump_sigspec_rhs(cell->getPort(ID(B))); f << " : "; dump_sigspec_rhs(cell->getPort(ID(A))); + f << ")"; + } else { + log_assert(false); + } + } + + bool is_cell_elided(const RTLIL::Cell *cell) + { + return cell->hasPort(ID(Y)) && cell->getPort(ID(Y)).is_wire() && elided_wires.count(cell->getPort(ID(Y)).as_wire()); + } + + void collect_cell(const RTLIL::Cell *cell, std::vector &cells) + { + if (!is_cell_elided(cell)) + return; + + cells.push_back(cell->name); + for (auto port : cell->connections()) + if (port.first != ID(Y)) + collect_sigspec_rhs(port.second, cells); + } + + void dump_cell(const RTLIL::Cell *cell) + { + if (is_cell_elided(cell)) + return; + if (cell->type == ID($meminit)) + return; // Handled elsewhere. + + std::vector elided_cells; + if (is_elidable_cell(cell->type)) { + for (auto port : cell->connections()) + if (port.first != ID(Y)) + collect_sigspec_rhs(port.second, elided_cells); + } + if (elided_cells.empty()) { + dump_attrs(cell); + f << indent << "// cell " << cell->name.str() << "\n"; + } else { + f << indent << "// cells"; + for (auto elided_cell : elided_cells) + f << " " << elided_cell.str(); + f << "\n"; + } + + // Elidable cells + if (is_elidable_cell(cell->type)) { + f << indent; + dump_sigspec_lhs(cell->getPort(ID(Y))); + f << " = "; + dump_cell_elided(cell); f << ";\n"; // Parallel (one-hot) muxes } else if (cell->type == ID($pmux)) { @@ -309,7 +570,7 @@ struct CxxrtlWorker { dec_indent(); f << indent << "}\n"; // Flip-flops - } else if (cell->type.in(ID($dff), ID($dffe), ID($adff), ID($dffsr))) { + } else if (is_ff_cell(cell->type)) { if (cell->getPort(ID(CLK)).is_wire()) { // Edge-sensitive logic RTLIL::SigBit clk_bit = cell->getPort(ID(CLK))[0]; @@ -437,8 +698,6 @@ struct CxxrtlWorker { f << indent << "}\n"; } // Memory initializers - } else if (cell->type == ID($meminit)) { - // Handled elsewhere. } else if (cell->type[0] == '$') { log_cmd_error("Unsupported internal cell `%s'.\n", cell->type.c_str()); } else { @@ -446,6 +705,15 @@ struct CxxrtlWorker { } } + void dump_assign(const RTLIL::SigSig &sigsig) + { + f << indent; + dump_sigspec_lhs(sigsig.first); + f << " = "; + dump_sigspec_rhs(sigsig.second); + f << ";\n"; + } + void dump_case_rule(const RTLIL::CaseRule *rule) { for (auto action : rule->actions) @@ -571,6 +839,9 @@ struct CxxrtlWorker { void dump_wire(const RTLIL::Wire *wire) { + if (elided_wires.count(wire)) + return; + dump_attrs(wire); f << indent << "wire<" << wire->width << "> " << mangle(wire); if (wire->attributes.count(ID(init))) { @@ -661,7 +932,7 @@ struct CxxrtlWorker { dump_cell(cell); f << indent << "// connections\n"; for (auto conn : module->connections()) - dump_assign(conn); + dump_connect(conn); for (auto proc : module->processes) dump_process(proc.second); for (auto sync_type : sync_types) { @@ -680,6 +951,8 @@ struct CxxrtlWorker { inc_indent(); f << indent << "bool changed = false;\n"; for (auto wire : module->wires()) { + if (elided_wires.count(wire)) + continue; if (sync_wires[wire]) { std::string wire_prev = mangle(wire) + "_prev"; std::string wire_curr = mangle(wire) + ".curr"; @@ -773,10 +1046,16 @@ struct CxxrtlWorker { void analyze_design(RTLIL::Design *design) { for (auto module : design->modules()) { + FlowGraph flow; SigMap &sigmap = sigmaps[module]; sigmap.set(module); + for (auto conn : module->connections()) + flow.add_node(conn); + for (auto cell : module->cells()) { + flow.add_node(cell); + // Various DFF cells are treated like posedge/negedge processes, see above for details. if (cell->type.in(ID($dff), ID($dffe), ID($adff), ID($dffsr))) { if (cell->getPort(ID(CLK)).is_wire()) @@ -802,7 +1081,9 @@ struct CxxrtlWorker { log_assert(false); } - for (auto proc : module->processes) + for (auto proc : module->processes) { + flow.add_node(proc.second); + for (auto sync : proc.second->syncs) switch (sync->type) { // Edge-type sync rules require pre-registration. @@ -826,6 +1107,18 @@ struct CxxrtlWorker { case RTLIL::STg: log_cmd_error("Global clock is not supported.\n"); } + } + + for (auto wire : module->wires()) { + if (!flow.is_elidable(wire)) continue; + if (wire->port_id != 0) continue; + if (wire->get_bool_attribute(ID(keep))) continue; + if (wire->name.begins_with("$") && !elide_internal) continue; + if (wire->name.begins_with("\\") && !elide_public) continue; + if (sync_wires[wire]) continue; + log_assert(flow.wire_defs[wire].size() == 1); + elided_wires[wire] = **flow.wire_defs[wire].begin(); + } } } @@ -879,23 +1172,54 @@ struct CxxrtlBackend : public Backend { log("\n"); log("Write C++ code for simulating the design.\n"); log("\n"); + // -O2 (and not -O1) is the default because wire elision results in dramatic (>10x) decrease in compile- and run-time, + // which is well worth the need to manually drop to -O1 or to mark interesting wires with (*keep*). + log(" -O \n"); + log(" set the optimization level. the default is -O2.\n"); + log("\n"); + log(" -O0\n"); + log(" no optimization.\n"); + log("\n"); + log(" -O1\n"); + log(" elide internal wires if possible.\n"); + log("\n"); + log(" -O2\n"); + log(" like -O1, and elide public wires not marked (*keep*) if possible.\n"); + log("\n"); } void execute(std::ostream *&f, std::string filename, std::vector args, RTLIL::Design *design) YS_OVERRIDE { + int opt_level = 2; + log_header(design, "Executing CXXRTL backend.\n"); size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { - // if (args[argidx] == "-top" && argidx+1 < args.size()) { - // top_module_name = args[++argidx]; - // continue; - // } + if (args[argidx] == "-O" && argidx+1 < args.size()) { + opt_level = std::stoi(args[++argidx]); + continue; + } + if (args[argidx].substr(0, 2) == "-O" && args[argidx].size() == 3 && isdigit(args[argidx][2])) { + opt_level = std::stoi(args[argidx].substr(2)); + continue; + } break; } extra_args(f, filename, args, argidx); CxxrtlWorker worker(*f); + switch (opt_level) { + case 2: + worker.elide_public = true; + case 1: + worker.elide_internal = true; + case 0: + break; + default: + log_cmd_error("Invalid optimization level %d.\n", opt_level); + } + worker.prepare_design(design); worker.dump_design(design); } -- 2.30.2