flowmap: new techmap pass.
authorwhitequark <whitequark@whitequark.org>
Wed, 2 Jan 2019 14:09:53 +0000 (14:09 +0000)
committerwhitequark <whitequark@whitequark.org>
Thu, 3 Jan 2019 14:28:19 +0000 (14:28 +0000)
passes/opt/Makefile.inc
passes/techmap/Makefile.inc
passes/techmap/flowmap.cc [new file with mode: 0644]

index 0f596b1f49e99fcd3ce1a5dd058c4c9ae14e8c66..c3e0a2a40204b70ef7134455eb40670822bf916c 100644 (file)
@@ -6,12 +6,12 @@ OBJS += passes/opt/opt_reduce.o
 OBJS += passes/opt/opt_rmdff.o
 OBJS += passes/opt/opt_clean.o
 OBJS += passes/opt/opt_expr.o
-OBJS += passes/opt/opt_lut.o
 
 ifneq ($(SMALL),1)
 OBJS += passes/opt/share.o
 OBJS += passes/opt/wreduce.o
 OBJS += passes/opt/opt_demorgan.o
 OBJS += passes/opt/rmports.o
+OBJS += passes/opt/opt_lut.o
 endif
 
index 4faa0ab00b885e339701ae12bf62e7d867ac1415..cf9e198adaa3004cf9977f21d27da22b1dcd7aef 100644 (file)
@@ -36,6 +36,7 @@ OBJS += passes/techmap/attrmvcp.o
 OBJS += passes/techmap/attrmap.o
 OBJS += passes/techmap/zinit.o
 OBJS += passes/techmap/dff2dffs.o
+OBJS += passes/techmap/flowmap.o
 endif
 
 GENFILES += passes/techmap/techmap.inc
diff --git a/passes/techmap/flowmap.cc b/passes/techmap/flowmap.cc
new file mode 100644 (file)
index 0000000..cdca78d
--- /dev/null
@@ -0,0 +1,873 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2018  whitequark <whitequark@whitequark.org>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+// [[CITE]]
+// Jason Cong; Yuzheng Ding, "An Optimal Technology Mapping Algorithm for Delay Optimization in Lookup-Table Based FPGA Designs,"
+// Computer-Aided Design of Integrated Circuits and Systems, IEEE Transactions on, vol. 13, no. 1, Jan 1994
+// doi: 10.1109/43.273754
+
+// Required reading material:
+//
+// Min-cut max-flow theorem:
+//   https://www.coursera.org/lecture/algorithms-part2/maxflow-mincut-theorem-beb9G
+// FlowMap paper:
+//   http://cadlab.cs.ucla.edu/~cong/papers/iccad92.pdf
+
+// Notes on implementation:
+//
+// 1. In the paper, the nodes are logic elements (analogous to Yosys cells) and edges are wires. However, in our implementation, we use
+// an inverted approach: the nodes are Yosys wire bits, and the edges are derived from (but aren't represented by) Yosys cells.
+// This may seem counterintuitive. Three observations may help understanding this. First, for a cell with a 1-bit Y output that is
+// the sole driver of its output net (which is the typical case), these representations are equivalent, because there is an exact
+// correspondence between cells and output wires. Second, in the paper, primary inputs (analogous to Yosys cell or module ports) are
+// nodes, and in Yosys, inputs are wires; our approach allows a direct mapping from both primary inputs and 1-output logic elements to
+// flow graph nodes. Third, Yosys cells may have multiple outputs or multi-bit outputs, and by using Yosys wire bits as flow graph nodes,
+// such cells are supported without any additional effort; any Yosys cell with n output wire bits ends up being split into n flow graph
+// nodes.
+//
+// 2. The paper introduces three networks: Nt, Nt', and Nt''. The network Nt is directly represented by a subgraph of RTLIL graph,
+// which is parsed into an equivalent but easier to traverse representation in FlowmapWorker. The network Nt' is built explicitly
+// from a subgraph of Nt, and uses a similar representation in FlowGraph. The network Nt'' is implicit in FlowGraph, which is possible
+// because of the following observation: each Nt' node corresponds to an Nt'' edge of capacity 1, and each Nt' edge corresponds to
+// an Nt'' edge of capacity ∞. Therefore, we only need to explicitly record flow for Nt' edges and through Nt' nodes.
+//
+// 3. The paper ambiguously states: "Moreover, we can find such a cut (X′′, X̅′′) by performing a depth first search starting at the source s,
+// and including in X′′ all the nodes which are reachable from s." This actually refers to a specific kind of search, mincut computation.
+// Mincut computation involves computing the set of nodes reachable from s by an undirected path with no full (i.e. zero capacity) forward
+// edges or empty (i.e. no flow) backward edges.
+
+#include "kernel/yosys.h"
+#include "kernel/sigtools.h"
+#include "kernel/modtools.h"
+#include "kernel/consteval.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+struct GraphStyle
+{
+       string label;
+       string color;
+
+       GraphStyle(string label = "", string color = "black") :
+               label(label), color(color) {}
+};
+
+static string dot_escape(string value)
+{
+       std::string escaped;
+       for (char c : value) {
+               if (c == '\n')
+               {
+                       escaped += "\\n";
+                       continue;
+               }
+               if (c == '\\' || c == '"')
+                       escaped += "\\";
+               escaped += c;
+       }
+       return escaped;
+}
+
+static void dump_dot_graph(string filename,
+                           pool<RTLIL::SigBit> nodes, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges,
+                           pool<RTLIL::SigBit> inputs, pool<RTLIL::SigBit> outputs,
+                           std::function<GraphStyle(RTLIL::SigBit)> node_style =
+                                       [](RTLIL::SigBit) { return GraphStyle{}; },
+                           std::function<GraphStyle(RTLIL::SigBit, RTLIL::SigBit)> edge_style =
+                                       [](RTLIL::SigBit, RTLIL::SigBit) { return GraphStyle{}; },
+                           string name = "")
+{
+       FILE *f = fopen(filename.c_str(), "w");
+       fprintf(f, "digraph \"%s\" {\n", name.c_str());
+       fprintf(f, "  rankdir=\"TB\";\n");
+
+       dict<RTLIL::SigBit, int> ids;
+       for (auto node : nodes)
+       {
+               ids[node] = ids.size();
+
+               string shape = "ellipse";
+               if (inputs[node])
+                       shape = "box";
+               if (outputs[node])
+                       shape = "octagon";
+               auto prop = node_style(node);
+               string id;
+               if (node == SigBit())
+                       id = "(source)";
+               else
+                       id = log_signal(node);
+               fprintf(f, "  n%d [ shape=%s, fontname=\"Monospace\", label=\"%s%s\", color=\"%s\" ];\n",
+                       ids[node], shape.c_str(), dot_escape(id).c_str(), dot_escape(prop.label.c_str()).c_str(), prop.color.c_str());
+       }
+
+       fprintf(f, "  { rank=\"source\"; ");
+       for (auto input : inputs)
+               if (nodes[input])
+                       fprintf(f, "n%d; ", ids[input]);
+       fprintf(f, "}\n");
+
+       fprintf(f, "  { rank=\"sink\"; ");
+       for (auto output : outputs)
+               if (nodes[output])
+                       fprintf(f, "n%d; ", ids[output]);
+       fprintf(f, "}\n");
+
+       for (auto edge : edges)
+       {
+               auto source = edge.first;
+               for (auto sink : edge.second) {
+                       if (nodes[source] && nodes[sink])
+                       {
+                               auto prop = edge_style(source, sink);
+                               fprintf(f, "  n%d -> n%d [ label=\"%s\", color=\"%s\" ];\n",
+                                       ids[source], ids[sink], dot_escape(prop.label.c_str()).c_str(), prop.color.c_str());
+                       }
+               }
+       }
+
+       fprintf(f, "}\n");
+       fclose(f);
+}
+
+struct FlowGraph
+{
+       const RTLIL::SigBit source;
+       RTLIL::SigBit sink;
+       pool<RTLIL::SigBit> nodes = {source};
+       dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges_fw, edges_bw;
+
+       const int MAX_NODE_FLOW = 1;
+       dict<RTLIL::SigBit, int> node_flow;
+       dict<pair<RTLIL::SigBit, RTLIL::SigBit>, int> edge_flow;
+
+       dict<RTLIL::SigBit, pool<RTLIL::SigBit>> collapsed;
+
+       void dump_dot_graph(string filename)
+       {
+               auto node_style = [&](RTLIL::SigBit node) {
+                       string label;
+                       for (auto collapsed_node : collapsed[node])
+                               label += stringf(" %s", log_signal(collapsed_node));
+                       int flow = node_flow[node];
+                       if (node != source && node != sink)
+                               label += stringf("\n%d/%d", flow, MAX_NODE_FLOW);
+                       else
+                               label += stringf("\n%d/∞", flow);
+                       return GraphStyle{label, flow < MAX_NODE_FLOW ? "green" : "black"};
+               };
+               auto edge_style = [&](RTLIL::SigBit source, RTLIL::SigBit sink) {
+                       int flow = edge_flow[{source, sink}];
+                       return GraphStyle{stringf("%d/∞", flow), flow > 0 ? "blue" : "black"};
+               };
+               ::dump_dot_graph(filename, nodes, edges_fw, {source}, {sink}, node_style, edge_style);
+       }
+
+       // Here, we are working on the Nt'' network, but our representation is the Nt' network.
+       // The difference between these is that where in Nt' we have a subgraph:
+       //
+       //   v1 -> v2 -> v3
+       //
+       // in Nt'' we have a corresponding subgraph:
+       //
+       //   v'1b -∞-> v'2t -f-> v'2b -∞-> v'3t
+       //
+       // To address this, we split each node v into two nodes, v't and v'b. This representation is virtual,
+       // in the sense that nodes v't and v'b are overlaid on top of the original node v, and only exist
+       // in paths and worklists.
+
+       struct NodePrime
+       {
+               RTLIL::SigBit node;
+               bool is_bottom;
+
+               NodePrime(RTLIL::SigBit node, bool is_bottom) :
+                       node(node), is_bottom(is_bottom) {}
+
+               bool operator==(const NodePrime &other) const
+               {
+                       return node == other.node && is_bottom == other.is_bottom;
+               }
+               bool operator!=(const NodePrime &other) const
+               {
+                       return !(*this == other);
+               }
+               unsigned int hash() const
+               {
+                       return hash_ops<pair<RTLIL::SigBit, int>>::hash({node, is_bottom});
+               }
+
+               static NodePrime top(RTLIL::SigBit node)
+               {
+                       return NodePrime(node, /*is_bottom=*/false);
+               }
+
+               static NodePrime bottom(RTLIL::SigBit node)
+               {
+                       return NodePrime(node, /*is_bottom=*/true);
+               }
+
+               NodePrime as_top() const
+               {
+                       log_assert(is_bottom);
+                       return top(node);
+               }
+
+               NodePrime as_bottom() const
+               {
+                       log_assert(!is_bottom);
+                       return bottom(node);
+               }
+       };
+
+       bool find_augmenting_path(bool commit)
+       {
+               NodePrime source_prime = {source, true};
+               NodePrime sink_prime = {sink, false};
+               vector<NodePrime> path = {source_prime};
+               pool<NodePrime> visited = {};
+               bool found;
+               do {
+                       found = false;
+
+                       auto node_prime = path.back();
+                       visited.insert(node_prime);
+
+                       if (!node_prime.is_bottom) // vt
+                       {
+                               if (!visited[node_prime.as_bottom()] && node_flow[node_prime.node] < MAX_NODE_FLOW)
+                               {
+                                       path.push_back(node_prime.as_bottom());
+                                       found = true;
+                               }
+                               else
+                               {
+                                       for (auto node_pred : edges_bw[node_prime.node])
+                                       {
+                                               if (!visited[NodePrime::bottom(node_pred)] && edge_flow[{node_pred, node_prime.node}] > 0)
+                                               {
+                                                       path.push_back(NodePrime::bottom(node_pred));
+                                                       found = true;
+                                                       break;
+                                               }
+                                       }
+                               }
+                       }
+                       else // vb
+                       {
+                               if (!visited[node_prime.as_top()] && node_flow[node_prime.node] > 0)
+                               {
+                                       path.push_back(node_prime.as_top());
+                                       found = true;
+                               }
+                               else
+                               {
+                                       for (auto node_succ : edges_fw[node_prime.node])
+                                       {
+                                               if (!visited[NodePrime::top(node_succ)] /* && edge_flow[...] < ∞ */)
+                                               {
+                                                       path.push_back(NodePrime::top(node_succ));
+                                                       found = true;
+                                                       break;
+                                               }
+                                       }
+                               }
+                       }
+
+                       if (!found && path.size() > 1)
+                       {
+                               path.pop_back();
+                               found = true;
+                       }
+               } while(path.back() != sink_prime && found);
+
+               if (commit && path.back() == sink_prime)
+               {
+                       auto prev_prime = path.front();
+                       for (auto node_prime : path)
+                       {
+                               if (node_prime == source_prime)
+                                       continue;
+
+                               log_assert(prev_prime.is_bottom ^ node_prime.is_bottom);
+                               if (prev_prime.node == node_prime.node)
+                               {
+                                       auto node = node_prime.node;
+                                       if (!prev_prime.is_bottom && node_prime.is_bottom)
+                                       {
+                                               log_assert(node_flow[node] == 0);
+                                               node_flow[node]++;
+                                       }
+                                       else
+                                       {
+                                               log_assert(node_flow[node] != 0);
+                                               node_flow[node]--;
+                                       }
+                               }
+                               else
+                               {
+                                       if (prev_prime.is_bottom && !node_prime.is_bottom)
+                                       {
+                                               log_assert(true /* edge_flow[...] < ∞ */);
+                                               edge_flow[{prev_prime.node, node_prime.node}]++;
+                                       }
+                                       else
+                                       {
+                                               log_assert((edge_flow[{node_prime.node, prev_prime.node}] > 0));
+                                               edge_flow[{node_prime.node, prev_prime.node}]--;
+                                       }
+                               }
+                               prev_prime = node_prime;
+                       }
+
+                       node_flow[source]++;
+                       node_flow[sink]++;
+               }
+               return path.back() == sink_prime;
+       }
+
+       int maximum_flow(int order)
+       {
+               int flow = 0;
+               while (flow < order && find_augmenting_path(/*commit=*/true))
+                       flow++;
+               return flow + find_augmenting_path(/*commit=*/false);
+       }
+
+       pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> edge_cut()
+       {
+               pool<RTLIL::SigBit> x, xi;
+
+               NodePrime source_prime = {source, true};
+               NodePrime sink_prime = {sink, false};
+               pool<NodePrime> worklist = {source_prime}, visited;
+               while (!worklist.empty())
+               {
+                       auto node_prime = worklist.pop();
+                       if (visited[node_prime])
+                               continue;
+                       visited.insert(node_prime);
+
+                       if (!node_prime.is_bottom)
+                               x.insert(node_prime.node);
+
+                       // Mincut is constructed by traversing a graph in an undirected way along forward edges that aren't full, or backward edges
+                       // that aren't empty.
+                       if (!node_prime.is_bottom) // top
+                       {
+                               if (node_flow[node_prime.node] < MAX_NODE_FLOW)
+                                       worklist.insert(node_prime.as_bottom());
+                               for (auto node_pred : edges_bw[node_prime.node])
+                                       if (edge_flow[{node_pred, node_prime.node}] > 0)
+                                               worklist.insert(NodePrime::bottom(node_pred));
+                       }
+                       else // bottom
+                       {
+                               if (node_flow[node_prime.node] > 0)
+                                       worklist.insert(node_prime.as_top());
+                               for (auto node_succ : edges_fw[node_prime.node])
+                                       if (true /* edge_flow[...] < ∞ */)
+                                               worklist.insert(NodePrime::top(node_succ));
+                       }
+               }
+
+               for (auto node : nodes)
+                       if (!x[node])
+                               xi.insert(node);
+
+               for (auto collapsed_node : collapsed[sink])
+                       xi.insert(collapsed_node);
+
+               log_assert(!x[sink] && xi[sink]);
+               return {x, xi};
+       }
+};
+
+struct FlowmapWorker
+{
+       int order;
+       pool<IdString> cell_types;
+       bool debug;
+
+       RTLIL::Module *module;
+       SigMap sigmap;
+       ModIndex index;
+       pool<RTLIL::Cell*> cells;
+
+       pool<RTLIL::SigBit> nodes, inputs, outputs;
+       dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges_fw, edges_bw;
+       dict<RTLIL::SigBit, int> labels;
+
+       dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_gates, lut_inputs;
+
+       dict<RTLIL::SigBit, ModIndex::PortInfo> node_origins;
+       dict<RTLIL::Cell*, pool<RTLIL::SigBit>> cell_fanout;
+
+       int mapped_count = 0, packed_count = 0, unique_packed_count = 0;
+
+       void dump_dot_graph(string filename, pool<RTLIL::SigBit> subgraph = {}, pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> cut = {})
+       {
+               if (subgraph.empty())
+                       subgraph = nodes;
+
+               auto node_style = [&](RTLIL::SigBit node) {
+                       string label, color;
+                       if (labels[node] == -1)
+                               label = string("\n<unlabeled>");
+                       else
+                               label = stringf("\nl=%d", labels[node]);
+                       color = "black";
+                       if (cut.first[node])
+                               color = "blue";
+                       if (cut.second[node])
+                               color = "red";
+                       return GraphStyle{label, color};
+               };
+               auto edge_style = [&](RTLIL::SigBit, RTLIL::SigBit) {
+                       return GraphStyle{};
+               };
+               ::dump_dot_graph(filename, subgraph, edges_fw, inputs, outputs, node_style, edge_style, module->name.str());
+       }
+
+       pool<RTLIL::SigBit> find_subgraph(RTLIL::SigBit sink)
+       {
+               pool<RTLIL::SigBit> subgraph;
+               pool<RTLIL::SigBit> worklist = {sink};
+               while (!worklist.empty())
+               {
+                       auto node = worklist.pop();
+                       subgraph.insert(node);
+                       for (auto source : edges_bw[node])
+                       {
+                               if (!subgraph[source])
+                                       worklist.insert(source);
+                       }
+               }
+               return subgraph;
+       }
+
+       FlowGraph build_flow_graph(RTLIL::SigBit sink, int p)
+       {
+               FlowGraph flow_graph;
+               flow_graph.sink = sink;
+
+               pool<RTLIL::SigBit> worklist = {sink}, visited;
+               while (!worklist.empty())
+               {
+                       auto node = worklist.pop();
+                       visited.insert(node);
+
+                       auto collapsed_node = labels[node] == p ? sink : node;
+                       if (node != collapsed_node)
+                               flow_graph.collapsed[collapsed_node].insert(node);
+                       flow_graph.nodes.insert(collapsed_node);
+
+                       for (auto node_pred : edges_bw[node])
+                       {
+                               auto collapsed_node_pred = labels[node_pred] == p ? sink : node_pred;
+                               if (node_pred != collapsed_node_pred)
+                                       flow_graph.collapsed[collapsed_node_pred].insert(node_pred);
+                               if (collapsed_node != collapsed_node_pred)
+                               {
+                                       flow_graph.edges_bw[collapsed_node].insert(collapsed_node_pred);
+                                       flow_graph.edges_fw[collapsed_node_pred].insert(collapsed_node);
+                               }
+                               if (inputs[node_pred])
+                               {
+                                       flow_graph.edges_bw[collapsed_node_pred].insert(flow_graph.source);
+                                       flow_graph.edges_fw[flow_graph.source].insert(collapsed_node_pred);
+                               }
+
+                               if (!visited[node_pred])
+                                       worklist.insert(node_pred);
+                       }
+               }
+               return flow_graph;
+       }
+
+       FlowmapWorker(int order, pool<IdString> cell_types, bool debug, RTLIL::Module *module) :
+               order(order), cell_types(cell_types), debug(debug), module(module), sigmap(module), index(module)
+       {
+               log("Labeling cells.\n");
+               for (auto cell : module->selected_cells())
+               {
+                       if (cell_types[cell->type])
+                       {
+                               if (!cell->known())
+                               {
+                                       log_error("Cell %s (%s.%s) is unknown.\n", cell->type.c_str(), log_id(module), log_id(cell));
+                               }
+                               cells.insert(cell);
+
+                               for (auto conn : cell->connections())
+                               {
+                                       if (!cell->output(conn.first)) continue;
+                                       int offset = -1;
+                                       for (auto bit : conn.second)
+                                       {
+                                               offset++;
+                                               if (!bit.wire) continue;
+                                               auto mapped_bit = sigmap(bit);
+                                               if (nodes[mapped_bit])
+                                                       log_error("Multiple drivers found for wire %s.\n", log_signal(mapped_bit));
+                                               nodes.insert(mapped_bit);
+                                               node_origins[mapped_bit] = ModIndex::PortInfo(cell, conn.first, offset);
+                                               cell_fanout[cell].insert(mapped_bit);
+                                       }
+                               }
+
+                               int fanin = 0;
+                               for (auto conn : cell->connections())
+                               {
+                                       if (!cell->input(conn.first)) continue;
+                                       for (auto bit : sigmap(conn.second))
+                                       {
+                                               if (!bit.wire) continue;
+                                               for (auto fanout_bit : cell_fanout[cell])
+                                               {
+                                                       edges_fw[bit].insert(fanout_bit);
+                                                       edges_bw[fanout_bit].insert(bit);
+                                               }
+                                               fanin++;
+                                       }
+                               }
+
+                               if (fanin > order)
+                                       log_error("Cell %s (%s.%s) with fan-in %d cannot be mapped to a %d-LUT.\n",
+                                                 cell->type.c_str(), log_id(module), log_id(cell), fanin, order);
+                       }
+               }
+
+               for (auto edge : edges_fw)
+               {
+                       if (!nodes[edge.first])
+                       {
+                               inputs.insert(edge.first);
+                               nodes.insert(edge.first);
+                       }
+               }
+
+               for (auto node : nodes)
+               {
+                       auto node_info = index.query(node);
+                       if (node_info->is_output && !inputs[node])
+                               outputs.insert(node);
+                       for (auto port : node_info->ports)
+                               if (!cell_types[port.cell->type] && !inputs[node])
+                                       outputs.insert(node);
+               }
+
+               for (auto node : nodes)
+                       labels[node] = -1;
+               for (auto input : inputs)
+                       labels[input] = 0;
+
+               if (debug)
+               {
+                       dump_dot_graph("flowmap-init.dot");
+                       log("Dumped complete combinatorial graph to `flowmap-init.dot`.\n");
+               }
+
+               pool<RTLIL::SigBit> worklist = nodes;
+               int debug_num = 0;
+               while (!worklist.empty())
+               {
+                       auto sink = worklist.pop();
+                       if (labels[sink] != -1)
+                               continue;
+
+                       bool inputs_have_labels = true;
+                       for (auto sink_input : edges_bw[sink])
+                       {
+                               if (labels[sink_input] == -1)
+                               {
+                                       inputs_have_labels = false;
+                                       break;
+                               }
+                       }
+                       if (!inputs_have_labels)
+                               continue;
+
+                       if (debug)
+                       {
+                               debug_num++;
+                               log("Examining subgraph %d rooted in %s.\n", debug_num, log_signal(sink));
+                       }
+
+                       pool<RTLIL::SigBit> subgraph = find_subgraph(sink);
+
+                       int p = 1;
+                       for (auto subgraph_node : subgraph)
+                               p = max(p, labels[subgraph_node]);
+
+                       FlowGraph flow_graph = build_flow_graph(sink, p);
+                       int flow = flow_graph.maximum_flow(order);
+                       pool<RTLIL::SigBit> x, xi;
+                       if (flow <= order)
+                       {
+                               labels[sink] = p;
+                               auto cut = flow_graph.edge_cut();
+                               x = cut.first;
+                               xi = cut.second;
+                       }
+                       else
+                       {
+                               labels[sink] = p + 1;
+                               x = subgraph;
+                               x.erase(sink);
+                               xi.insert(sink);
+                       }
+                       lut_gates[sink] = xi;
+
+                       pool<RTLIL::SigBit> k;
+                       for (auto xi_node : xi)
+                       {
+                               for (auto xi_node_pred : edges_bw[xi_node])
+                                       if (x[xi_node_pred])
+                                               k.insert(xi_node_pred);
+                       }
+                       log_assert((int)k.size() <= order);
+                       lut_inputs[sink] = k;
+
+                       if (debug)
+                       {
+                               log("  Maximum flow: %d. Assigned label %d.\n", flow, labels[sink]);
+                               dump_dot_graph(stringf("flowmap-%d-sub.dot", debug_num), subgraph, {x, xi});
+                               log("  Dumped subgraph to `flowmap-%d-sub.dot`.\n", debug_num);
+                               flow_graph.dump_dot_graph(stringf("flowmap-%d-flow.dot", debug_num));
+                               log("  Dumped flow graph to `flowmap-%d-flow.dot`.\n", debug_num);
+                               log("    LUT packed:");
+                               for (auto xi_node : xi)
+                                       log(" %s", log_signal(xi_node));
+                               log(".\n");
+                               log("    LUT inputs:");
+                               for (auto k_node : k)
+                                       log(" %s", log_signal(k_node));
+                               log(".\n");
+                       }
+
+                       for (auto sink_succ : edges_fw[sink])
+                               worklist.insert(sink_succ);
+               }
+
+               if (debug)
+               {
+                       dump_dot_graph("flowmap-done.dot");
+                       log("Dumped complete combinatorial graph to `flowmap-done.dot`.\n");
+               }
+
+               int depth = 0;
+               for (auto label : labels)
+                       depth = max(depth, label.second);
+               log("Maximum depth: %d levels.\n", depth);
+
+               ConstEval ce(module);
+               for (auto input_node : inputs)
+                       ce.stop(input_node);
+
+               log("\n");
+               log("Mapping cells.\n");
+
+               pool<RTLIL::SigBit> mapped_nodes;
+               worklist = outputs;
+               while (!worklist.empty())
+               {
+                       auto node = worklist.pop();
+                       if (node_origins.count(node))
+                       {
+                               auto origin = node_origins[node];
+                               if (origin.cell->getPort(origin.port).size() == 1)
+                                       log("Mapping %s.%s.%s (%s).\n",
+                                           log_id(module), log_id(origin.cell), origin.port.c_str(), log_signal(node));
+                               else
+                                       log("Mapping %s.%s.%s [%d] (%s).\n",
+                                           log_id(module), log_id(origin.cell), origin.port.c_str(), origin.offset, log_signal(node));
+                       }
+                       else
+                       {
+                               log("Mapping %s.%s.\n", log_id(module), log_signal(node));
+                       }
+
+                       for (auto gate_node : lut_gates[node])
+                       {
+                               log_assert(node_origins.count(gate_node));
+
+                               if (gate_node == node)
+                                       continue;
+
+                               auto gate_origin = node_origins[gate_node];
+                               if (gate_origin.cell->getPort(gate_origin.port).size() == 1)
+                                       log("  Packing %s.%s.%s (%s).\n",
+                                           log_id(module), log_id(gate_origin.cell), gate_origin.port.c_str(), log_signal(gate_node));
+                               else
+                                       log("  Packing %s.%s.%s [%d] (%s).\n",
+                                           log_id(module), log_id(gate_origin.cell), gate_origin.port.c_str(), gate_origin.offset, log_signal(gate_node));
+                       }
+
+                       vector<RTLIL::SigBit> input_nodes(lut_inputs[node].begin(), lut_inputs[node].end());
+                       RTLIL::Const lut_table(State::Sx, 1 << input_nodes.size());
+                       for (unsigned i = 0; i < (1 << input_nodes.size()); i++)
+                       {
+                               ce.push();
+                               for (size_t n = 0; n < input_nodes.size(); n++)
+                                       ce.set(input_nodes[n], ((i >> n) & 1) ? State::S1 : State::S0);
+
+                               RTLIL::SigSpec value = node, undef;
+                               if (!ce.eval(value, undef))
+                               {
+                                       string env;
+                                       for (auto input_node : input_nodes)
+                                               env += stringf("  %s = %s\n", log_signal(input_node), log_signal(ce.values_map(input_node)));
+                                       log_error("Cannot evaluate %s because %s is not defined.\nEvaluation environment:\n%s",
+                                                 log_signal(node), log_signal(undef), env.c_str());
+                               }
+
+                               lut_table[i] = value.as_bool() ? State::S1 : State::S0;
+                               ce.pop();
+                       }
+
+                       RTLIL::SigSpec lut_a, lut_y = node;
+                       for (auto input_node : input_nodes)
+                               lut_a.append_bit(input_node);
+
+                       RTLIL::Cell *lut = module->addLut(NEW_ID, lut_a, lut_y, lut_table);
+                       mapped_count++;
+
+                       for (auto gate_node : lut_gates[node])
+                       {
+                               auto gate_origin = node_origins[gate_node];
+                               lut->add_strpool_attribute("\\src", gate_origin.cell->get_strpool_attribute("\\src"));
+                               packed_count++;
+                       }
+
+                       log("  Packed into a %d-LUT %s.%s.\n", (int)input_nodes.size(), log_id(module), log_id(lut));
+
+                       mapped_nodes.insert(node);
+                       for (auto input_node : input_nodes)
+                       {
+                               if (!mapped_nodes[input_node] && !inputs[input_node])
+                                       worklist.insert(input_node);
+                       }
+               }
+
+               unique_packed_count += nodes.size();
+
+               for (auto node : mapped_nodes)
+               {
+                       auto origin = node_origins[node];
+                       RTLIL::SigSpec driver = origin.cell->getPort(origin.port);
+                       driver[origin.offset] = module->addWire(NEW_ID);
+                       origin.cell->setPort(origin.port, driver);
+               }
+       }
+};
+
+static void split(std::vector<std::string> &tokens, const std::string &text, char sep)
+{
+       size_t start = 0, end = 0;
+       while ((end = text.find(sep, start)) != std::string::npos) {
+               tokens.push_back(text.substr(start, end - start));
+               start = end + 1;
+       }
+       tokens.push_back(text.substr(start));
+}
+
+struct FlowmapPass : public Pass {
+       FlowmapPass() : Pass("flowmap", "pack LUTs with FlowMap") { }
+       void help() YS_OVERRIDE
+       {
+               //   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+               log("\n");
+               log("    flowmap [options] [selection]\n");
+               log("\n");
+               log("This pass uses the FlowMap technology mapping algorithm to pack logic gates\n");
+               log("into k-LUTs with optimal depth. It allows mapping any circuit elements that can\n");
+               log("be evaluated with the `eval` pass, including cells with multiple output ports\n");
+               log("and multi-bit input and output ports.\n");
+               log("\n");
+               log("    -maxlut <k>\n");
+               log("        perform technology mapping for a k-LUT architecture. if not specified,\n");
+               log("        defaults to 3.\n");
+               log("\n");
+               log("    -cells <cell>[,<cell>,...]\n");
+               log("        map specified cells. if not specified, maps $_NOT_, $_AND_, $_OR_,\n");
+               log("        $_XOR_ and $_MUX_, which are the outputs of the `simplemap` pass.\n");
+               log("\n");
+               log("    -debug\n");
+               log("        dump intermediate graphs.\n");
+               log("\n");
+       }
+       void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
+       {
+               log_header(design, "Executing FLOWMAP pass (pack LUTs with FlowMap).\n");
+
+               int order = 3;
+               vector<string> cells;
+               bool debug = false;
+
+               size_t argidx;
+               for (argidx = 1; argidx < args.size(); argidx++)
+               {
+                       if (args[argidx] == "-maxlut" && argidx + 1 < args.size())
+                       {
+                               order = atoi(args[++argidx].c_str());
+                               continue;
+                       }
+                       if (args[argidx] == "-cells" && argidx + 1 < args.size())
+                       {
+                               split(cells, args[++argidx], ',');
+                               continue;
+                       }
+                       if (args[argidx] == "-debug")
+                       {
+                               debug = true;
+                               continue;
+                       }
+                       break;
+               }
+               extra_args(args, argidx, design);
+
+               pool<IdString> cell_types;
+               if (!cells.empty())
+               {
+                       for (auto &cell : cells)
+                               cell_types.insert(cell);
+               }
+               else
+               {
+                       cell_types = {"$_NOT_", "$_AND_", "$_OR_", "$_XOR_", "$_MUX_"};
+               }
+
+               int mapped_count = 0, packed_count = 0, unique_packed_count = 0;
+               for (auto module : design->selected_modules())
+               {
+                       FlowmapWorker worker(order, cell_types, debug, module);
+                       mapped_count += worker.mapped_count;
+                       packed_count += worker.packed_count;
+                       unique_packed_count += worker.unique_packed_count;
+               }
+
+               log("\n");
+               log("Mapped %d LUTs.\n", mapped_count);
+               log("Packed %d cells %d times.\n", unique_packed_count, packed_count);
+       }
+} FlowmapPass;
+
+PRIVATE_NAMESPACE_END