2 * yosys -- Yosys Open SYnthesis Suite
4 * Copyright (C) 2018 whitequark <whitequark@whitequark.org>
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include "kernel/yosys.h"
21 #include "kernel/sigtools.h"
22 #include "kernel/modtools.h"
25 PRIVATE_NAMESPACE_BEGIN
29 dict
<IdString
, dict
<int, IdString
>> &dlogic
;
30 RTLIL::Module
*module
;
34 pool
<RTLIL::Cell
*> luts
;
35 dict
<RTLIL::Cell
*, int> luts_arity
;
36 dict
<RTLIL::Cell
*, pool
<RTLIL::Cell
*>> luts_dlogics
;
37 dict
<RTLIL::Cell
*, pool
<int>> luts_dlogic_inputs
;
39 int combined_count
= 0;
41 bool evaluate_lut(RTLIL::Cell
*lut
, dict
<SigBit
, bool> inputs
)
43 SigSpec lut_input
= sigmap(lut
->getPort("\\A"));
44 int lut_width
= lut
->getParam("\\WIDTH").as_int();
45 Const lut_table
= lut
->getParam("\\LUT");
48 for (int i
= 0; i
< lut_width
; i
++)
50 SigBit input
= sigmap(lut_input
[i
]);
51 if (inputs
.count(input
))
53 lut_index
|= inputs
[input
] << i
;
57 lut_index
|= SigSpec(lut_input
[i
]).as_bool() << i
;
61 return lut_table
.extract(lut_index
).as_bool();
64 void show_stats_by_arity()
66 dict
<int, int> arity_counts
;
67 dict
<IdString
, int> dlogic_counts
;
70 for (auto lut_arity
: luts_arity
)
72 max_arity
= max(max_arity
, lut_arity
.second
);
73 arity_counts
[lut_arity
.second
]++;
76 for (auto &lut_dlogics
: luts_dlogics
)
78 for (auto &lut_dlogic
: lut_dlogics
.second
)
80 dlogic_counts
[lut_dlogic
->type
]++;
84 log("Number of LUTs: %8zu\n", luts
.size());
85 for (int arity
= 1; arity
<= max_arity
; arity
++)
87 if (arity_counts
[arity
])
88 log(" %d-LUT %16d\n", arity
, arity_counts
[arity
]);
90 for (auto &dlogic_count
: dlogic_counts
)
92 log(" with %-12s %4d\n", dlogic_count
.first
.c_str(), dlogic_count
.second
);
96 OptLutWorker(dict
<IdString
, dict
<int, IdString
>> &dlogic
, RTLIL::Module
*module
, int limit
) :
97 dlogic(dlogic
), module(module
), index(module
), sigmap(module
)
99 log("Discovering LUTs.\n");
100 for (auto cell
: module
->selected_cells())
102 if (cell
->type
== "$lut")
104 int lut_width
= cell
->getParam("\\WIDTH").as_int();
105 SigSpec lut_input
= cell
->getPort("\\A");
108 log("Found $lut\\WIDTH=%d cell %s.%s.\n", lut_width
, log_id(module
), log_id(cell
));
111 // First, find all dedicated logic we're connected to. This results in an overapproximation
112 // of such connections.
113 pool
<RTLIL::Cell
*> lut_all_dlogics
;
114 for (int i
= 0; i
< lut_width
; i
++)
116 SigBit bit
= lut_input
[i
];
117 for (auto &port
: index
.query_ports(bit
))
119 if (dlogic
.count(port
.cell
->type
))
121 auto &dlogic_map
= dlogic
[port
.cell
->type
];
122 if (dlogic_map
.count(i
))
124 if (port
.port
== dlogic_map
[i
])
126 lut_all_dlogics
.insert(port
.cell
);
133 // Second, make sure that the connection to dedicated logic is legal. If it is not legal,
134 // it means one of the two things:
135 // * The connection is spurious. I.e. this is dedicated logic that will be packed
136 // with some other LUT, and it just happens to be conected to this LUT as well.
137 // * The connection is illegal.
138 // In either of these cases, we don't need to concern ourselves with preserving the connection
139 // between this LUT and this dedicated logic cell.
140 pool
<RTLIL::Cell
*> lut_legal_dlogics
;
141 pool
<int> lut_dlogic_inputs
;
142 for (auto lut_dlogic
: lut_all_dlogics
)
144 auto &dlogic_map
= dlogic
[lut_dlogic
->type
];
146 for (auto &dlogic_conn
: dlogic_map
)
148 if (lut_width
<= dlogic_conn
.first
)
150 log(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic
->type
.c_str(), log_id(module
), log_id(lut_dlogic
));
151 log(" LUT input A[%d] not present.\n", dlogic_conn
.first
);
155 if (sigmap(lut_input
[dlogic_conn
.first
]) != sigmap(lut_dlogic
->getPort(dlogic_conn
.second
)))
157 log(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic
->type
.c_str(), log_id(module
), log_id(lut_dlogic
));
158 log(" LUT input A[%d] (wire %s) not connected to %s port %s (wire %s).\n", dlogic_conn
.first
, log_signal(lut_input
[dlogic_conn
.first
]), lut_dlogic
->type
.c_str(), dlogic_conn
.second
.c_str(), log_signal(lut_dlogic
->getPort(dlogic_conn
.second
)));
166 log(" LUT has legal connection to %s cell %s.%s.\n", lut_dlogic
->type
.c_str(), log_id(module
), log_id(lut_dlogic
));
167 lut_legal_dlogics
.insert(lut_dlogic
);
168 for (auto &dlogic_conn
: dlogic_map
)
169 lut_dlogic_inputs
.insert(dlogic_conn
.first
);
173 // Third, determine LUT arity. An n-wide LUT that has k constant inputs and m inputs shared with dedicated
174 // logic implements an (n-k-m)-ary function.
175 for (int i
= 0; i
< lut_width
; i
++)
177 SigBit bit
= lut_input
[i
];
178 if (bit
.wire
|| lut_dlogic_inputs
.count(i
))
182 log(" Cell implements a %d-LUT.\n", lut_arity
);
183 luts_arity
[cell
] = lut_arity
;
184 luts_dlogics
[cell
] = lut_legal_dlogics
;
185 luts_dlogic_inputs
[cell
] = lut_dlogic_inputs
;
188 show_stats_by_arity();
191 log("Combining LUTs.\n");
192 pool
<RTLIL::Cell
*> worklist
= luts
;
193 while (worklist
.size())
197 log("Limit reached.\n");
201 auto lutA
= worklist
.pop();
202 SigSpec lutA_input
= sigmap(lutA
->getPort("\\A"));
203 SigSpec lutA_output
= sigmap(lutA
->getPort("\\Y")[0]);
204 int lutA_width
= lutA
->getParam("\\WIDTH").as_int();
205 int lutA_arity
= luts_arity
[lutA
];
206 pool
<int> &lutA_dlogic_inputs
= luts_dlogic_inputs
[lutA
];
208 auto lutA_output_ports
= index
.query_ports(lutA
->getPort("\\Y"));
209 if (lutA_output_ports
.size() != 2)
212 for (auto &port
: lutA_output_ports
)
214 if (port
.cell
== lutA
)
217 if (luts
.count(port
.cell
))
219 auto lutB
= port
.cell
;
220 SigSpec lutB_input
= sigmap(lutB
->getPort("\\A"));
221 SigSpec lutB_output
= sigmap(lutB
->getPort("\\Y")[0]);
222 int lutB_width
= lutB
->getParam("\\WIDTH").as_int();
223 int lutB_arity
= luts_arity
[lutB
];
224 pool
<int> &lutB_dlogic_inputs
= luts_dlogic_inputs
[lutB
];
226 log("Found %s.%s (cell A) feeding %s.%s (cell B).\n", log_id(module
), log_id(lutA
), log_id(module
), log_id(lutB
));
228 if (index
.query_is_output(lutA
->getPort("\\Y")))
230 log(" Not combining LUTs (cascade connection feeds module output).\n");
234 pool
<SigBit
> lutA_inputs
;
235 pool
<SigBit
> lutB_inputs
;
236 for (auto &bit
: lutA_input
)
239 lutA_inputs
.insert(sigmap(bit
));
241 for (auto &bit
: lutB_input
)
244 lutB_inputs
.insert(sigmap(bit
));
247 pool
<SigBit
> common_inputs
;
248 for (auto &bit
: lutA_inputs
)
250 if (lutB_inputs
.count(bit
))
251 common_inputs
.insert(bit
);
254 int lutM_arity
= lutA_arity
+ lutB_arity
- 1 - common_inputs
.size();
255 if (lutA_dlogic_inputs
.size())
256 log(" Cell A is a %d-LUT with %zu dedicated connections. ", lutA_arity
, lutA_dlogic_inputs
.size());
258 log(" Cell A is a %d-LUT. ", lutA_arity
);
259 if (lutB_dlogic_inputs
.size())
260 log("Cell B is a %d-LUT with %zu dedicated connections.\n", lutB_arity
, lutB_dlogic_inputs
.size());
262 log("Cell B is a %d-LUT.\n", lutB_arity
);
263 log(" Cells share %zu input(s) and can be merged into one %d-LUT.\n", common_inputs
.size(), lutM_arity
);
265 const int COMBINE_A
= 1, COMBINE_B
= 2, COMBINE_EITHER
= COMBINE_A
| COMBINE_B
;
266 int combine_mask
= 0;
267 if (lutM_arity
> lutA_width
)
269 log(" Not combining LUTs into cell A (combined LUT wider than cell A).\n");
271 else if (lutB_dlogic_inputs
.size() > 0)
273 log(" Not combining LUTs into cell A (cell B is connected to dedicated logic).\n");
275 else if (lutB
->get_bool_attribute("\\lut_keep"))
277 log(" Not combining LUTs into cell A (cell B has attribute \\lut_keep).\n");
281 combine_mask
|= COMBINE_A
;
283 if (lutM_arity
> lutB_width
)
285 log(" Not combining LUTs into cell B (combined LUT wider than cell B).\n");
287 else if (lutA_dlogic_inputs
.size() > 0)
289 log(" Not combining LUTs into cell B (cell A is connected to dedicated logic).\n");
291 else if (lutA
->get_bool_attribute("\\lut_keep"))
293 log(" Not combining LUTs into cell B (cell A has attribute \\lut_keep).\n");
297 combine_mask
|= COMBINE_B
;
300 int combine
= combine_mask
;
301 if (combine
== COMBINE_EITHER
)
303 log(" Can combine into either cell.\n");
306 log(" Cell A is a buffer or inverter, combining into cell B.\n");
309 else if (lutB_arity
== 1)
311 log(" Cell B is a buffer or inverter, combining into cell A.\n");
316 log(" Arbitrarily combining into cell A.\n");
321 RTLIL::Cell
*lutM
, *lutR
;
322 pool
<SigBit
> lutM_inputs
, lutR_inputs
;
323 pool
<int> lutM_dlogic_inputs
;
324 if (combine
== COMBINE_A
)
326 log(" Combining LUTs into cell A.\n");
328 lutM_inputs
= lutA_inputs
;
329 lutM_dlogic_inputs
= lutA_dlogic_inputs
;
331 lutR_inputs
= lutB_inputs
;
333 else if (combine
== COMBINE_B
)
335 log(" Combining LUTs into cell B.\n");
337 lutM_inputs
= lutB_inputs
;
338 lutM_dlogic_inputs
= lutB_dlogic_inputs
;
340 lutR_inputs
= lutA_inputs
;
344 log(" Cannot combine LUTs.\n");
348 pool
<SigBit
> lutR_unique
;
349 for (auto &bit
: lutR_inputs
)
351 if (!common_inputs
.count(bit
) && bit
!= lutA_output
)
352 lutR_unique
.insert(bit
);
355 int lutM_width
= lutM
->getParam("\\WIDTH").as_int();
356 SigSpec lutM_input
= sigmap(lutM
->getPort("\\A"));
357 std::vector
<SigBit
> lutM_new_inputs
;
358 for (int i
= 0; i
< lutM_width
; i
++)
360 bool input_unused
= false;
361 if (sigmap(lutM_input
[i
]) == lutA_output
)
363 if (!lutM_input
[i
].wire
&& !lutM_dlogic_inputs
.count(i
))
366 if (input_unused
&& lutR_unique
.size())
368 SigBit new_input
= lutR_unique
.pop();
369 log(" Connecting input %d as %s.\n", i
, log_signal(new_input
));
370 lutM_new_inputs
.push_back(new_input
);
372 else if (sigmap(lutM_input
[i
]) == lutA_output
)
374 log(" Disconnecting cascade input %d.\n", i
);
375 lutM_new_inputs
.push_back(SigBit());
379 log(" Leaving input %d as %s.\n", i
, log_signal(lutM_input
[i
]));
380 lutM_new_inputs
.push_back(lutM_input
[i
]);
383 log_assert(lutR_unique
.size() == 0);
385 RTLIL::Const
lutM_new_table(State::Sx
, 1 << lutM_width
);
386 for (int eval
= 0; eval
< 1 << lutM_width
; eval
++)
388 dict
<SigBit
, bool> eval_inputs
;
389 for (size_t i
= 0; i
< lutM_new_inputs
.size(); i
++)
391 eval_inputs
[lutM_new_inputs
[i
]] = (eval
>> i
) & 1;
393 eval_inputs
[lutA_output
] = evaluate_lut(lutA
, eval_inputs
);
394 lutM_new_table
[eval
] = (RTLIL::State
) evaluate_lut(lutB
, eval_inputs
);
397 log(" Cell A truth table: %s.\n", lutA
->getParam("\\LUT").as_string().c_str());
398 log(" Cell B truth table: %s.\n", lutB
->getParam("\\LUT").as_string().c_str());
399 log(" Merged truth table: %s.\n", lutM_new_table
.as_string().c_str());
401 lutM
->setParam("\\LUT", lutM_new_table
);
402 lutM
->setPort("\\A", lutM_new_inputs
);
403 lutM
->setPort("\\Y", lutB_output
);
405 luts_arity
[lutM
] = lutM_arity
;
407 luts_arity
.erase(lutR
);
408 lutR
->module
->remove(lutR
);
410 worklist
.insert(lutM
);
411 worklist
.erase(lutR
);
419 show_stats_by_arity();
// Splits `text` at every occurrence of `sep` and appends the pieces to `tokens`.
//
// tokens - output vector; existing elements are kept and new pieces are appended.
// text   - string to split.
// sep    - separator character; it is not included in the pieces.
//
// Empty pieces are preserved, so N separators always yield N + 1 appended tokens
// (an empty `text` appends a single empty token).
static void split(std::vector<std::string> &tokens, const std::string &text, char sep)
{
	size_t start = 0, end = 0;
	while ((end = text.find(sep, start)) != std::string::npos) {
		tokens.push_back(text.substr(start, end - start));
		// Resume the search just past the separator; without this the loop never terminates.
		start = end + 1;
	}
	tokens.push_back(text.substr(start));
}
433 struct OptLutPass
: public Pass
{
434 OptLutPass() : Pass("opt_lut", "optimize LUT cells") { }
435 void help() YS_OVERRIDE
437 // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
439 log(" opt_lut [options] [selection]\n");
441 log("This pass combines cascaded $lut cells with unused inputs.\n");
443 log(" -dlogic <type>:<cell-port>=<LUT-input>[:<cell-port>=<LUT-input>...]\n");
444 log(" preserve connections to dedicated logic cell <type> that has ports\n");
445 log(" <cell-port> connected to LUT inputs <LUT-input>. this includes\n");
446 log(" the case where both LUT and dedicated logic input are connected to\n");
447 log(" the same constant.\n");
450 log(" only perform the first N combines, then stop. useful for debugging.\n");
453 void execute(std::vector
<std::string
> args
, RTLIL::Design
*design
) YS_OVERRIDE
455 log_header(design
, "Executing OPT_LUT pass (optimize LUTs).\n");
457 dict
<IdString
, dict
<int, IdString
>> dlogic
;
461 for (argidx
= 1; argidx
< args
.size(); argidx
++)
463 if (args
[argidx
] == "-dlogic" && argidx
+1 < args
.size())
465 std::vector
<std::string
> tokens
;
466 split(tokens
, args
[++argidx
], ':');
467 if (tokens
.size() < 2)
468 log_cmd_error("The -dlogic option requires at least one connection.\n");
469 IdString type
= "\\" + tokens
[0];
470 for (auto it
= tokens
.begin() + 1; it
!= tokens
.end(); ++it
) {
471 std::vector
<std::string
> conn_tokens
;
472 split(conn_tokens
, *it
, '=');
473 if (conn_tokens
.size() != 2)
474 log_cmd_error("Invalid format of -dlogic signal mapping.\n");
475 IdString logic_port
= "\\" + conn_tokens
[0];
476 int lut_input
= atoi(conn_tokens
[1].c_str());
477 dlogic
[type
][lut_input
] = logic_port
;
481 if (args
[argidx
] == "-limit" && argidx
+ 1 < args
.size())
483 limit
= atoi(args
[++argidx
].c_str());
488 extra_args(args
, argidx
, design
);
491 for (auto module
: design
->selected_modules())
493 OptLutWorker
worker(dlogic
, module
, limit
- total_count
);
494 total_count
+= worker
.combined_count
;
497 design
->scratchpad_set_bool("opt.did_something", true);
499 log("Combined %d LUTs.\n", total_count
);
503 PRIVATE_NAMESPACE_END