Merge pull request #1299 from YosysHQ/eddie/cleanup2
[yosys.git] / passes / opt / opt_lut.cc
1 /*
2 * yosys -- Yosys Open SYnthesis Suite
3 *
4 * Copyright (C) 2018 whitequark <whitequark@whitequark.org>
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 */
19
20 #include "kernel/yosys.h"
21 #include "kernel/sigtools.h"
22 #include "kernel/modtools.h"
23
24 USING_YOSYS_NAMESPACE
25 PRIVATE_NAMESPACE_BEGIN
26
27 struct OptLutWorker
28 {
29 dict<IdString, dict<int, IdString>> &dlogic;
30 RTLIL::Module *module;
31 ModIndex index;
32 SigMap sigmap;
33
34 pool<RTLIL::Cell*> luts;
35 dict<RTLIL::Cell*, int> luts_arity;
36 dict<RTLIL::Cell*, pool<RTLIL::Cell*>> luts_dlogics;
37 dict<RTLIL::Cell*, pool<int>> luts_dlogic_inputs;
38
39 int eliminated_count = 0, combined_count = 0;
40
41 bool evaluate_lut(RTLIL::Cell *lut, dict<SigBit, bool> inputs)
42 {
43 SigSpec lut_input = sigmap(lut->getPort(ID(A)));
44 int lut_width = lut->getParam(ID(WIDTH)).as_int();
45 Const lut_table = lut->getParam(ID(LUT));
46 int lut_index = 0;
47
48 for (int i = 0; i < lut_width; i++)
49 {
50 SigBit input = sigmap(lut_input[i]);
51 if (inputs.count(input))
52 {
53 lut_index |= inputs[input] << i;
54 }
55 else
56 {
57 lut_index |= SigSpec(lut_input[i]).as_bool() << i;
58 }
59 }
60
61 return lut_table.extract(lut_index).as_bool();
62 }
63
64 void show_stats_by_arity()
65 {
66 dict<int, int> arity_counts;
67 dict<IdString, int> dlogic_counts;
68 int max_arity = 0;
69
70 for (auto lut_arity : luts_arity)
71 {
72 max_arity = max(max_arity, lut_arity.second);
73 arity_counts[lut_arity.second]++;
74 }
75
76 for (auto &lut_dlogics : luts_dlogics)
77 {
78 for (auto &lut_dlogic : lut_dlogics.second)
79 {
80 dlogic_counts[lut_dlogic->type]++;
81 }
82 }
83
84 log("Number of LUTs: %8d\n", GetSize(luts));
85 for (int arity = 1; arity <= max_arity; arity++)
86 {
87 if (arity_counts[arity])
88 log(" %d-LUT %16d\n", arity, arity_counts[arity]);
89 }
90 for (auto &dlogic_count : dlogic_counts)
91 {
92 log(" with %-12s %4d\n", dlogic_count.first.c_str(), dlogic_count.second);
93 }
94 }
95
96 OptLutWorker(dict<IdString, dict<int, IdString>> &dlogic, RTLIL::Module *module, int limit) :
97 dlogic(dlogic), module(module), index(module), sigmap(module)
98 {
99 log("Discovering LUTs.\n");
100 for (auto cell : module->selected_cells())
101 {
102 if (cell->type == ID($lut))
103 {
104 if (cell->has_keep_attr())
105 continue;
106 SigBit lut_output = cell->getPort(ID(Y));
107 if (lut_output.wire->get_bool_attribute(ID(keep)))
108 continue;
109
110 int lut_width = cell->getParam(ID(WIDTH)).as_int();
111 SigSpec lut_input = cell->getPort(ID(A));
112 int lut_arity = 0;
113
114 log_debug("Found $lut\\WIDTH=%d cell %s.%s.\n", lut_width, log_id(module), log_id(cell));
115 luts.insert(cell);
116
117 // First, find all dedicated logic we're connected to. This results in an overapproximation
118 // of such connections.
119 pool<RTLIL::Cell*> lut_all_dlogics;
120 for (int i = 0; i < lut_width; i++)
121 {
122 SigBit bit = lut_input[i];
123 for (auto &port : index.query_ports(bit))
124 {
125 if (dlogic.count(port.cell->type))
126 {
127 auto &dlogic_map = dlogic[port.cell->type];
128 if (dlogic_map.count(i))
129 {
130 if (port.port == dlogic_map[i])
131 {
132 lut_all_dlogics.insert(port.cell);
133 }
134 }
135 }
136 }
137 }
138
139 // Second, make sure that the connection to dedicated logic is legal. If it is not legal,
140 // it means one of the two things:
141 // * The connection is spurious. I.e. this is dedicated logic that will be packed
142 // with some other LUT, and it just happens to be connected to this LUT as well.
143 // * The connection is illegal.
144 // In either of these cases, we don't need to concern ourselves with preserving the connection
145 // between this LUT and this dedicated logic cell.
146 pool<RTLIL::Cell*> lut_legal_dlogics;
147 pool<int> lut_dlogic_inputs;
148 for (auto lut_dlogic : lut_all_dlogics)
149 {
150 auto &dlogic_map = dlogic[lut_dlogic->type];
151 bool legal = true;
152 for (auto &dlogic_conn : dlogic_map)
153 {
154 if (lut_width <= dlogic_conn.first)
155 {
156 log_debug(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic->type.c_str(), log_id(module), log_id(lut_dlogic));
157 log_debug(" LUT input A[%d] not present.\n", dlogic_conn.first);
158 legal = false;
159 break;
160 }
161 if (sigmap(lut_input[dlogic_conn.first]) != sigmap(lut_dlogic->getPort(dlogic_conn.second)))
162 {
163 log_debug(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic->type.c_str(), log_id(module), log_id(lut_dlogic));
164 log_debug(" LUT input A[%d] (wire %s) not connected to %s port %s (wire %s).\n", dlogic_conn.first, log_signal(lut_input[dlogic_conn.first]), lut_dlogic->type.c_str(), dlogic_conn.second.c_str(), log_signal(lut_dlogic->getPort(dlogic_conn.second)));
165 legal = false;
166 break;
167 }
168 }
169
170 if (legal)
171 {
172 log_debug(" LUT has legal connection to %s cell %s.%s.\n", lut_dlogic->type.c_str(), log_id(module), log_id(lut_dlogic));
173 lut_legal_dlogics.insert(lut_dlogic);
174 for (auto &dlogic_conn : dlogic_map)
175 lut_dlogic_inputs.insert(dlogic_conn.first);
176 }
177 }
178
179 // Third, determine LUT arity. An n-wide LUT that has k constant inputs and m inputs shared with dedicated
180 // logic implements an (n-k-m)-ary function.
181 for (int i = 0; i < lut_width; i++)
182 {
183 SigBit bit = lut_input[i];
184 if (bit.wire || lut_dlogic_inputs.count(i))
185 lut_arity++;
186 }
187
188 log_debug(" Cell implements a %d-LUT.\n", lut_arity);
189 luts_arity[cell] = lut_arity;
190 luts_dlogics[cell] = lut_legal_dlogics;
191 luts_dlogic_inputs[cell] = lut_dlogic_inputs;
192 }
193 }
194 show_stats_by_arity();
195
196 log("\n");
197 log("Eliminating LUTs.\n");
198 pool<RTLIL::Cell*> worklist = luts;
199 while (worklist.size())
200 {
201 if (limit == 0)
202 {
203 log("Limit reached.\n");
204 break;
205 }
206
207 auto lut = worklist.pop();
208 SigSpec lut_input = sigmap(lut->getPort(ID(A)));
209 pool<int> &lut_dlogic_inputs = luts_dlogic_inputs[lut];
210
211 vector<SigBit> lut_inputs;
212 for (auto &bit : lut_input)
213 {
214 if (bit.wire)
215 lut_inputs.push_back(sigmap(bit));
216 }
217
218 bool const0_match = true;
219 bool const1_match = true;
220 vector<bool> input_matches;
221 for (size_t i = 0; i < lut_inputs.size(); i++)
222 input_matches.push_back(true);
223
224 for (int eval = 0; eval < 1 << lut_inputs.size(); eval++)
225 {
226 dict<SigBit, bool> eval_inputs;
227 for (size_t i = 0; i < lut_inputs.size(); i++)
228 eval_inputs[lut_inputs[i]] = (eval >> i) & 1;
229 bool value = evaluate_lut(lut, eval_inputs);
230 if (value != 0)
231 const0_match = false;
232 if (value != 1)
233 const1_match = false;
234 for (size_t i = 0; i < lut_inputs.size(); i++)
235 {
236 if (value != eval_inputs[lut_inputs[i]])
237 input_matches[i] = false;
238 }
239 }
240
241 int input_match = -1;
242 for (size_t i = 0; i < lut_inputs.size(); i++)
243 if (input_matches[i])
244 input_match = i;
245
246 if (const0_match || const1_match || input_match != -1)
247 {
248 log_debug("Found redundant cell %s.%s.\n", log_id(module), log_id(lut));
249
250 SigBit value;
251 if (const0_match)
252 {
253 log_debug(" Cell evaluates constant 0.\n");
254 value = State::S0;
255 }
256 if (const1_match)
257 {
258 log_debug(" Cell evaluates constant 1.\n");
259 value = State::S1;
260 }
261 if (input_match != -1) {
262 log_debug(" Cell evaluates signal %s.\n", log_signal(lut_inputs[input_match]));
263 value = lut_inputs[input_match];
264 }
265
266 if (lut_dlogic_inputs.size())
267 log_debug(" Not eliminating cell (connected to dedicated logic).\n");
268 else
269 {
270 SigSpec lut_output = lut->getPort(ID(Y));
271 for (auto &port : index.query_ports(lut_output))
272 {
273 if (port.cell != lut && luts.count(port.cell))
274 worklist.insert(port.cell);
275 }
276
277 module->connect(lut_output, value);
278 sigmap.add(lut_output, value);
279
280 module->remove(lut);
281 luts.erase(lut);
282 luts_arity.erase(lut);
283 luts_dlogics.erase(lut);
284 luts_dlogic_inputs.erase(lut);
285
286 eliminated_count++;
287 if (limit > 0)
288 limit--;
289 }
290 }
291 }
292 show_stats_by_arity();
293
294 log("\n");
295 log("Combining LUTs.\n");
296 worklist = luts;
297 while (worklist.size())
298 {
299 if (limit == 0)
300 {
301 log("Limit reached.\n");
302 break;
303 }
304
305 auto lutA = worklist.pop();
306 SigSpec lutA_input = sigmap(lutA->getPort(ID(A)));
307 SigSpec lutA_output = sigmap(lutA->getPort(ID(Y))[0]);
308 int lutA_width = lutA->getParam(ID(WIDTH)).as_int();
309 int lutA_arity = luts_arity[lutA];
310 pool<int> &lutA_dlogic_inputs = luts_dlogic_inputs[lutA];
311
312 auto lutA_output_ports = index.query_ports(lutA->getPort(ID(Y)));
313 if (lutA_output_ports.size() != 2)
314 continue;
315
316 for (auto &port : lutA_output_ports)
317 {
318 if (port.cell == lutA)
319 continue;
320
321 if (luts.count(port.cell))
322 {
323 auto lutB = port.cell;
324 SigSpec lutB_input = sigmap(lutB->getPort(ID(A)));
325 SigSpec lutB_output = sigmap(lutB->getPort(ID(Y))[0]);
326 int lutB_width = lutB->getParam(ID(WIDTH)).as_int();
327 int lutB_arity = luts_arity[lutB];
328 pool<int> &lutB_dlogic_inputs = luts_dlogic_inputs[lutB];
329
330 log_debug("Found %s.%s (cell A) feeding %s.%s (cell B).\n", log_id(module), log_id(lutA), log_id(module), log_id(lutB));
331
332 if (index.query_is_output(lutA->getPort(ID(Y))))
333 {
334 log_debug(" Not combining LUTs (cascade connection feeds module output).\n");
335 continue;
336 }
337
338 pool<SigBit> lutA_inputs;
339 pool<SigBit> lutB_inputs;
340 for (auto &bit : lutA_input)
341 {
342 if (bit.wire)
343 lutA_inputs.insert(sigmap(bit));
344 }
345 for (auto &bit : lutB_input)
346 {
347 if (bit.wire)
348 lutB_inputs.insert(sigmap(bit));
349 }
350
351 pool<SigBit> common_inputs;
352 for (auto &bit : lutA_inputs)
353 {
354 if (lutB_inputs.count(bit))
355 common_inputs.insert(bit);
356 }
357
358 int lutM_arity = lutA_arity + lutB_arity - 1 - common_inputs.size();
359 if (lutA_dlogic_inputs.size())
360 log_debug(" Cell A is a %d-LUT with %d dedicated connections. ", lutA_arity, GetSize(lutA_dlogic_inputs));
361 else
362 log_debug(" Cell A is a %d-LUT. ", lutA_arity);
363 if (lutB_dlogic_inputs.size())
364 log_debug("Cell B is a %d-LUT with %d dedicated connections.\n", lutB_arity, GetSize(lutB_dlogic_inputs));
365 else
366 log_debug("Cell B is a %d-LUT.\n", lutB_arity);
367 log_debug(" Cells share %d input(s) and can be merged into one %d-LUT.\n", GetSize(common_inputs), lutM_arity);
368
369 const int COMBINE_A = 1, COMBINE_B = 2, COMBINE_EITHER = COMBINE_A | COMBINE_B;
370 int combine_mask = 0;
371 if (lutM_arity > lutA_width)
372 log_debug(" Not combining LUTs into cell A (combined LUT wider than cell A).\n");
373 else if (lutB_dlogic_inputs.size() > 0)
374 log_debug(" Not combining LUTs into cell A (cell B is connected to dedicated logic).\n");
375 else if (lutB->get_bool_attribute(ID(lut_keep)))
376 log_debug(" Not combining LUTs into cell A (cell B has attribute \\lut_keep).\n");
377 else
378 combine_mask |= COMBINE_A;
379 if (lutM_arity > lutB_width)
380 log_debug(" Not combining LUTs into cell B (combined LUT wider than cell B).\n");
381 else if (lutA_dlogic_inputs.size() > 0)
382 log_debug(" Not combining LUTs into cell B (cell A is connected to dedicated logic).\n");
383 else if (lutA->get_bool_attribute(ID(lut_keep)))
384 log_debug(" Not combining LUTs into cell B (cell A has attribute \\lut_keep).\n");
385 else
386 combine_mask |= COMBINE_B;
387
388 int combine = combine_mask;
389 if (combine == COMBINE_EITHER)
390 {
391 log_debug(" Can combine into either cell.\n");
392 if (lutA_arity == 1)
393 {
394 log_debug(" Cell A is a buffer or inverter, combining into cell B.\n");
395 combine = COMBINE_B;
396 }
397 else if (lutB_arity == 1)
398 {
399 log_debug(" Cell B is a buffer or inverter, combining into cell A.\n");
400 combine = COMBINE_A;
401 }
402 else
403 {
404 log_debug(" Arbitrarily combining into cell A.\n");
405 combine = COMBINE_A;
406 }
407 }
408
409 RTLIL::Cell *lutM, *lutR;
410 pool<SigBit> lutM_inputs, lutR_inputs;
411 pool<int> lutM_dlogic_inputs;
412 if (combine == COMBINE_A)
413 {
414 log_debug(" Combining LUTs into cell A.\n");
415 lutM = lutA;
416 lutM_inputs = lutA_inputs;
417 lutM_dlogic_inputs = lutA_dlogic_inputs;
418 lutR = lutB;
419 lutR_inputs = lutB_inputs;
420 }
421 else if (combine == COMBINE_B)
422 {
423 log_debug(" Combining LUTs into cell B.\n");
424 lutM = lutB;
425 lutM_inputs = lutB_inputs;
426 lutM_dlogic_inputs = lutB_dlogic_inputs;
427 lutR = lutA;
428 lutR_inputs = lutA_inputs;
429 }
430 else
431 {
432 log_debug(" Cannot combine LUTs.\n");
433 continue;
434 }
435
436 pool<SigBit> lutR_unique;
437 for (auto &bit : lutR_inputs)
438 {
439 if (!common_inputs.count(bit) && bit != lutA_output)
440 lutR_unique.insert(bit);
441 }
442
443 int lutM_width = lutM->getParam(ID(WIDTH)).as_int();
444 SigSpec lutM_input = sigmap(lutM->getPort(ID(A)));
445 std::vector<SigBit> lutM_new_inputs;
446 for (int i = 0; i < lutM_width; i++)
447 {
448 bool input_unused = false;
449 if (sigmap(lutM_input[i]) == lutA_output)
450 input_unused = true;
451 if (!lutM_input[i].wire && !lutM_dlogic_inputs.count(i))
452 input_unused = true;
453
454 if (input_unused && lutR_unique.size())
455 {
456 SigBit new_input = lutR_unique.pop();
457 log_debug(" Connecting input %d as %s.\n", i, log_signal(new_input));
458 lutM_new_inputs.push_back(new_input);
459 }
460 else if (sigmap(lutM_input[i]) == lutA_output)
461 {
462 log_debug(" Disconnecting cascade input %d.\n", i);
463 lutM_new_inputs.push_back(SigBit());
464 }
465 else
466 {
467 log_debug(" Leaving input %d as %s.\n", i, log_signal(lutM_input[i]));
468 lutM_new_inputs.push_back(lutM_input[i]);
469 }
470 }
471 log_assert(lutR_unique.size() == 0);
472
473 RTLIL::Const lutM_new_table(State::Sx, 1 << lutM_width);
474 for (int eval = 0; eval < 1 << lutM_width; eval++)
475 {
476 dict<SigBit, bool> eval_inputs;
477 for (size_t i = 0; i < lutM_new_inputs.size(); i++)
478 {
479 eval_inputs[lutM_new_inputs[i]] = (eval >> i) & 1;
480 }
481 eval_inputs[lutA_output] = evaluate_lut(lutA, eval_inputs);
482 lutM_new_table[eval] = (RTLIL::State) evaluate_lut(lutB, eval_inputs);
483 }
484
485 log_debug(" Cell A truth table: %s.\n", lutA->getParam(ID(LUT)).as_string().c_str());
486 log_debug(" Cell B truth table: %s.\n", lutB->getParam(ID(LUT)).as_string().c_str());
487 log_debug(" Merged truth table: %s.\n", lutM_new_table.as_string().c_str());
488
489 lutM->setParam(ID(LUT), lutM_new_table);
490 lutM->setPort(ID(A), lutM_new_inputs);
491 lutM->setPort(ID(Y), lutB_output);
492
493 luts_arity[lutM] = lutM_arity;
494 luts.erase(lutR);
495 luts_arity.erase(lutR);
496 lutR->module->remove(lutR);
497
498 worklist.insert(lutM);
499 worklist.erase(lutR);
500
501 combined_count++;
502 if (limit > 0)
503 limit--;
504 }
505 }
506 }
507 show_stats_by_arity();
508 }
509 };
510
511 static void split(std::vector<std::string> &tokens, const std::string &text, char sep)
512 {
513 size_t start = 0, end = 0;
514 while ((end = text.find(sep, start)) != std::string::npos) {
515 tokens.push_back(text.substr(start, end - start));
516 start = end + 1;
517 }
518 tokens.push_back(text.substr(start));
519 }
520
521 struct OptLutPass : public Pass {
522 OptLutPass() : Pass("opt_lut", "optimize LUT cells") { }
523 void help() YS_OVERRIDE
524 {
525 // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
526 log("\n");
527 log(" opt_lut [options] [selection]\n");
528 log("\n");
529 log("This pass combines cascaded $lut cells with unused inputs.\n");
530 log("\n");
531 log(" -dlogic <type>:<cell-port>=<LUT-input>[:<cell-port>=<LUT-input>...]\n");
532 log(" preserve connections to dedicated logic cell <type> that has ports\n");
533 log(" <cell-port> connected to LUT inputs <LUT-input>. this includes\n");
534 log(" the case where both LUT and dedicated logic input are connected to\n");
535 log(" the same constant.\n");
536 log("\n");
537 log(" -limit N\n");
538 log(" only perform the first N combines, then stop. useful for debugging.\n");
539 log("\n");
540 }
541 void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
542 {
543 log_header(design, "Executing OPT_LUT pass (optimize LUTs).\n");
544
545 dict<IdString, dict<int, IdString>> dlogic;
546 int limit = -1;
547
548 size_t argidx;
549 for (argidx = 1; argidx < args.size(); argidx++)
550 {
551 if (args[argidx] == "-dlogic" && argidx+1 < args.size())
552 {
553 std::vector<std::string> tokens;
554 split(tokens, args[++argidx], ':');
555 if (tokens.size() < 2)
556 log_cmd_error("The -dlogic option requires at least one connection.\n");
557 IdString type = "\\" + tokens[0];
558 for (auto it = tokens.begin() + 1; it != tokens.end(); ++it) {
559 std::vector<std::string> conn_tokens;
560 split(conn_tokens, *it, '=');
561 if (conn_tokens.size() != 2)
562 log_cmd_error("Invalid format of -dlogic signal mapping.\n");
563 IdString logic_port = "\\" + conn_tokens[0];
564 int lut_input = atoi(conn_tokens[1].c_str());
565 dlogic[type][lut_input] = logic_port;
566 }
567 continue;
568 }
569 if (args[argidx] == "-limit" && argidx + 1 < args.size())
570 {
571 limit = atoi(args[++argidx].c_str());
572 continue;
573 }
574 break;
575 }
576 extra_args(args, argidx, design);
577
578 int eliminated_count = 0, combined_count = 0;
579 for (auto module : design->selected_modules())
580 {
581 OptLutWorker worker(dlogic, module, limit - eliminated_count - combined_count);
582 eliminated_count += worker.eliminated_count;
583 combined_count += worker.combined_count;
584 }
585 if (eliminated_count)
586 design->scratchpad_set_bool("opt.did_something", true);
587 if (combined_count)
588 design->scratchpad_set_bool("opt.did_something", true);
589 log("\n");
590 log("Eliminated %d LUTs.\n", eliminated_count);
591 log("Combined %d LUTs.\n", combined_count);
592 }
593 } OptLutPass;
594
595 PRIVATE_NAMESPACE_END