opt_lut: reflect changes in sigmap.
[yosys.git] / passes / opt / opt_lut.cc
1 /*
2 * yosys -- Yosys Open SYnthesis Suite
3 *
4 * Copyright (C) 2018 whitequark <whitequark@whitequark.org>
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 */
19
20 #include "kernel/yosys.h"
21 #include "kernel/sigtools.h"
22 #include "kernel/modtools.h"
23
24 USING_YOSYS_NAMESPACE
25 PRIVATE_NAMESPACE_BEGIN
26
27 struct OptLutWorker
28 {
29 dict<IdString, dict<int, IdString>> &dlogic;
30 RTLIL::Module *module;
31 ModIndex index;
32 SigMap sigmap;
33
34 pool<RTLIL::Cell*> luts;
35 dict<RTLIL::Cell*, int> luts_arity;
36 dict<RTLIL::Cell*, pool<RTLIL::Cell*>> luts_dlogics;
37 dict<RTLIL::Cell*, pool<int>> luts_dlogic_inputs;
38
39 int eliminated_count = 0, combined_count = 0;
40
41 bool evaluate_lut(RTLIL::Cell *lut, dict<SigBit, bool> inputs)
42 {
43 SigSpec lut_input = sigmap(lut->getPort("\\A"));
44 int lut_width = lut->getParam("\\WIDTH").as_int();
45 Const lut_table = lut->getParam("\\LUT");
46 int lut_index = 0;
47
48 for (int i = 0; i < lut_width; i++)
49 {
50 SigBit input = sigmap(lut_input[i]);
51 if (inputs.count(input))
52 {
53 lut_index |= inputs[input] << i;
54 }
55 else
56 {
57 lut_index |= SigSpec(lut_input[i]).as_bool() << i;
58 }
59 }
60
61 return lut_table.extract(lut_index).as_bool();
62 }
63
64 void show_stats_by_arity()
65 {
66 dict<int, int> arity_counts;
67 dict<IdString, int> dlogic_counts;
68 int max_arity = 0;
69
70 for (auto lut_arity : luts_arity)
71 {
72 max_arity = max(max_arity, lut_arity.second);
73 arity_counts[lut_arity.second]++;
74 }
75
76 for (auto &lut_dlogics : luts_dlogics)
77 {
78 for (auto &lut_dlogic : lut_dlogics.second)
79 {
80 dlogic_counts[lut_dlogic->type]++;
81 }
82 }
83
84 log("Number of LUTs: %8zu\n", luts.size());
85 for (int arity = 1; arity <= max_arity; arity++)
86 {
87 if (arity_counts[arity])
88 log(" %d-LUT %16d\n", arity, arity_counts[arity]);
89 }
90 for (auto &dlogic_count : dlogic_counts)
91 {
92 log(" with %-12s %4d\n", dlogic_count.first.c_str(), dlogic_count.second);
93 }
94 }
95
96 OptLutWorker(dict<IdString, dict<int, IdString>> &dlogic, RTLIL::Module *module, int limit) :
97 dlogic(dlogic), module(module), index(module), sigmap(module)
98 {
99 log("Discovering LUTs.\n");
100 for (auto cell : module->selected_cells())
101 {
102 if (cell->type == "$lut")
103 {
104 int lut_width = cell->getParam("\\WIDTH").as_int();
105 SigSpec lut_input = cell->getPort("\\A");
106 int lut_arity = 0;
107
108 log("Found $lut\\WIDTH=%d cell %s.%s.\n", lut_width, log_id(module), log_id(cell));
109 luts.insert(cell);
110
111 // First, find all dedicated logic we're connected to. This results in an overapproximation
112 // of such connections.
113 pool<RTLIL::Cell*> lut_all_dlogics;
114 for (int i = 0; i < lut_width; i++)
115 {
116 SigBit bit = lut_input[i];
117 for (auto &port : index.query_ports(bit))
118 {
119 if (dlogic.count(port.cell->type))
120 {
121 auto &dlogic_map = dlogic[port.cell->type];
122 if (dlogic_map.count(i))
123 {
124 if (port.port == dlogic_map[i])
125 {
126 lut_all_dlogics.insert(port.cell);
127 }
128 }
129 }
130 }
131 }
132
133 // Second, make sure that the connection to dedicated logic is legal. If it is not legal,
134 // it means one of the two things:
135 // * The connection is spurious. I.e. this is dedicated logic that will be packed
136 // with some other LUT, and it just happens to be conected to this LUT as well.
137 // * The connection is illegal.
138 // In either of these cases, we don't need to concern ourselves with preserving the connection
139 // between this LUT and this dedicated logic cell.
140 pool<RTLIL::Cell*> lut_legal_dlogics;
141 pool<int> lut_dlogic_inputs;
142 for (auto lut_dlogic : lut_all_dlogics)
143 {
144 auto &dlogic_map = dlogic[lut_dlogic->type];
145 bool legal = true;
146 for (auto &dlogic_conn : dlogic_map)
147 {
148 if (lut_width <= dlogic_conn.first)
149 {
150 log(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic->type.c_str(), log_id(module), log_id(lut_dlogic));
151 log(" LUT input A[%d] not present.\n", dlogic_conn.first);
152 legal = false;
153 break;
154 }
155 if (sigmap(lut_input[dlogic_conn.first]) != sigmap(lut_dlogic->getPort(dlogic_conn.second)))
156 {
157 log(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic->type.c_str(), log_id(module), log_id(lut_dlogic));
158 log(" LUT input A[%d] (wire %s) not connected to %s port %s (wire %s).\n", dlogic_conn.first, log_signal(lut_input[dlogic_conn.first]), lut_dlogic->type.c_str(), dlogic_conn.second.c_str(), log_signal(lut_dlogic->getPort(dlogic_conn.second)));
159 legal = false;
160 break;
161 }
162 }
163
164 if (legal)
165 {
166 log(" LUT has legal connection to %s cell %s.%s.\n", lut_dlogic->type.c_str(), log_id(module), log_id(lut_dlogic));
167 lut_legal_dlogics.insert(lut_dlogic);
168 for (auto &dlogic_conn : dlogic_map)
169 lut_dlogic_inputs.insert(dlogic_conn.first);
170 }
171 }
172
173 // Third, determine LUT arity. An n-wide LUT that has k constant inputs and m inputs shared with dedicated
174 // logic implements an (n-k-m)-ary function.
175 for (int i = 0; i < lut_width; i++)
176 {
177 SigBit bit = lut_input[i];
178 if (bit.wire || lut_dlogic_inputs.count(i))
179 lut_arity++;
180 }
181
182 log(" Cell implements a %d-LUT.\n", lut_arity);
183 luts_arity[cell] = lut_arity;
184 luts_dlogics[cell] = lut_legal_dlogics;
185 luts_dlogic_inputs[cell] = lut_dlogic_inputs;
186 }
187 }
188 show_stats_by_arity();
189
190 log("\n");
191 log("Eliminating LUTs.\n");
192 pool<RTLIL::Cell*> worklist = luts;
193 while (worklist.size())
194 {
195 if (limit == 0)
196 {
197 log("Limit reached.\n");
198 break;
199 }
200
201 auto lut = worklist.pop();
202 SigSpec lut_input = sigmap(lut->getPort("\\A"));
203 pool<int> &lut_dlogic_inputs = luts_dlogic_inputs[lut];
204
205 vector<SigBit> lut_inputs;
206 for (auto &bit : lut_input)
207 {
208 if (bit.wire)
209 lut_inputs.push_back(sigmap(bit));
210 }
211
212 bool const0_match = true;
213 bool const1_match = true;
214 vector<bool> input_matches;
215 for (size_t i = 0; i < lut_inputs.size(); i++)
216 input_matches.push_back(true);
217
218 for (int eval = 0; eval < 1 << lut_inputs.size(); eval++)
219 {
220 dict<SigBit, bool> eval_inputs;
221 for (size_t i = 0; i < lut_inputs.size(); i++)
222 eval_inputs[lut_inputs[i]] = (eval >> i) & 1;
223 bool value = evaluate_lut(lut, eval_inputs);
224 if (value != 0)
225 const0_match = false;
226 if (value != 1)
227 const1_match = false;
228 for (size_t i = 0; i < lut_inputs.size(); i++)
229 {
230 if (value != eval_inputs[lut_inputs[i]])
231 input_matches[i] = false;
232 }
233 }
234
235 int input_match = -1;
236 for (size_t i = 0; i < lut_inputs.size(); i++)
237 if (input_matches[i])
238 input_match = i;
239
240 if (const0_match || const1_match || input_match != -1)
241 {
242 log("Found redundant cell %s.%s.\n", log_id(module), log_id(lut));
243
244 SigBit value;
245 if (const0_match)
246 {
247 log(" Cell evaluates constant 0.\n");
248 value = State::S0;
249 }
250 if (const1_match)
251 {
252 log(" Cell evaluates constant 1.\n");
253 value = State::S1;
254 }
255 if (input_match != -1) {
256 log(" Cell evaluates signal %s.\n", log_signal(lut_inputs[input_match]));
257 value = lut_inputs[input_match];
258 }
259
260 if (lut_dlogic_inputs.size())
261 {
262 log(" Not eliminating cell (connected to dedicated logic).\n");
263 }
264 else
265 {
266 SigSpec lut_output = lut->getPort("\\Y");
267 for (auto &port : index.query_ports(lut_output))
268 {
269 if (port.cell != lut && luts.count(port.cell))
270 worklist.insert(port.cell);
271 }
272
273 module->connect(lut_output, value);
274 sigmap.add(lut_output, value);
275
276 module->remove(lut);
277 luts.erase(lut);
278 luts_arity.erase(lut);
279 luts_dlogics.erase(lut);
280 luts_dlogic_inputs.erase(lut);
281
282 eliminated_count++;
283 if (limit > 0)
284 limit--;
285 }
286 }
287 }
288 show_stats_by_arity();
289
290 log("\n");
291 log("Combining LUTs.\n");
292 worklist = luts;
293 while (worklist.size())
294 {
295 if (limit == 0)
296 {
297 log("Limit reached.\n");
298 break;
299 }
300
301 auto lutA = worklist.pop();
302 SigSpec lutA_input = sigmap(lutA->getPort("\\A"));
303 SigSpec lutA_output = sigmap(lutA->getPort("\\Y")[0]);
304 int lutA_width = lutA->getParam("\\WIDTH").as_int();
305 int lutA_arity = luts_arity[lutA];
306 pool<int> &lutA_dlogic_inputs = luts_dlogic_inputs[lutA];
307
308 auto lutA_output_ports = index.query_ports(lutA->getPort("\\Y"));
309 if (lutA_output_ports.size() != 2)
310 continue;
311
312 for (auto &port : lutA_output_ports)
313 {
314 if (port.cell == lutA)
315 continue;
316
317 if (luts.count(port.cell))
318 {
319 auto lutB = port.cell;
320 SigSpec lutB_input = sigmap(lutB->getPort("\\A"));
321 SigSpec lutB_output = sigmap(lutB->getPort("\\Y")[0]);
322 int lutB_width = lutB->getParam("\\WIDTH").as_int();
323 int lutB_arity = luts_arity[lutB];
324 pool<int> &lutB_dlogic_inputs = luts_dlogic_inputs[lutB];
325
326 log("Found %s.%s (cell A) feeding %s.%s (cell B).\n", log_id(module), log_id(lutA), log_id(module), log_id(lutB));
327
328 if (index.query_is_output(lutA->getPort("\\Y")))
329 {
330 log(" Not combining LUTs (cascade connection feeds module output).\n");
331 continue;
332 }
333
334 pool<SigBit> lutA_inputs;
335 pool<SigBit> lutB_inputs;
336 for (auto &bit : lutA_input)
337 {
338 if (bit.wire)
339 lutA_inputs.insert(sigmap(bit));
340 }
341 for (auto &bit : lutB_input)
342 {
343 if (bit.wire)
344 lutB_inputs.insert(sigmap(bit));
345 }
346
347 pool<SigBit> common_inputs;
348 for (auto &bit : lutA_inputs)
349 {
350 if (lutB_inputs.count(bit))
351 common_inputs.insert(bit);
352 }
353
354 int lutM_arity = lutA_arity + lutB_arity - 1 - common_inputs.size();
355 if (lutA_dlogic_inputs.size())
356 log(" Cell A is a %d-LUT with %zu dedicated connections. ", lutA_arity, lutA_dlogic_inputs.size());
357 else
358 log(" Cell A is a %d-LUT. ", lutA_arity);
359 if (lutB_dlogic_inputs.size())
360 log("Cell B is a %d-LUT with %zu dedicated connections.\n", lutB_arity, lutB_dlogic_inputs.size());
361 else
362 log("Cell B is a %d-LUT.\n", lutB_arity);
363 log(" Cells share %zu input(s) and can be merged into one %d-LUT.\n", common_inputs.size(), lutM_arity);
364
365 const int COMBINE_A = 1, COMBINE_B = 2, COMBINE_EITHER = COMBINE_A | COMBINE_B;
366 int combine_mask = 0;
367 if (lutM_arity > lutA_width)
368 {
369 log(" Not combining LUTs into cell A (combined LUT wider than cell A).\n");
370 }
371 else if (lutB_dlogic_inputs.size() > 0)
372 {
373 log(" Not combining LUTs into cell A (cell B is connected to dedicated logic).\n");
374 }
375 else if (lutB->get_bool_attribute("\\lut_keep"))
376 {
377 log(" Not combining LUTs into cell A (cell B has attribute \\lut_keep).\n");
378 }
379 else
380 {
381 combine_mask |= COMBINE_A;
382 }
383 if (lutM_arity > lutB_width)
384 {
385 log(" Not combining LUTs into cell B (combined LUT wider than cell B).\n");
386 }
387 else if (lutA_dlogic_inputs.size() > 0)
388 {
389 log(" Not combining LUTs into cell B (cell A is connected to dedicated logic).\n");
390 }
391 else if (lutA->get_bool_attribute("\\lut_keep"))
392 {
393 log(" Not combining LUTs into cell B (cell A has attribute \\lut_keep).\n");
394 }
395 else
396 {
397 combine_mask |= COMBINE_B;
398 }
399
400 int combine = combine_mask;
401 if (combine == COMBINE_EITHER)
402 {
403 log(" Can combine into either cell.\n");
404 if (lutA_arity == 1)
405 {
406 log(" Cell A is a buffer or inverter, combining into cell B.\n");
407 combine = COMBINE_B;
408 }
409 else if (lutB_arity == 1)
410 {
411 log(" Cell B is a buffer or inverter, combining into cell A.\n");
412 combine = COMBINE_A;
413 }
414 else
415 {
416 log(" Arbitrarily combining into cell A.\n");
417 combine = COMBINE_A;
418 }
419 }
420
421 RTLIL::Cell *lutM, *lutR;
422 pool<SigBit> lutM_inputs, lutR_inputs;
423 pool<int> lutM_dlogic_inputs;
424 if (combine == COMBINE_A)
425 {
426 log(" Combining LUTs into cell A.\n");
427 lutM = lutA;
428 lutM_inputs = lutA_inputs;
429 lutM_dlogic_inputs = lutA_dlogic_inputs;
430 lutR = lutB;
431 lutR_inputs = lutB_inputs;
432 }
433 else if (combine == COMBINE_B)
434 {
435 log(" Combining LUTs into cell B.\n");
436 lutM = lutB;
437 lutM_inputs = lutB_inputs;
438 lutM_dlogic_inputs = lutB_dlogic_inputs;
439 lutR = lutA;
440 lutR_inputs = lutA_inputs;
441 }
442 else
443 {
444 log(" Cannot combine LUTs.\n");
445 continue;
446 }
447
448 pool<SigBit> lutR_unique;
449 for (auto &bit : lutR_inputs)
450 {
451 if (!common_inputs.count(bit) && bit != lutA_output)
452 lutR_unique.insert(bit);
453 }
454
455 int lutM_width = lutM->getParam("\\WIDTH").as_int();
456 SigSpec lutM_input = sigmap(lutM->getPort("\\A"));
457 std::vector<SigBit> lutM_new_inputs;
458 for (int i = 0; i < lutM_width; i++)
459 {
460 bool input_unused = false;
461 if (sigmap(lutM_input[i]) == lutA_output)
462 input_unused = true;
463 if (!lutM_input[i].wire && !lutM_dlogic_inputs.count(i))
464 input_unused = true;
465
466 if (input_unused && lutR_unique.size())
467 {
468 SigBit new_input = lutR_unique.pop();
469 log(" Connecting input %d as %s.\n", i, log_signal(new_input));
470 lutM_new_inputs.push_back(new_input);
471 }
472 else if (sigmap(lutM_input[i]) == lutA_output)
473 {
474 log(" Disconnecting cascade input %d.\n", i);
475 lutM_new_inputs.push_back(SigBit());
476 }
477 else
478 {
479 log(" Leaving input %d as %s.\n", i, log_signal(lutM_input[i]));
480 lutM_new_inputs.push_back(lutM_input[i]);
481 }
482 }
483 log_assert(lutR_unique.size() == 0);
484
485 RTLIL::Const lutM_new_table(State::Sx, 1 << lutM_width);
486 for (int eval = 0; eval < 1 << lutM_width; eval++)
487 {
488 dict<SigBit, bool> eval_inputs;
489 for (size_t i = 0; i < lutM_new_inputs.size(); i++)
490 {
491 eval_inputs[lutM_new_inputs[i]] = (eval >> i) & 1;
492 }
493 eval_inputs[lutA_output] = evaluate_lut(lutA, eval_inputs);
494 lutM_new_table[eval] = (RTLIL::State) evaluate_lut(lutB, eval_inputs);
495 }
496
497 log(" Cell A truth table: %s.\n", lutA->getParam("\\LUT").as_string().c_str());
498 log(" Cell B truth table: %s.\n", lutB->getParam("\\LUT").as_string().c_str());
499 log(" Merged truth table: %s.\n", lutM_new_table.as_string().c_str());
500
501 lutM->setParam("\\LUT", lutM_new_table);
502 lutM->setPort("\\A", lutM_new_inputs);
503 lutM->setPort("\\Y", lutB_output);
504
505 luts_arity[lutM] = lutM_arity;
506 luts.erase(lutR);
507 luts_arity.erase(lutR);
508 lutR->module->remove(lutR);
509
510 worklist.insert(lutM);
511 worklist.erase(lutR);
512
513 combined_count++;
514 if (limit > 0)
515 limit--;
516 }
517 }
518 }
519 show_stats_by_arity();
520 }
521 };
522
523 static void split(std::vector<std::string> &tokens, const std::string &text, char sep)
524 {
525 size_t start = 0, end = 0;
526 while ((end = text.find(sep, start)) != std::string::npos) {
527 tokens.push_back(text.substr(start, end - start));
528 start = end + 1;
529 }
530 tokens.push_back(text.substr(start));
531 }
532
533 struct OptLutPass : public Pass {
534 OptLutPass() : Pass("opt_lut", "optimize LUT cells") { }
535 void help() YS_OVERRIDE
536 {
537 // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
538 log("\n");
539 log(" opt_lut [options] [selection]\n");
540 log("\n");
541 log("This pass combines cascaded $lut cells with unused inputs.\n");
542 log("\n");
543 log(" -dlogic <type>:<cell-port>=<LUT-input>[:<cell-port>=<LUT-input>...]\n");
544 log(" preserve connections to dedicated logic cell <type> that has ports\n");
545 log(" <cell-port> connected to LUT inputs <LUT-input>. this includes\n");
546 log(" the case where both LUT and dedicated logic input are connected to\n");
547 log(" the same constant.\n");
548 log("\n");
549 log(" -limit N\n");
550 log(" only perform the first N combines, then stop. useful for debugging.\n");
551 log("\n");
552 }
553 void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
554 {
555 log_header(design, "Executing OPT_LUT pass (optimize LUTs).\n");
556
557 dict<IdString, dict<int, IdString>> dlogic;
558 int limit = -1;
559
560 size_t argidx;
561 for (argidx = 1; argidx < args.size(); argidx++)
562 {
563 if (args[argidx] == "-dlogic" && argidx+1 < args.size())
564 {
565 std::vector<std::string> tokens;
566 split(tokens, args[++argidx], ':');
567 if (tokens.size() < 2)
568 log_cmd_error("The -dlogic option requires at least one connection.\n");
569 IdString type = "\\" + tokens[0];
570 for (auto it = tokens.begin() + 1; it != tokens.end(); ++it) {
571 std::vector<std::string> conn_tokens;
572 split(conn_tokens, *it, '=');
573 if (conn_tokens.size() != 2)
574 log_cmd_error("Invalid format of -dlogic signal mapping.\n");
575 IdString logic_port = "\\" + conn_tokens[0];
576 int lut_input = atoi(conn_tokens[1].c_str());
577 dlogic[type][lut_input] = logic_port;
578 }
579 continue;
580 }
581 if (args[argidx] == "-limit" && argidx + 1 < args.size())
582 {
583 limit = atoi(args[++argidx].c_str());
584 continue;
585 }
586 break;
587 }
588 extra_args(args, argidx, design);
589
590 int eliminated_count = 0, combined_count = 0;
591 for (auto module : design->selected_modules())
592 {
593 OptLutWorker worker(dlogic, module, limit - eliminated_count - combined_count);
594 eliminated_count += worker.eliminated_count;
595 combined_count += worker.combined_count;
596 }
597 if (eliminated_count)
598 design->scratchpad_set_bool("opt.did_something", true);
599 if (combined_count)
600 design->scratchpad_set_bool("opt.did_something", true);
601 log("\n");
602 log("Eliminated %d LUTs.\n", eliminated_count);
603 log("Combined %d LUTs.\n", combined_count);
604 }
605 } OptLutPass;
606
607 PRIVATE_NAMESPACE_END