Merge branch 'master' into wandwor
[yosys.git] / passes / techmap / muxcover.cc
1 /*
2 * yosys -- Yosys Open SYnthesis Suite
3 *
4 * Copyright (C) 2012 Clifford Wolf <clifford@clifford.at>
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 */
19
20 #include "kernel/yosys.h"
21 #include "kernel/sigtools.h"
22
23 USING_YOSYS_NAMESPACE
24 PRIVATE_NAMESPACE_BEGIN
25
26 #define COST_MUX2 100
27 #define COST_MUX4 220
28 #define COST_MUX8 460
29 #define COST_MUX16 940
30
31 struct MuxcoverWorker
32 {
33 Module *module;
34 SigMap sigmap;
35
36 struct newmux_t
37 {
38 int cost;
39 vector<SigBit> inputs, selects;
40 newmux_t() : cost(0) {}
41 };
42
43 struct tree_t
44 {
45 SigBit root;
46 dict<SigBit, Cell*> muxes;
47 dict<SigBit, newmux_t> newmuxes;
48 };
49
50 vector<tree_t> tree_list;
51
52 dict<tuple<SigBit, SigBit, SigBit>, tuple<SigBit, pool<SigBit>, bool>> decode_mux_cache;
53 dict<SigBit, tuple<SigBit, SigBit, SigBit>> decode_mux_reverse_cache;
54 int decode_mux_counter;
55
56 bool use_mux4;
57 bool use_mux8;
58 bool use_mux16;
59 bool nodecode;
60
61 MuxcoverWorker(Module *module) : module(module), sigmap(module)
62 {
63 use_mux4 = false;
64 use_mux8 = false;
65 use_mux16 = false;
66 nodecode = false;
67 decode_mux_counter = 0;
68 }
69
70 void treeify()
71 {
72 pool<SigBit> roots;
73 pool<SigBit> used_once;
74 dict<SigBit, Cell*> sig_to_mux;
75
76 for (auto wire : module->wires()) {
77 if (!wire->port_output)
78 continue;
79 for (auto bit : sigmap(wire))
80 roots.insert(bit);
81 }
82
83 for (auto cell : module->cells()) {
84 for (auto conn : cell->connections()) {
85 if (!cell->input(conn.first))
86 continue;
87 for (auto bit : sigmap(conn.second)) {
88 if (used_once.count(bit) || cell->type != "$_MUX_" || conn.first == "\\S")
89 roots.insert(bit);
90 used_once.insert(bit);
91 }
92 }
93 if (cell->type == "$_MUX_")
94 sig_to_mux[sigmap(cell->getPort("\\Y"))] = cell;
95 }
96
97 log(" Treeifying %d MUXes:\n", GetSize(sig_to_mux));
98
99 roots.sort();
100 for (auto rootsig : roots)
101 {
102 tree_t tree;
103 tree.root = rootsig;
104
105 pool<SigBit> wavefront;
106 wavefront.insert(rootsig);
107
108 while (!wavefront.empty()) {
109 SigBit bit = wavefront.pop();
110 if (sig_to_mux.count(bit) && (bit == rootsig || !roots.count(bit))) {
111 Cell *c = sig_to_mux.at(bit);
112 tree.muxes[bit] = c;
113 wavefront.insert(sigmap(c->getPort("\\A")));
114 wavefront.insert(sigmap(c->getPort("\\B")));
115 }
116 }
117
118 if (!tree.muxes.empty()) {
119 log(" Found tree with %d MUXes at root %s.\n", GetSize(tree.muxes), log_signal(tree.root));
120 tree_list.push_back(tree);
121 }
122 }
123
124 log(" Finished treeification: Found %d trees.\n", GetSize(tree_list));
125 }
126
127 bool follow_muxtree(SigBit &ret_bit, tree_t &tree, SigBit bit, const char *path)
128 {
129 if (*path) {
130 if (tree.muxes.count(bit) == 0)
131 return false;
132 char port_name[3] = {'\\', *path, 0};
133 return follow_muxtree(ret_bit, tree, sigmap(tree.muxes.at(bit)->getPort(port_name)), path+1);
134 } else {
135 ret_bit = bit;
136 return true;
137 }
138 }
139
140 int prepare_decode_mux(SigBit &A, SigBit B, SigBit sel, SigBit bit)
141 {
142 if (A == B)
143 return 0;
144
145 tuple<SigBit, SigBit, SigBit> key(A, B, sel);
146 if (decode_mux_cache.count(key) == 0) {
147 auto &entry = decode_mux_cache[key];
148 std::get<0>(entry) = module->addWire(NEW_ID);
149 std::get<2>(entry) = false;
150 decode_mux_reverse_cache[std::get<0>(entry)] = key;
151 }
152
153 auto &entry = decode_mux_cache[key];
154 A = std::get<0>(entry);
155 std::get<1>(entry).insert(bit);
156
157 if (std::get<2>(entry))
158 return 0;
159
160 return COST_MUX2 / GetSize(std::get<1>(entry));
161 }
162
163 void implement_decode_mux(SigBit ctrl_bit)
164 {
165 if (decode_mux_reverse_cache.count(ctrl_bit) == 0)
166 return;
167
168 auto &key = decode_mux_reverse_cache.at(ctrl_bit);
169 auto &entry = decode_mux_cache[key];
170
171 if (std::get<2>(entry))
172 return;
173
174 implement_decode_mux(std::get<0>(key));
175 implement_decode_mux(std::get<1>(key));
176
177 module->addMuxGate(NEW_ID, std::get<0>(key), std::get<1>(key), std::get<2>(key), ctrl_bit);
178 std::get<2>(entry) = true;
179 decode_mux_counter++;
180 }
181
182 int find_best_cover(tree_t &tree, SigBit bit)
183 {
184 if (tree.newmuxes.count(bit)) {
185 return tree.newmuxes.at(bit).cost;
186 }
187
188 SigBit A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P;
189 SigBit S1, S2, S3, S4, S5, S6, S7, S8;
190 SigBit T1, T2, T3, T4;
191 SigBit U1, U2;
192 SigBit V1;
193
194 newmux_t best_mux;
195 bool ok = true;
196
197 // 2-Input MUX
198
199 ok = ok && follow_muxtree(A, tree, bit, "A");
200 ok = ok && follow_muxtree(B, tree, bit, "B");
201
202 ok = ok && follow_muxtree(S1, tree, bit, "S");
203
204 if (ok)
205 {
206 newmux_t mux;
207
208 mux.inputs.push_back(A);
209 mux.inputs.push_back(B);
210 mux.selects.push_back(S1);
211
212 mux.cost += COST_MUX2;
213 mux.cost += find_best_cover(tree, A);
214 mux.cost += find_best_cover(tree, B);
215
216 best_mux = mux;
217 }
218
219 // 4-Input MUX
220
221 if (use_mux4)
222 {
223 ok = ok && follow_muxtree(A, tree, bit, "AA");
224 ok = ok && follow_muxtree(B, tree, bit, "AB");
225 ok = ok && follow_muxtree(C, tree, bit, "BA");
226 ok = ok && follow_muxtree(D, tree, bit, "BB");
227
228 ok = ok && follow_muxtree(S1, tree, bit, "AS");
229 ok = ok && follow_muxtree(S2, tree, bit, "BS");
230
231 if (nodecode)
232 ok = ok && S1 == S2;
233
234 ok = ok && follow_muxtree(T1, tree, bit, "S");
235
236 if (ok)
237 {
238 newmux_t mux;
239
240 mux.inputs.push_back(A);
241 mux.inputs.push_back(B);
242 mux.inputs.push_back(C);
243 mux.inputs.push_back(D);
244
245 mux.cost += prepare_decode_mux(S1, S2, T1, bit);
246
247 mux.selects.push_back(S1);
248 mux.selects.push_back(T1);
249
250 mux.cost += COST_MUX4;
251 mux.cost += find_best_cover(tree, A);
252 mux.cost += find_best_cover(tree, B);
253 mux.cost += find_best_cover(tree, C);
254 mux.cost += find_best_cover(tree, D);
255
256 if (best_mux.cost > mux.cost)
257 best_mux = mux;
258 }
259 }
260
261 // 8-Input MUX
262
263 if (use_mux8)
264 {
265 ok = ok && follow_muxtree(A, tree, bit, "AAA");
266 ok = ok && follow_muxtree(B, tree, bit, "AAB");
267 ok = ok && follow_muxtree(C, tree, bit, "ABA");
268 ok = ok && follow_muxtree(D, tree, bit, "ABB");
269 ok = ok && follow_muxtree(E, tree, bit, "BAA");
270 ok = ok && follow_muxtree(F, tree, bit, "BAB");
271 ok = ok && follow_muxtree(G, tree, bit, "BBA");
272 ok = ok && follow_muxtree(H, tree, bit, "BBB");
273
274 ok = ok && follow_muxtree(S1, tree, bit, "AAS");
275 ok = ok && follow_muxtree(S2, tree, bit, "ABS");
276 ok = ok && follow_muxtree(S3, tree, bit, "BAS");
277 ok = ok && follow_muxtree(S4, tree, bit, "BBS");
278
279 if (nodecode)
280 ok = ok && S1 == S2 && S2 == S3 && S3 == S4;
281
282 ok = ok && follow_muxtree(T1, tree, bit, "AS");
283 ok = ok && follow_muxtree(T2, tree, bit, "BS");
284
285 if (nodecode)
286 ok = ok && T1 == T2;
287
288 ok = ok && follow_muxtree(U1, tree, bit, "S");
289
290 if (ok)
291 {
292 newmux_t mux;
293
294 mux.inputs.push_back(A);
295 mux.inputs.push_back(B);
296 mux.inputs.push_back(C);
297 mux.inputs.push_back(D);
298 mux.inputs.push_back(E);
299 mux.inputs.push_back(F);
300 mux.inputs.push_back(G);
301 mux.inputs.push_back(H);
302
303 mux.cost += prepare_decode_mux(S1, S2, T1, bit);
304 mux.cost += prepare_decode_mux(S3, S4, T2, bit);
305 mux.cost += prepare_decode_mux(S1, S3, U1, bit);
306
307 mux.cost += prepare_decode_mux(T1, T2, U1, bit);
308
309 mux.selects.push_back(S1);
310 mux.selects.push_back(T1);
311 mux.selects.push_back(U1);
312
313 mux.cost += COST_MUX8;
314 mux.cost += find_best_cover(tree, A);
315 mux.cost += find_best_cover(tree, B);
316 mux.cost += find_best_cover(tree, C);
317 mux.cost += find_best_cover(tree, D);
318 mux.cost += find_best_cover(tree, E);
319 mux.cost += find_best_cover(tree, F);
320 mux.cost += find_best_cover(tree, G);
321 mux.cost += find_best_cover(tree, H);
322
323 if (best_mux.cost > mux.cost)
324 best_mux = mux;
325 }
326 }
327
328 // 16-Input MUX
329
330 if (use_mux16)
331 {
332 ok = ok && follow_muxtree(A, tree, bit, "AAAA");
333 ok = ok && follow_muxtree(B, tree, bit, "AAAB");
334 ok = ok && follow_muxtree(C, tree, bit, "AABA");
335 ok = ok && follow_muxtree(D, tree, bit, "AABB");
336 ok = ok && follow_muxtree(E, tree, bit, "ABAA");
337 ok = ok && follow_muxtree(F, tree, bit, "ABAB");
338 ok = ok && follow_muxtree(G, tree, bit, "ABBA");
339 ok = ok && follow_muxtree(H, tree, bit, "ABBB");
340 ok = ok && follow_muxtree(I, tree, bit, "BAAA");
341 ok = ok && follow_muxtree(J, tree, bit, "BAAB");
342 ok = ok && follow_muxtree(K, tree, bit, "BABA");
343 ok = ok && follow_muxtree(L, tree, bit, "BABB");
344 ok = ok && follow_muxtree(M, tree, bit, "BBAA");
345 ok = ok && follow_muxtree(N, tree, bit, "BBAB");
346 ok = ok && follow_muxtree(O, tree, bit, "BBBA");
347 ok = ok && follow_muxtree(P, tree, bit, "BBBB");
348
349 ok = ok && follow_muxtree(S1, tree, bit, "AAAS");
350 ok = ok && follow_muxtree(S2, tree, bit, "AABS");
351 ok = ok && follow_muxtree(S3, tree, bit, "ABAS");
352 ok = ok && follow_muxtree(S4, tree, bit, "ABBS");
353 ok = ok && follow_muxtree(S5, tree, bit, "BAAS");
354 ok = ok && follow_muxtree(S6, tree, bit, "BABS");
355 ok = ok && follow_muxtree(S7, tree, bit, "BBAS");
356 ok = ok && follow_muxtree(S8, tree, bit, "BBBS");
357
358 if (nodecode)
359 ok = ok && S1 == S2 && S2 == S3 && S3 == S4 && S4 == S5 && S5 == S6 && S6 == S7 && S7 == S8;
360
361 ok = ok && follow_muxtree(T1, tree, bit, "AAS");
362 ok = ok && follow_muxtree(T2, tree, bit, "ABS");
363 ok = ok && follow_muxtree(T3, tree, bit, "BAS");
364 ok = ok && follow_muxtree(T4, tree, bit, "BBS");
365
366 if (nodecode)
367 ok = ok && T1 == T2 && T2 == T3 && T3 == T4;
368
369 ok = ok && follow_muxtree(U1, tree, bit, "AS");
370 ok = ok && follow_muxtree(U2, tree, bit, "BS");
371
372 if (nodecode)
373 ok = ok && U1 == U2;
374
375 ok = ok && follow_muxtree(V1, tree, bit, "S");
376
377 if (ok)
378 {
379 newmux_t mux;
380
381 mux.inputs.push_back(A);
382 mux.inputs.push_back(B);
383 mux.inputs.push_back(C);
384 mux.inputs.push_back(D);
385 mux.inputs.push_back(E);
386 mux.inputs.push_back(F);
387 mux.inputs.push_back(G);
388 mux.inputs.push_back(H);
389 mux.inputs.push_back(I);
390 mux.inputs.push_back(J);
391 mux.inputs.push_back(K);
392 mux.inputs.push_back(L);
393 mux.inputs.push_back(M);
394 mux.inputs.push_back(N);
395 mux.inputs.push_back(O);
396 mux.inputs.push_back(P);
397
398 mux.cost += prepare_decode_mux(S1, S2, T1, bit);
399 mux.cost += prepare_decode_mux(S3, S4, T2, bit);
400 mux.cost += prepare_decode_mux(S5, S6, T3, bit);
401 mux.cost += prepare_decode_mux(S7, S8, T4, bit);
402 mux.cost += prepare_decode_mux(S1, S3, U1, bit);
403 mux.cost += prepare_decode_mux(S5, S7, U2, bit);
404 mux.cost += prepare_decode_mux(S1, S5, V1, bit);
405
406 mux.cost += prepare_decode_mux(T1, T2, U1, bit);
407 mux.cost += prepare_decode_mux(T3, T4, U2, bit);
408 mux.cost += prepare_decode_mux(T1, T3, V1, bit);
409
410 mux.cost += prepare_decode_mux(U1, U2, V1, bit);
411
412 mux.selects.push_back(S1);
413 mux.selects.push_back(T1);
414 mux.selects.push_back(U1);
415 mux.selects.push_back(V1);
416
417 mux.cost += COST_MUX16;
418 mux.cost += find_best_cover(tree, A);
419 mux.cost += find_best_cover(tree, B);
420 mux.cost += find_best_cover(tree, C);
421 mux.cost += find_best_cover(tree, D);
422 mux.cost += find_best_cover(tree, E);
423 mux.cost += find_best_cover(tree, F);
424 mux.cost += find_best_cover(tree, G);
425 mux.cost += find_best_cover(tree, H);
426 mux.cost += find_best_cover(tree, I);
427 mux.cost += find_best_cover(tree, J);
428 mux.cost += find_best_cover(tree, K);
429 mux.cost += find_best_cover(tree, L);
430 mux.cost += find_best_cover(tree, M);
431 mux.cost += find_best_cover(tree, N);
432 mux.cost += find_best_cover(tree, O);
433 mux.cost += find_best_cover(tree, P);
434
435 if (best_mux.cost > mux.cost)
436 best_mux = mux;
437 }
438 }
439
440 tree.newmuxes[bit] = best_mux;
441 return best_mux.cost;
442 }
443
444 void implement_best_cover(tree_t &tree, SigBit bit, int count_muxes_by_type[4])
445 {
446 newmux_t mux = tree.newmuxes.at(bit);
447
448 for (auto inbit : mux.inputs)
449 implement_best_cover(tree, inbit, count_muxes_by_type);
450
451 for (auto selbit : mux.selects)
452 implement_decode_mux(selbit);
453
454 if (GetSize(mux.inputs) == 0)
455 return;
456
457 if (GetSize(mux.inputs) == 2) {
458 count_muxes_by_type[0]++;
459 Cell *cell = module->addCell(NEW_ID, "$_MUX_");
460 cell->setPort("\\A", mux.inputs[0]);
461 cell->setPort("\\B", mux.inputs[1]);
462 cell->setPort("\\S", mux.selects[0]);
463 cell->setPort("\\Y", bit);
464 return;
465 }
466
467 if (GetSize(mux.inputs) == 4) {
468 count_muxes_by_type[1]++;
469 Cell *cell = module->addCell(NEW_ID, "$_MUX4_");
470 cell->setPort("\\A", mux.inputs[0]);
471 cell->setPort("\\B", mux.inputs[1]);
472 cell->setPort("\\C", mux.inputs[2]);
473 cell->setPort("\\D", mux.inputs[3]);
474 cell->setPort("\\S", mux.selects[0]);
475 cell->setPort("\\T", mux.selects[1]);
476 cell->setPort("\\Y", bit);
477 return;
478 }
479
480 if (GetSize(mux.inputs) == 8) {
481 count_muxes_by_type[2]++;
482 Cell *cell = module->addCell(NEW_ID, "$_MUX8_");
483 cell->setPort("\\A", mux.inputs[0]);
484 cell->setPort("\\B", mux.inputs[1]);
485 cell->setPort("\\C", mux.inputs[2]);
486 cell->setPort("\\D", mux.inputs[3]);
487 cell->setPort("\\E", mux.inputs[4]);
488 cell->setPort("\\F", mux.inputs[5]);
489 cell->setPort("\\G", mux.inputs[6]);
490 cell->setPort("\\H", mux.inputs[7]);
491 cell->setPort("\\S", mux.selects[0]);
492 cell->setPort("\\T", mux.selects[1]);
493 cell->setPort("\\U", mux.selects[2]);
494 cell->setPort("\\Y", bit);
495 return;
496 }
497
498 if (GetSize(mux.inputs) == 16) {
499 count_muxes_by_type[3]++;
500 Cell *cell = module->addCell(NEW_ID, "$_MUX16_");
501 cell->setPort("\\A", mux.inputs[0]);
502 cell->setPort("\\B", mux.inputs[1]);
503 cell->setPort("\\C", mux.inputs[2]);
504 cell->setPort("\\D", mux.inputs[3]);
505 cell->setPort("\\E", mux.inputs[4]);
506 cell->setPort("\\F", mux.inputs[5]);
507 cell->setPort("\\G", mux.inputs[6]);
508 cell->setPort("\\H", mux.inputs[7]);
509 cell->setPort("\\I", mux.inputs[8]);
510 cell->setPort("\\J", mux.inputs[9]);
511 cell->setPort("\\K", mux.inputs[10]);
512 cell->setPort("\\L", mux.inputs[11]);
513 cell->setPort("\\M", mux.inputs[12]);
514 cell->setPort("\\N", mux.inputs[13]);
515 cell->setPort("\\O", mux.inputs[14]);
516 cell->setPort("\\P", mux.inputs[15]);
517 cell->setPort("\\S", mux.selects[0]);
518 cell->setPort("\\T", mux.selects[1]);
519 cell->setPort("\\U", mux.selects[2]);
520 cell->setPort("\\V", mux.selects[3]);
521 cell->setPort("\\Y", bit);
522 return;
523 }
524
525 log_abort();
526 }
527
528 void treecover(tree_t &tree)
529 {
530 int count_muxes_by_type[4] = {0, 0, 0, 0};
531 find_best_cover(tree, tree.root);
532 implement_best_cover(tree, tree.root, count_muxes_by_type);
533 log(" Replaced tree at %s: %d MUX2, %d MUX4, %d MUX8, %d MUX16\n", log_signal(tree.root),
534 count_muxes_by_type[0], count_muxes_by_type[1], count_muxes_by_type[2], count_muxes_by_type[3]);
535 for (auto &it : tree.muxes)
536 module->remove(it.second);
537 }
538
539 void run()
540 {
541 log("Covering MUX trees in module %s..\n", log_id(module));
542
543 treeify();
544
545 log(" Covering trees:\n");
546
547 // pre-fill cache of decoder muxes
548 if (!nodecode)
549 for (auto &tree : tree_list) {
550 find_best_cover(tree, tree.root);
551 tree.newmuxes.clear();
552 }
553
554 for (auto &tree : tree_list)
555 treecover(tree);
556
557 if (!nodecode)
558 log(" Added a total of %d decoder MUXes.\n", decode_mux_counter);
559 }
560 };
561
562 struct MuxcoverPass : public Pass {
563 MuxcoverPass() : Pass("muxcover", "cover trees of MUX cells with wider MUXes") { }
564 void help() YS_OVERRIDE
565 {
566 // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
567 log("\n");
568 log(" muxcover [options] [selection]\n");
569 log("\n");
570 log("Cover trees of $_MUX_ cells with $_MUX{4,8,16}_ cells\n");
571 log("\n");
572 log(" -mux4, -mux8, -mux16\n");
573 log(" Use the specified types of MUXes. If none of those options are used,\n");
574 log(" the effect is the same as if all of them where used.\n");
575 log("\n");
576 log(" -nodecode\n");
577 log(" Do not insert decoder logic. This reduces the number of possible\n");
578 log(" substitutions, but guarantees that the resulting circuit is not\n");
579 log(" less efficient than the original circuit.\n");
580 log("\n");
581 }
582 void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
583 {
584 log_header(design, "Executing MUXCOVER pass (mapping to wider MUXes).\n");
585
586 bool use_mux4 = false;
587 bool use_mux8 = false;
588 bool use_mux16 = false;
589 bool nodecode = false;
590
591 size_t argidx;
592 for (argidx = 1; argidx < args.size(); argidx++)
593 {
594 if (args[argidx] == "-mux4") {
595 use_mux4 = true;
596 continue;
597 }
598 if (args[argidx] == "-mux8") {
599 use_mux8 = true;
600 continue;
601 }
602 if (args[argidx] == "-mux16") {
603 use_mux16 = true;
604 continue;
605 }
606 if (args[argidx] == "-nodecode") {
607 nodecode = true;
608 continue;
609 }
610 break;
611 }
612 extra_args(args, argidx, design);
613
614 if (!use_mux4 && !use_mux8 && !use_mux16) {
615 use_mux4 = true;
616 use_mux8 = true;
617 use_mux16 = true;
618 }
619
620 for (auto module : design->selected_modules())
621 {
622 MuxcoverWorker worker(module);
623 worker.use_mux4 = use_mux4;
624 worker.use_mux8 = use_mux8;
625 worker.use_mux16 = use_mux16;
626 worker.nodecode = nodecode;
627 worker.run();
628 }
629 }
630 } MuxcoverPass;
631
632 PRIVATE_NAMESPACE_END