2 * yosys -- Yosys Open SYnthesis Suite
4 * Copyright (C) 2018 whitequark <whitequark@whitequark.org>
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include "kernel/yosys.h"
21 #include "kernel/sigtools.h"
22 #include "kernel/modtools.h"
25 PRIVATE_NAMESPACE_BEGIN
29 dict
<IdString
, dict
<int, IdString
>> &dlogic
;
30 RTLIL::Module
*module
;
34 pool
<RTLIL::Cell
*> luts
;
35 dict
<RTLIL::Cell
*, int> luts_arity
;
36 dict
<RTLIL::Cell
*, pool
<RTLIL::Cell
*>> luts_dlogics
;
37 dict
<RTLIL::Cell
*, pool
<int>> luts_dlogic_inputs
;
39 int eliminated_count
= 0, combined_count
= 0;
41 bool evaluate_lut(RTLIL::Cell
*lut
, dict
<SigBit
, bool> inputs
)
43 SigSpec lut_input
= sigmap(lut
->getPort("\\A"));
44 int lut_width
= lut
->getParam("\\WIDTH").as_int();
45 Const lut_table
= lut
->getParam("\\LUT");
48 for (int i
= 0; i
< lut_width
; i
++)
50 SigBit input
= sigmap(lut_input
[i
]);
51 if (inputs
.count(input
))
53 lut_index
|= inputs
[input
] << i
;
57 lut_index
|= SigSpec(lut_input
[i
]).as_bool() << i
;
61 return lut_table
.extract(lut_index
).as_bool();
64 void show_stats_by_arity()
66 dict
<int, int> arity_counts
;
67 dict
<IdString
, int> dlogic_counts
;
70 for (auto lut_arity
: luts_arity
)
72 max_arity
= max(max_arity
, lut_arity
.second
);
73 arity_counts
[lut_arity
.second
]++;
76 for (auto &lut_dlogics
: luts_dlogics
)
78 for (auto &lut_dlogic
: lut_dlogics
.second
)
80 dlogic_counts
[lut_dlogic
->type
]++;
84 log("Number of LUTs: %8zu\n", luts
.size());
85 for (int arity
= 1; arity
<= max_arity
; arity
++)
87 if (arity_counts
[arity
])
88 log(" %d-LUT %16d\n", arity
, arity_counts
[arity
]);
90 for (auto &dlogic_count
: dlogic_counts
)
92 log(" with %-12s %4d\n", dlogic_count
.first
.c_str(), dlogic_count
.second
);
96 OptLutWorker(dict
<IdString
, dict
<int, IdString
>> &dlogic
, RTLIL::Module
*module
, int limit
) :
97 dlogic(dlogic
), module(module
), index(module
), sigmap(module
)
99 log("Discovering LUTs.\n");
100 for (auto cell
: module
->selected_cells())
102 if (cell
->type
== "$lut")
104 int lut_width
= cell
->getParam("\\WIDTH").as_int();
105 SigSpec lut_input
= cell
->getPort("\\A");
108 log("Found $lut\\WIDTH=%d cell %s.%s.\n", lut_width
, log_id(module
), log_id(cell
));
111 // First, find all dedicated logic we're connected to. This results in an overapproximation
112 // of such connections.
113 pool
<RTLIL::Cell
*> lut_all_dlogics
;
114 for (int i
= 0; i
< lut_width
; i
++)
116 SigBit bit
= lut_input
[i
];
117 for (auto &port
: index
.query_ports(bit
))
119 if (dlogic
.count(port
.cell
->type
))
121 auto &dlogic_map
= dlogic
[port
.cell
->type
];
122 if (dlogic_map
.count(i
))
124 if (port
.port
== dlogic_map
[i
])
126 lut_all_dlogics
.insert(port
.cell
);
133 // Second, make sure that the connection to dedicated logic is legal. If it is not legal,
134 // it means one of the two things:
135 // * The connection is spurious. I.e. this is dedicated logic that will be packed
136 // with some other LUT, and it just happens to be conected to this LUT as well.
137 // * The connection is illegal.
138 // In either of these cases, we don't need to concern ourselves with preserving the connection
139 // between this LUT and this dedicated logic cell.
140 pool
<RTLIL::Cell
*> lut_legal_dlogics
;
141 pool
<int> lut_dlogic_inputs
;
142 for (auto lut_dlogic
: lut_all_dlogics
)
144 auto &dlogic_map
= dlogic
[lut_dlogic
->type
];
146 for (auto &dlogic_conn
: dlogic_map
)
148 if (lut_width
<= dlogic_conn
.first
)
150 log(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic
->type
.c_str(), log_id(module
), log_id(lut_dlogic
));
151 log(" LUT input A[%d] not present.\n", dlogic_conn
.first
);
155 if (sigmap(lut_input
[dlogic_conn
.first
]) != sigmap(lut_dlogic
->getPort(dlogic_conn
.second
)))
157 log(" LUT has illegal connection to %s cell %s.%s.\n", lut_dlogic
->type
.c_str(), log_id(module
), log_id(lut_dlogic
));
158 log(" LUT input A[%d] (wire %s) not connected to %s port %s (wire %s).\n", dlogic_conn
.first
, log_signal(lut_input
[dlogic_conn
.first
]), lut_dlogic
->type
.c_str(), dlogic_conn
.second
.c_str(), log_signal(lut_dlogic
->getPort(dlogic_conn
.second
)));
166 log(" LUT has legal connection to %s cell %s.%s.\n", lut_dlogic
->type
.c_str(), log_id(module
), log_id(lut_dlogic
));
167 lut_legal_dlogics
.insert(lut_dlogic
);
168 for (auto &dlogic_conn
: dlogic_map
)
169 lut_dlogic_inputs
.insert(dlogic_conn
.first
);
173 // Third, determine LUT arity. An n-wide LUT that has k constant inputs and m inputs shared with dedicated
174 // logic implements an (n-k-m)-ary function.
175 for (int i
= 0; i
< lut_width
; i
++)
177 SigBit bit
= lut_input
[i
];
178 if (bit
.wire
|| lut_dlogic_inputs
.count(i
))
182 log(" Cell implements a %d-LUT.\n", lut_arity
);
183 luts_arity
[cell
] = lut_arity
;
184 luts_dlogics
[cell
] = lut_legal_dlogics
;
185 luts_dlogic_inputs
[cell
] = lut_dlogic_inputs
;
188 show_stats_by_arity();
191 log("Eliminating LUTs.\n");
192 pool
<RTLIL::Cell
*> worklist
= luts
;
193 while (worklist
.size())
197 log("Limit reached.\n");
201 auto lut
= worklist
.pop();
202 SigSpec lut_input
= sigmap(lut
->getPort("\\A"));
203 pool
<int> &lut_dlogic_inputs
= luts_dlogic_inputs
[lut
];
205 vector
<SigBit
> lut_inputs
;
206 for (auto &bit
: lut_input
)
209 lut_inputs
.push_back(sigmap(bit
));
212 bool const0_match
= true;
213 bool const1_match
= true;
214 vector
<bool> input_matches
;
215 for (size_t i
= 0; i
< lut_inputs
.size(); i
++)
216 input_matches
.push_back(true);
218 for (int eval
= 0; eval
< 1 << lut_inputs
.size(); eval
++)
220 dict
<SigBit
, bool> eval_inputs
;
221 for (size_t i
= 0; i
< lut_inputs
.size(); i
++)
222 eval_inputs
[lut_inputs
[i
]] = (eval
>> i
) & 1;
223 bool value
= evaluate_lut(lut
, eval_inputs
);
225 const0_match
= false;
227 const1_match
= false;
228 for (size_t i
= 0; i
< lut_inputs
.size(); i
++)
230 if (value
!= eval_inputs
[lut_inputs
[i
]])
231 input_matches
[i
] = false;
235 int input_match
= -1;
236 for (size_t i
= 0; i
< lut_inputs
.size(); i
++)
237 if (input_matches
[i
])
240 if (const0_match
|| const1_match
|| input_match
!= -1)
242 log("Found redundant cell %s.%s.\n", log_id(module
), log_id(lut
));
247 log(" Cell evaluates constant 0.\n");
252 log(" Cell evaluates constant 1.\n");
255 if (input_match
!= -1) {
256 log(" Cell evaluates signal %s.\n", log_signal(lut_inputs
[input_match
]));
257 value
= lut_inputs
[input_match
];
260 if (lut_dlogic_inputs
.size())
262 log(" Not eliminating cell (connected to dedicated logic).\n");
266 SigSpec lut_output
= lut
->getPort("\\Y");
267 for (auto &port
: index
.query_ports(lut_output
))
269 if (port
.cell
!= lut
&& luts
.count(port
.cell
))
270 worklist
.insert(port
.cell
);
273 module
->connect(lut_output
, value
);
274 sigmap
.add(lut_output
, value
);
278 luts_arity
.erase(lut
);
279 luts_dlogics
.erase(lut
);
280 luts_dlogic_inputs
.erase(lut
);
288 show_stats_by_arity();
291 log("Combining LUTs.\n");
293 while (worklist
.size())
297 log("Limit reached.\n");
301 auto lutA
= worklist
.pop();
302 SigSpec lutA_input
= sigmap(lutA
->getPort("\\A"));
303 SigSpec lutA_output
= sigmap(lutA
->getPort("\\Y")[0]);
304 int lutA_width
= lutA
->getParam("\\WIDTH").as_int();
305 int lutA_arity
= luts_arity
[lutA
];
306 pool
<int> &lutA_dlogic_inputs
= luts_dlogic_inputs
[lutA
];
308 auto lutA_output_ports
= index
.query_ports(lutA
->getPort("\\Y"));
309 if (lutA_output_ports
.size() != 2)
312 for (auto &port
: lutA_output_ports
)
314 if (port
.cell
== lutA
)
317 if (luts
.count(port
.cell
))
319 auto lutB
= port
.cell
;
320 SigSpec lutB_input
= sigmap(lutB
->getPort("\\A"));
321 SigSpec lutB_output
= sigmap(lutB
->getPort("\\Y")[0]);
322 int lutB_width
= lutB
->getParam("\\WIDTH").as_int();
323 int lutB_arity
= luts_arity
[lutB
];
324 pool
<int> &lutB_dlogic_inputs
= luts_dlogic_inputs
[lutB
];
326 log("Found %s.%s (cell A) feeding %s.%s (cell B).\n", log_id(module
), log_id(lutA
), log_id(module
), log_id(lutB
));
328 if (index
.query_is_output(lutA
->getPort("\\Y")))
330 log(" Not combining LUTs (cascade connection feeds module output).\n");
334 pool
<SigBit
> lutA_inputs
;
335 pool
<SigBit
> lutB_inputs
;
336 for (auto &bit
: lutA_input
)
339 lutA_inputs
.insert(sigmap(bit
));
341 for (auto &bit
: lutB_input
)
344 lutB_inputs
.insert(sigmap(bit
));
347 pool
<SigBit
> common_inputs
;
348 for (auto &bit
: lutA_inputs
)
350 if (lutB_inputs
.count(bit
))
351 common_inputs
.insert(bit
);
354 int lutM_arity
= lutA_arity
+ lutB_arity
- 1 - common_inputs
.size();
355 if (lutA_dlogic_inputs
.size())
356 log(" Cell A is a %d-LUT with %zu dedicated connections. ", lutA_arity
, lutA_dlogic_inputs
.size());
358 log(" Cell A is a %d-LUT. ", lutA_arity
);
359 if (lutB_dlogic_inputs
.size())
360 log("Cell B is a %d-LUT with %zu dedicated connections.\n", lutB_arity
, lutB_dlogic_inputs
.size());
362 log("Cell B is a %d-LUT.\n", lutB_arity
);
363 log(" Cells share %zu input(s) and can be merged into one %d-LUT.\n", common_inputs
.size(), lutM_arity
);
365 const int COMBINE_A
= 1, COMBINE_B
= 2, COMBINE_EITHER
= COMBINE_A
| COMBINE_B
;
366 int combine_mask
= 0;
367 if (lutM_arity
> lutA_width
)
369 log(" Not combining LUTs into cell A (combined LUT wider than cell A).\n");
371 else if (lutB_dlogic_inputs
.size() > 0)
373 log(" Not combining LUTs into cell A (cell B is connected to dedicated logic).\n");
375 else if (lutB
->get_bool_attribute("\\lut_keep"))
377 log(" Not combining LUTs into cell A (cell B has attribute \\lut_keep).\n");
381 combine_mask
|= COMBINE_A
;
383 if (lutM_arity
> lutB_width
)
385 log(" Not combining LUTs into cell B (combined LUT wider than cell B).\n");
387 else if (lutA_dlogic_inputs
.size() > 0)
389 log(" Not combining LUTs into cell B (cell A is connected to dedicated logic).\n");
391 else if (lutA
->get_bool_attribute("\\lut_keep"))
393 log(" Not combining LUTs into cell B (cell A has attribute \\lut_keep).\n");
397 combine_mask
|= COMBINE_B
;
400 int combine
= combine_mask
;
401 if (combine
== COMBINE_EITHER
)
403 log(" Can combine into either cell.\n");
406 log(" Cell A is a buffer or inverter, combining into cell B.\n");
409 else if (lutB_arity
== 1)
411 log(" Cell B is a buffer or inverter, combining into cell A.\n");
416 log(" Arbitrarily combining into cell A.\n");
421 RTLIL::Cell
*lutM
, *lutR
;
422 pool
<SigBit
> lutM_inputs
, lutR_inputs
;
423 pool
<int> lutM_dlogic_inputs
;
424 if (combine
== COMBINE_A
)
426 log(" Combining LUTs into cell A.\n");
428 lutM_inputs
= lutA_inputs
;
429 lutM_dlogic_inputs
= lutA_dlogic_inputs
;
431 lutR_inputs
= lutB_inputs
;
433 else if (combine
== COMBINE_B
)
435 log(" Combining LUTs into cell B.\n");
437 lutM_inputs
= lutB_inputs
;
438 lutM_dlogic_inputs
= lutB_dlogic_inputs
;
440 lutR_inputs
= lutA_inputs
;
444 log(" Cannot combine LUTs.\n");
448 pool
<SigBit
> lutR_unique
;
449 for (auto &bit
: lutR_inputs
)
451 if (!common_inputs
.count(bit
) && bit
!= lutA_output
)
452 lutR_unique
.insert(bit
);
455 int lutM_width
= lutM
->getParam("\\WIDTH").as_int();
456 SigSpec lutM_input
= sigmap(lutM
->getPort("\\A"));
457 std::vector
<SigBit
> lutM_new_inputs
;
458 for (int i
= 0; i
< lutM_width
; i
++)
460 bool input_unused
= false;
461 if (sigmap(lutM_input
[i
]) == lutA_output
)
463 if (!lutM_input
[i
].wire
&& !lutM_dlogic_inputs
.count(i
))
466 if (input_unused
&& lutR_unique
.size())
468 SigBit new_input
= lutR_unique
.pop();
469 log(" Connecting input %d as %s.\n", i
, log_signal(new_input
));
470 lutM_new_inputs
.push_back(new_input
);
472 else if (sigmap(lutM_input
[i
]) == lutA_output
)
474 log(" Disconnecting cascade input %d.\n", i
);
475 lutM_new_inputs
.push_back(SigBit());
479 log(" Leaving input %d as %s.\n", i
, log_signal(lutM_input
[i
]));
480 lutM_new_inputs
.push_back(lutM_input
[i
]);
483 log_assert(lutR_unique
.size() == 0);
485 RTLIL::Const
lutM_new_table(State::Sx
, 1 << lutM_width
);
486 for (int eval
= 0; eval
< 1 << lutM_width
; eval
++)
488 dict
<SigBit
, bool> eval_inputs
;
489 for (size_t i
= 0; i
< lutM_new_inputs
.size(); i
++)
491 eval_inputs
[lutM_new_inputs
[i
]] = (eval
>> i
) & 1;
493 eval_inputs
[lutA_output
] = evaluate_lut(lutA
, eval_inputs
);
494 lutM_new_table
[eval
] = (RTLIL::State
) evaluate_lut(lutB
, eval_inputs
);
497 log(" Cell A truth table: %s.\n", lutA
->getParam("\\LUT").as_string().c_str());
498 log(" Cell B truth table: %s.\n", lutB
->getParam("\\LUT").as_string().c_str());
499 log(" Merged truth table: %s.\n", lutM_new_table
.as_string().c_str());
501 lutM
->setParam("\\LUT", lutM_new_table
);
502 lutM
->setPort("\\A", lutM_new_inputs
);
503 lutM
->setPort("\\Y", lutB_output
);
505 luts_arity
[lutM
] = lutM_arity
;
507 luts_arity
.erase(lutR
);
508 lutR
->module
->remove(lutR
);
510 worklist
.insert(lutM
);
511 worklist
.erase(lutR
);
519 show_stats_by_arity();
523 static void split(std::vector
<std::string
> &tokens
, const std::string
&text
, char sep
)
525 size_t start
= 0, end
= 0;
526 while ((end
= text
.find(sep
, start
)) != std::string::npos
) {
527 tokens
.push_back(text
.substr(start
, end
- start
));
530 tokens
.push_back(text
.substr(start
));
533 struct OptLutPass
: public Pass
{
534 OptLutPass() : Pass("opt_lut", "optimize LUT cells") { }
535 void help() YS_OVERRIDE
537 // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
539 log(" opt_lut [options] [selection]\n");
541 log("This pass combines cascaded $lut cells with unused inputs.\n");
543 log(" -dlogic <type>:<cell-port>=<LUT-input>[:<cell-port>=<LUT-input>...]\n");
544 log(" preserve connections to dedicated logic cell <type> that has ports\n");
545 log(" <cell-port> connected to LUT inputs <LUT-input>. this includes\n");
546 log(" the case where both LUT and dedicated logic input are connected to\n");
547 log(" the same constant.\n");
550 log(" only perform the first N combines, then stop. useful for debugging.\n");
553 void execute(std::vector
<std::string
> args
, RTLIL::Design
*design
) YS_OVERRIDE
555 log_header(design
, "Executing OPT_LUT pass (optimize LUTs).\n");
557 dict
<IdString
, dict
<int, IdString
>> dlogic
;
561 for (argidx
= 1; argidx
< args
.size(); argidx
++)
563 if (args
[argidx
] == "-dlogic" && argidx
+1 < args
.size())
565 std::vector
<std::string
> tokens
;
566 split(tokens
, args
[++argidx
], ':');
567 if (tokens
.size() < 2)
568 log_cmd_error("The -dlogic option requires at least one connection.\n");
569 IdString type
= "\\" + tokens
[0];
570 for (auto it
= tokens
.begin() + 1; it
!= tokens
.end(); ++it
) {
571 std::vector
<std::string
> conn_tokens
;
572 split(conn_tokens
, *it
, '=');
573 if (conn_tokens
.size() != 2)
574 log_cmd_error("Invalid format of -dlogic signal mapping.\n");
575 IdString logic_port
= "\\" + conn_tokens
[0];
576 int lut_input
= atoi(conn_tokens
[1].c_str());
577 dlogic
[type
][lut_input
] = logic_port
;
581 if (args
[argidx
] == "-limit" && argidx
+ 1 < args
.size())
583 limit
= atoi(args
[++argidx
].c_str());
588 extra_args(args
, argidx
, design
);
590 int eliminated_count
= 0, combined_count
= 0;
591 for (auto module
: design
->selected_modules())
593 OptLutWorker
worker(dlogic
, module
, limit
- eliminated_count
- combined_count
);
594 eliminated_count
+= worker
.eliminated_count
;
595 combined_count
+= worker
.combined_count
;
597 if (eliminated_count
)
598 design
->scratchpad_set_bool("opt.did_something", true);
600 design
->scratchpad_set_bool("opt.did_something", true);
602 log("Eliminated %d LUTs.\n", eliminated_count
);
603 log("Combined %d LUTs.\n", combined_count
);
607 PRIVATE_NAMESPACE_END