1 // This file describes the main pattern matcher setup (of three total) that
2 // forms the `xilinx_dsp` pass described in xilinx_dsp.cc
3 // At a high level, it works as follows:
4 // ( 1) Starting from a DSP48E1 cell
5 // ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
6 // (attached to at most two $mux cells that implement clock-enable or
7 // reset functionality, using a subpattern discussed below)
8 // If ADREG matched, treat 'A' input as input of ADREG
9 // ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
11 // ( 4) If pre-adder was present, find match 'A' input for A2REG
12 // If pre-adder was not present, move ADREG to A2REG
13 // If A2REG, then match 'A' input for A1REG
14 // ( 5) Match 'B' input for B2REG
15 // If B2REG, then match 'B' input for B1REG
16 // ( 6) Match 'D' input for DREG
17 // ( 7) Match 'P' output that exclusively drives an MREG
18 // ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
20 // The other input to the adder is assumed to come in from the 'C' input
21 // (note: 'P' -> 'C' connections that exist for accumulators are
22 // recognised in xilinx_dsp.cc).
23 // ( 9) Match 'P' output that exclusively drives a PREG
24 // (10) If post-adder and PREG both present, match for a $mux cell driving
25 // the 'C' input, where one of the $mux's inputs is the PREG output.
26 // This indicates an accumulator situation, and one where a $mux exists
27 // to override the accumulated value:
28 // +--------------------------------+
33 // | /-------\ +----+ |
34 // +----+ +-| post- |___|PREG|---+ 'P'
35 // |MREG|------ | adder | +----+
37 // (11) If PREG present, match for a greater-than-or-equal $ge cell attached
38 // to the 'P' output where it is compared to a constant that is a
39 // power-of-2: e.g. `assign overflow = (PREG >= 2**40);`
40 // In this scenario, the pattern detector functionality of a DSP48E1 can
41 // be used to implement this function
43 // - The intention of this pattern matcher is for it to be compatible with
44 // DSP48E1 cells inferred from multiply operations by Yosys, as well as for
45 // user instantiations that may already contain the cells being packed...
46 // (though the latter is currently untested)
47 // - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used
48 // for each *REG match, it has been factored out into two subpatterns:
49 // in_dffe and out_dffe located at the bottom of this file.
50 // - Matching for pattern detector features is currently incomplete. For
51 // example, matching for underflow as well as overflow detection would be
52 // possible, as would auto-reset, enabling saturated arithmetic, detecting
53 // custom patterns, etc.
55 pattern xilinx_dsp_pack
// Signals tracked for the DSP's A/B/C/D inputs and its M/P output side; the
// numbered steps below rewrite them as surrounding cells are matched
58 state <SigSpec> sigA sigB sigC sigD sigM sigP
// Which input port (\A or \B) was chosen when matching the post-adder and
// the post-adder mux
59 state <IdString> postAddAB postAddMuxAB
// Clock-enable / reset polarities recorded for each matched register
60 state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol
61 state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol
// Matched register cells and their optional clock-enable / reset $mux cells
62 state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux
63 state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux
64 state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux
66 // Variables used for subpatterns
// argQ / argD are the 'Q' / 'D' signals the subpatterns work from
67 state <SigSpec> argQ argD
68 state <bool> ffcepol ffrstpol
// udata: results written by the subpatterns and read back by the main pattern
70 udata <SigSpec> dffD dffQ
71 udata <SigBit> dffclock
72 udata <Cell*> dff dffcemux dffrstmux
73 udata <bool> dffcepol dffrstpol
75 // (1) Starting from a DSP48E1 cell
77 select dsp->type.in(\DSP48E1)
80 code sigA sigB sigC sigD sigM clock
// Helper: scans from the MSB downwards comparing neighbouring bits and
// returns the low slice of the signal; applied to A and B below
// (presumably to trim repeated extension bits -- confirm)
81 auto unextend = [](const SigSpec &sig) {
83 for (i = GetSize(sig)-1; i > 0; i--)
84 if (sig[i] != sig[i-1])
86 // Do not remove non-const sign bit
89 return sig.extract(0, i);
91 sigA = unextend(port(dsp, \A));
92 sigB = unextend(port(dsp, \B));
// C and D are read with an empty SigSpec supplied as the fallback value
94 sigC = port(dsp, \C, SigSpec());
95 sigD = port(dsp, \D, SigSpec());
97 SigSpec P = port(dsp, \P);
// sigM is only derived from P when USE_MULT is "MULTIPLY"
98 if (param(dsp, \USE_MULT).decode_string() == "MULTIPLY") {
99 // Only care about those bits that are used
101 for (i = GetSize(P)-1; i >= 0; i--)
102 if (nusers(P[i]) > 1)
105 log_assert(nusers(P.extract_end(i)) <= 1);
106 // This sigM could have no users if the downstream sink (e.g. $add) is
107 // narrower than the $mul result, for example
110 sigM = P.extract(0, i);
// CLK is read with a default-constructed SigBit supplied as the fallback value
115 clock = port(dsp, \CLK, SigBit());
118 // (2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
119 // (attached to at most two $mux cells that implement clock-enable or
120 // reset functionality, using a subpattern discussed above)
121 // If matched, treat 'A' input as input of ADREG
122 code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
// Only attempted when the DSP's ADREG parameter is 0
123 if (param(dsp, \ADREG).as_int() == 0) {
// Copy the reset mux and its polarity from the subpattern's udata results
130 ffADrstmux = dffrstmux;
131 ffADrstpol = dffrstpol;
// Copy the clock-enable mux and its polarity from the subpattern's udata results
134 ffADcemux = dffcemux;
135 ffADcepol = dffcepol;
142 // (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
// Only consider a pre-adder while the D signal is empty or all zero
145 if sigD.empty() || sigD.is_fully_zero()
146 // Ensure that preAdder not already used
147 if param(dsp, \USE_DPORT).decode_string() == "FALSE"
148 if port(dsp, \INMODE, Const(0, 5)).is_fully_zero()
150 select preAdd->type.in($add)
151 // Output has to be 25 bits or less
152 select GetSize(port(preAdd, \Y)) <= 25
// Adder output must have exactly two users
153 select nusers(port(preAdd, \Y)) == 2
// Try both assignments of the adder's inputs to the DSP's A and D ports
154 choice <IdString> AB {\A, \B}
155 // A port has to be 30 bits or less
156 select GetSize(port(preAdd, AB)) <= 30
157 define <IdString> BA (AB == \A ? \B : \A)
158 // D port has to be 25 bits or less
159 select GetSize(port(preAdd, BA)) <= 25
// The adder output must be exactly the signal currently tracked as 'A'
160 index <SigSpec> port(preAdd, \Y) === sigA
// The adder's two operands become the new A and D signals
167 sigA = port(preAdd, \A);
168 sigD = port(preAdd, \B);
172 // (4) If pre-adder was present, find match 'A' input for A2REG
173 // If pre-adder was not present, move ADREG to A2REG
174 // Then match 'A' input for A1REG
175 code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol
176 // Only search for ffA2 if there was a pre-adder
177 // (otherwise ffA2 would have been matched as ffAD)
// Only attempted when the DSP's AREG parameter is 0
179 if (param(dsp, \AREG).as_int() == 0) {
// Copy the reset / clock-enable muxes and polarities from the subpattern's
// udata results; note the reset polarity goes into the shared ffArstpol
186 ffA2rstmux = dffrstmux;
187 ffArstpol = dffrstpol;
190 ffA2cepol = dffcepol;
191 ffA2cemux = dffcemux;
197 // And if there wasn't a pre-adder,
198 // move AD register to A
// The A2 slots are asserted to be empty, so each swap hands the AD match
// over to A2 and leaves the AD slot null
200 log_assert(!ffA2 && !ffA2cemux && !ffA2rstmux);
201 std::swap(ffA2, ffAD);
202 std::swap(ffA2cemux, ffADcemux);
203 std::swap(ffA2rstmux, ffADrstmux);
204 ffA2cepol = ffADcepol;
205 ffArstpol = ffADrstpol;
208 // Now attempt to match A1
// Compare the candidate A1 register's reset against A2's: presence of a
// reset mux, reset polarity, and the mux select signal
213 if ((ffA2rstmux != nullptr) ^ (dffrstmux != nullptr))
216 if (ffArstpol != dffrstpol)
218 if (port(ffA2rstmux, \S) != port(dffrstmux, \S))
220 ffA1rstmux = dffrstmux;
227 ffA1cemux = dffcemux;
228 ffA1cepol = dffcepol;
237 // (5) Match 'B' input for B2REG
238 // If B2REG, then match 'B' input for B1REG
239 code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol
// Only attempted when the DSP's BREG parameter is 0
240 if (param(dsp, \BREG).as_int() == 0) {
// Copy the reset / clock-enable muxes and polarities from the subpattern's
// udata results; note the reset polarity goes into the shared ffBrstpol
247 ffB2rstmux = dffrstmux;
248 ffBrstpol = dffrstpol;
251 ffB2cemux = dffcemux;
252 ffB2cepol = dffcepol;
256 // Now attempt to match B1
// Compare the candidate B1 register's reset against B2's: presence of a
// reset mux, reset polarity, and the mux select signal
261 if ((ffB2rstmux != nullptr) ^ (dffrstmux != nullptr))
264 if (ffBrstpol != dffrstpol)
266 if (port(ffB2rstmux, \S) != port(dffrstmux, \S))
268 ffB1rstmux = dffrstmux;
275 ffB1cemux = dffcemux;
276 ffB1cepol = dffcepol;
288 // (6) Match 'D' input for DREG
289 code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
// Only attempted when the DSP's DREG parameter is 0
290 if (param(dsp, \DREG).as_int() == 0) {
// Copy the reset mux and its polarity from the subpattern's udata results
297 ffDrstmux = dffrstmux;
298 ffDrstpol = dffrstpol;
309 // (7) Match 'P' output that exclusively drives an MREG
310 code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
// Requires MREG == 0 and exactly two users of sigM before the output
// register subpattern is tried
311 if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
313 subpattern(out_dffe);
// Copy the reset mux and its polarity from the subpattern's udata results
318 ffMrstmux = dffrstmux;
319 ffMrstpol = dffrstpol;
331 // (8) Match 'P' output that exclusively drives one of two inputs to an $add
332 // cell (post-adder).
333 // The other input to the adder is assumed to come in from the 'C' input
334 // (note: 'P' -> 'C' connections that exist for accumulators are
335 // recognised in xilinx_dsp.cc).
337 // Ensure that Z mux is not already used
// (the three OPMODE bits starting at bit 4 must all be zero)
338 if port(dsp, \OPMODE, SigSpec(0, 7)).extract(4,3).is_fully_zero()
340 select postAdd->type.in($add)
341 select GetSize(port(postAdd, \Y)) <= 48
342 choice <IdString> AB {\A, \B}
// The matched input must have exactly 2 users when there is no M
// clock-enable mux, and exactly 3 users when there is one
343 select nusers(port(postAdd, AB)) <= 3
344 filter ffMcemux || nusers(port(postAdd, AB)) == 2
345 filter !ffMcemux || nusers(port(postAdd, AB)) == 3
// Index on bit 0, then require the low bits of the input to equal sigP
347 index <SigBit> port(postAdd, AB)[0] === sigP[0]
348 filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
349 filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
350 // Check that remainder of AB is a sign- or zero-extension
351 filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
// The adder input opposite to postAddAB becomes the new C signal and the
// adder output becomes the new P signal
359 sigC = port(postAdd, postAddAB == \A ? \B : \A);
360 sigP = port(postAdd, \Y);
364 // (9) Match 'P' output that exclusively drives a PREG
365 code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
// Only attempted when the DSP's PREG parameter is 0
366 if (param(dsp, \PREG).as_int() == 0) {
368 // If ffMcemux is present and there is no postAdd, the new-value net must have three users: ffMcemux, ffM and ffPcemux
369 if (ffMcemux && !postAdd) users++;
// sigP must have exactly the expected number of users before the output
// register subpattern is tried
370 if (nusers(sigP) == users) {
372 subpattern(out_dffe);
// Copy the reset mux and its polarity from the subpattern's udata results
377 ffPrstmux = dffrstmux;
378 ffPrstpol = dffrstpol;
390 // (10) If post-adder and PREG both present, match for a $mux cell driving
391 // the 'C' input, where one of the $mux's inputs is the PREG output.
392 // This indicates an accumulator situation, and one where a $mux exists
393 // to override the accumulated value:
394 // +--------------------------------+
399 // | /-------\ +----+ |
400 // +----+ +-| post- |___|PREG|---+ 'P'
401 // |MREG|------ | adder | +----+
406 select postAddMux->type.in($mux)
// Mux output must have exactly two users
407 select nusers(port(postAddMux, \Y)) == 2
408 choice <IdString> AB {\A, \B}
// One mux input must be the P signal, and the mux output must be the
// signal currently tracked as 'C'
409 index <SigSpec> port(postAddMux, AB) === sigP
410 index <SigSpec> port(postAddMux, \Y) === sigC
// The mux input opposite to postAddMuxAB becomes the new C signal
417 sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
420 // (11) If PREG present, match for a greater-than-or-equal $ge cell attached to
421 // the 'P' output where it is compared to a constant that is a power-of-2:
422 // e.g. `assign overflow = (PREG >= 2**40);`
423 // In this scenario, the pattern detector functionality of a DSP48E1 can
424 // be used to implement this function
// Only when USE_PATTERN_DETECT is still "NO_PATDET"
427 if param(dsp, \USE_PATTERN_DETECT).decode_string() == "NO_PATDET"
428 select overflow->type.in($ge)
429 select GetSize(port(overflow, \Y)) <= 48
// The B operand must be a constant ...
430 select port(overflow, \B).is_fully_const()
431 define <Const> B port(overflow, \B).as_const()
// ... with exactly one bit set, i.e. a power of two
432 select std::count(B.bits.begin(), B.bits.end(), State::S1) == 1
// The A operand of the comparison must be the P signal
433 index <SigSpec> port(overflow, \A) === sigP
441 // #######################
443 // Subpattern for matching against input registers, based on knowledge of the
444 // 'Q' input. Typically, identifying registers with clock-enable and reset
445 // capability would be a task handled by other Yosys passes such as
446 // dff2dffe, but since DSP inference happens much before this, these patterns
447 // have to be manually identified.
449 // (1) Starting from a $dff cell that (partially or fully) drives the given
451 // (2) Match for a $mux cell implementing synchronous reset semantics ---
452 // one that exclusively drives the 'D' input of the $dff, with one of its
453 // $mux inputs being fully zero
454 // (3) Match for a $mux cell implementing clock enable semantics --- one that
455 // exclusively drives the 'D' input of the $dff (or the other input of
456 // the reset $mux) and where one of this $mux's inputs is connected to
457 // the 'Q' output of the $dff
// Screen every chunk of the requested 'Q' signal before searching for a register
465 for (const auto &c : argQ.chunks()) {
466 // Abandon matches when 'Q' is a constant
469 // Abandon matches when 'Q' has the keep attribute set
470 if (c.wire->get_bool_attribute(\keep))
472 // Abandon matches when 'Q' has a non-zero init attribute set
473 // (not supported by DSP48E1)
// A missing init attribute yields an empty Const as the fallback value
474 Const init = c.wire->attributes.at(\init, Const());
// Only the init bits covering this chunk are inspected; undefined (Sx)
// and zero (S0) bits are tolerated
476 for (auto b : init.extract(c.offset, c.width))
477 if (b != State::Sx && b != State::S0)
482 // (1) Starting from a $dff cell that (partially or fully) drives the given
485 select ff->type.in($dff)
486 // DSP48E1 does not support clock inversion
487 select param(ff, \CLK_POLARITY).as_bool()
// Try each bit offset into the register, so that argQ may be a slice of a
// wider $dff output
489 slice offset GetSize(port(ff, \D))
490 index <SigBit> port(ff, \Q)[offset] === argQ[0]
492 // Check that the rest of argQ is present
493 filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
494 filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
// Accept any clock while none has been fixed yet; otherwise the register
// must use the same clock
496 filter clock == SigBit() || port(ff, \CLK) == clock
502 SigSpec Q = port(ff, \Q);
// Hand the register's clock back to the caller through udata
504 dffclock = port(ff, \CLK);
508 dffD.replace(argQ, argD);
509 // Only search for ffrstmux if dffD only
510 // has two (ff, ffrstmux) users
511 if (nusers(dffD) > 2)
515 // (2) Match for a $mux cell implementing synchronous reset semantics ---
516 // exclusively drives the 'D' input of the $dff, with one of the $mux
517 // inputs being fully zero
520 select ffrstmux->type.in($mux)
// The mux must drive the current 'D' signal
521 index <SigSpec> port(ffrstmux, \Y) === argD
// BA is the mux input that carries the reset value
523 choice <IdString> BA {\B, \A}
524 // DSP48E1 only supports reset to zero
525 select port(ffrstmux, BA).is_fully_zero()
// pol is true when the zero constant sits on the \B input
527 define <bool> pol (BA == \B)
// Hand the reset mux and its polarity back to the caller through udata
534 dffrstmux = ffrstmux;
535 dffrstpol = ffrstpol;
// Continue from the mux's non-reset input, and substitute it for the mux
// output inside dffD
536 argD = port(ffrstmux, ffrstpol ? \A : \B);
537 dffD.replace(port(ffrstmux, \Y), argD);
539 // Only search for ffcemux if argQ has at
540 // least 3 users (ff, <upstream>, ffrstmux) and
541 // dffD only has two (ff, ffrstmux)
542 if (!(nusers(argQ) >= 3 && nusers(dffD) == 2))
549 // (3) Match for a $mux cell implementing clock enable semantics --- one that
550 // exclusively drives the 'D' input of the $dff (or the other input of
551 // the reset $mux) and where one of this $mux's inputs is connected to
552 // the 'Q' output of the $dff
555 select ffcemux->type.in($mux)
// The mux must drive the current 'D' signal and have the register's own
// 'Q' signal on one of its inputs
556 index <SigSpec> port(ffcemux, \Y) === argD
557 choice <IdString> AB {\A, \B}
558 index <SigSpec> port(ffcemux, AB) === argQ
// pol is true when the fed-back 'Q' signal sits on the \A input
559 define <bool> pol (AB == \A)
// Continue from the mux's other input, and substitute it for the mux
// output inside dffD
568 argD = port(ffcemux, ffcepol ? \B : \A);
569 dffD.replace(port(ffcemux, \Y), argD);
575 // #######################
577 // Subpattern for matching against output registers, based on knowledge of the
580 // (1) Starting from an optional $mux cell that implements clock enable
581 // semantics --- one where the given 'D' argument (partially or fully)
582 // drives one of its two inputs
583 // (2) Starting from, or continuing onto, another optional $mux cell that
584 // implements synchronous reset semantics --- one where the given 'D'
585 // argument (or the clock enable $mux output) drives one of its two inputs
586 // and where the other input is fully zero
587 // (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
588 // output of the previous clock enable or reset $mux cells)
// Screen every chunk of the requested 'D' signal before searching for a register
594 for (auto c : argD.chunks())
595 // Abandon matches when 'D' has the keep attribute set
596 if (c.wire->get_bool_attribute(\keep))
600 // (1) Starting from an optional $mux cell that implements clock enable
601 // semantics --- one where the given 'D' argument (partially or fully)
602 // drives one of its two inputs
604 select ffcemux->type.in($mux)
605 // ffcemux output must have two users: ffcemux and ff.D
606 select nusers(port(ffcemux, \Y)) == 2
// AB is the keep-last-value input of the mux
608 choice <IdString> AB {\A, \B}
609 // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s)
610 select nusers(port(ffcemux, AB)) >= 3
// BA is the opposite input; argD is located inside it at any bit offset,
// so argD may be a slice of a wider mux input
612 slice offset GetSize(port(ffcemux, \Y))
613 define <IdString> BA (AB == \A ? \B : \A)
614 index <SigBit> port(ffcemux, BA)[offset] === argD[0]
616 // Check that the rest of argD is present
617 filter GetSize(port(ffcemux, BA)) >= offset + GetSize(argD)
618 filter port(ffcemux, BA).extract(offset, GetSize(argD)) == argD
// pol is true when the keep-last-value input is \A
621 define <bool> pol (AB == \A)
// BA here is the signal on the input selected by the opposite of ffcepol;
// Y is the mux output
630 SigSpec BA = port(ffcemux, ffcepol ? \B : \A);
631 SigSpec Y = port(ffcemux, \Y);
// Rewrite argQ by substituting the mux's other input for BA
634 argQ.replace(BA, port(ffcemux, ffcepol ? \A : \B));
641 // (2) Starting from, or continuing onto, another optional $mux cell that
642 // implements synchronous reset semantics --- one where the given 'D'
643 // argument (or the clock enable $mux output) drives one of its two inputs
644 // and where the other input is fully zero
646 select ffrstmux->type.in($mux)
647 // ffrstmux output must have two users: ffrstmux and ff.D
648 select nusers(port(ffrstmux, \Y)) == 2
// BA is the mux input that carries the reset value
650 choice <IdString> BA {\B, \A}
651 // DSP48E1 only supports reset to zero
652 select port(ffrstmux, BA).is_fully_zero()
// AB is the opposite (data) input; argD is located inside it at any bit offset
654 slice offset GetSize(port(ffrstmux, \Y))
655 define <IdString> AB (BA == \B ? \A : \B)
656 index <SigBit> port(ffrstmux, AB)[offset] === argD[0]
658 // Check that offset is consistent
659 filter !ffcemux || ffoffset == offset
660 // Check that the rest of argD is present
661 filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD)
662 filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD
// pol is true when the data input is \A, i.e. the zero constant is on \B
665 define <bool> pol (AB == \A)
672 dffrstmux = ffrstmux;
// AB here is the signal on the data input selected by ffrstpol; Y is the mux output
674 SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B);
675 SigSpec Y = port(ffrstmux, \Y);
// Hand the reset mux and its polarity back to the caller through udata
// NOTE(review): dffrstmux is assigned from ffrstmux twice in this code
// section; the second assignment looks redundant -- confirm against the
// surrounding control flow
678 dffrstmux = ffrstmux;
679 dffrstpol = ffrstpol;
683 // (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
684 // output of the previous clock enable or reset $mux cells)
686 select ff->type.in($dff)
687 // DSP48E1 does not support clock inversion
688 select param(ff, \CLK_POLARITY).as_bool()
// Try each bit offset into the register, so that argD may be a slice of a
// wider $dff input
690 slice offset GetSize(port(ff, \D))
691 index <SigBit> port(ff, \D)[offset] === argD[0]
693 // Check that offset is consistent
694 filter (!ffcemux && !ffrstmux) || ffoffset == offset
695 // Check that the rest of argD is present
696 filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
697 filter port(ff, \D).extract(offset, GetSize(argD)) == argD
698 // Check that FF.Q is connected to CE-mux
699 filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
// Accept any clock while none has been fixed yet; otherwise the register
// must use the same clock
701 filter clock == SigBit() || port(ff, \CLK) == clock
707 SigSpec D = port(ff, \D);
708 SigSpec Q = port(ff, \Q);
714 // Abandon matches when 'Q' has a non-zero init attribute set
715 // (not supported by DSP48E1)
716 for (auto c : argQ.chunks()) {
// A missing init attribute yields an empty Const as the fallback value
717 Const init = c.wire->attributes.at(\init, Const());
// Only the init bits covering this chunk are inspected; undefined (Sx)
// and zero (S0) bits are tolerated
719 for (auto b : init.extract(c.offset, c.width))
720 if (b != State::Sx && b != State::S0)
// Hand the register's clock back to the caller through udata
726 dffclock = port(ff, \CLK);