Merge pull request #1637 from YosysHQ/mwk/fix-1634
[yosys.git] / passes / pmgen / ice40_dsp.pmg
index 24bdfd3f2e4d83d9ed6964d05d6c46103645ae69..9d649cb98a2b0b3950a389fa0305b2a771af3867 100644 (file)
 pattern ice40_dsp
 
 state <SigBit> clock
-state <bool> clock_pol
-state <std::set<SigBit>> sigAset sigBset
-state <SigSpec> sigA sigB sigCD sigH sigO sigOused
-state <Cell*> addAB muxAB
+state <bool> clock_pol cd_signed o_lo
+state <SigSpec> sigA sigB sigCD sigH sigO
+state <Cell*> add mux
+state <IdString> addAB muxAB
+
+state <bool> ffAholdpol ffBholdpol ffCDholdpol ffOholdpol
+state <bool> ffArstpol ffBrstpol ffCDrstpol ffOrstpol
+
+state <Cell*> ffA ffAholdmux ffArstmux ffB ffBholdmux ffBrstmux ffCD ffCDholdmux
+state <Cell*> ffFJKG ffH ffO ffOholdmux ffOrstmux
+
+// subpattern
+state <SigSpec> argQ argD
+state <bool> ffholdpol ffrstpol
+state <int> ffoffset
+udata <SigSpec> dffD dffQ
+udata <SigBit> dffclock
+udata <Cell*> dff dffholdmux dffrstmux
+udata <bool> dffholdpol dffrstpol dffclock_pol
 
 match mul
        select mul->type.in($mul, \SB_MAC16)
        select GetSize(mul->getPort(\A)) + GetSize(mul->getPort(\B)) > 10
 endmatch
 
-code sigAset sigBset
-       SigSpec A = port(mul, \A);
-       A.remove_const();
-       sigAset = A.to_sigbit_set();
-       SigSpec B = port(mul, \B);
-       B.remove_const();
-       sigBset = B.to_sigbit_set();
-endcode
-
-code sigH
+code sigA sigB sigH
+       auto unextend = [](const SigSpec &sig) {
+               int i;
+               for (i = GetSize(sig)-1; i > 0; i--)
+                       if (sig[i] != sig[i-1])
+                               break;
+               // Do not remove non-const sign bit
+               if (sig[i].wire)
+                       ++i;
+               return sig.extract(0, i);
+       };
+       sigA = unextend(port(mul, \A));
+       sigB = unextend(port(mul, \B));
+
+       SigSpec O;
        if (mul->type == $mul)
-               sigH = mul->getPort(\Y);
+               O = mul->getPort(\Y);
        else if (mul->type == \SB_MAC16)
-               sigH = mul->getPort(\O);
+               O = mul->getPort(\O);
        else log_abort();
-       if (GetSize(sigH) <= 10)
+       if (GetSize(O) <= 10)
                reject;
-endcode
 
-match ffA
-       if mul->type != \SB_MAC16 || !param(mul, \A_REG).as_bool()
-       if !sigAset.empty()
-       select ffA->type.in($dff)
-       optional
-endmatch
+       // Only care about those bits that are used
+       int i;
+       for (i = 0; i < GetSize(O); i++) {
+               if (nusers(O[i]) <= 1)
+                       break;
+               sigH.append(O[i]);
+       }
+       // This sigM could have no users if downstream sinks (e.g. $add) is
+       //   narrower than $mul result, for example
+       if (i == 0)
+               reject;
 
-code sigA clock clock_pol
-       sigA = port(mul, \A);
+       log_assert(nusers(O.extract_end(i)) <= 1);
+endcode
 
-       if (ffA) {
-               auto ffAset = port(ffA, \Q).to_sigbit_set();
-               if (!std::includes(ffAset.begin(), ffAset.end(), sigAset.begin(), sigAset.end()))
-                       reject;
+code argQ ffA ffAholdmux ffArstmux ffAholdpol ffArstpol sigA clock clock_pol
+       if (mul->type != \SB_MAC16 || !param(mul, \A_REG, State::S0).as_bool()) {
+               argQ = sigA;
+               subpattern(in_dffe);
+               if (dff) {
+                       ffA = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       if (dffrstmux) {
+                               ffArstmux = dffrstmux;
+                               ffArstpol = dffrstpol;
+                       }
+                       if (dffholdmux) {
+                               ffAholdmux = dffholdmux;
+                               ffAholdpol = dffholdpol;
+                       }
+                       sigA = dffD;
+               }
+       }
+endcode
 
-               for (auto b : port(ffA, \Q))
-                       if (b.wire->get_bool_attribute(\keep))
-                               reject;
+code argQ ffB ffBholdmux ffBrstmux ffBholdpol ffBrstpol sigB clock clock_pol
+       if (mul->type != \SB_MAC16 || !param(mul, \B_REG, State::S0).as_bool()) {
+               argQ = sigB;
+               subpattern(in_dffe);
+               if (dff) {
+                       ffB = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       if (dffrstmux) {
+                               ffBrstmux = dffrstmux;
+                               ffBrstpol = dffrstpol;
+                       }
+                       if (dffholdmux) {
+                               ffBholdmux = dffholdmux;
+                               ffBholdpol = dffholdpol;
+                       }
+                       sigB = dffD;
+               }
+       }
+endcode
 
-               clock = port(ffA, \CLK).as_bit();
-               clock_pol = param(ffA, \CLK_POLARITY).as_bool();
+code argD ffFJKG sigH clock clock_pol
+       if (nusers(sigH) == 2 &&
+                       (mul->type != \SB_MAC16 ||
+                        (!param(mul, \TOP_8x8_MULT_REG, State::S0).as_bool() && !param(mul, \BOT_8x8_MULT_REG, State::S0).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1, State::S0).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1, State::S0).as_bool()))) {
+               argD = sigH;
+               subpattern(out_dffe);
+               if (dff) {
+                       // F/J/K/G do not have a CE-like (hold) input
+                       if (dffholdmux)
+                               goto reject_ffFJKG;
+
+                       // Reset signal of F/J (IRSTTOP) and K/G (IRSTBOT)
+                       //   shared with A and B
+                       if ((ffArstmux != NULL) != (dffrstmux != NULL))
+                               goto reject_ffFJKG;
+                       if ((ffBrstmux != NULL) != (dffrstmux != NULL))
+                               goto reject_ffFJKG;
+                       if (ffArstmux) {
+                               if (port(ffArstmux, \S) != port(dffrstmux, \S))
+                                       goto reject_ffFJKG;
+                               if (ffArstpol != dffrstpol)
+                                       goto reject_ffFJKG;
+                       }
+                       if (ffBrstmux) {
+                               if (port(ffBrstmux, \S) != port(dffrstmux, \S))
+                                       goto reject_ffFJKG;
+                               if (ffBrstpol != dffrstpol)
+                                       goto reject_ffFJKG;
+                       }
+
+                       ffFJKG = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       sigH = dffQ;
+
+reject_ffFJKG:                 ;
+               }
+       }
+endcode
 
-               sigA.replace(port(ffA, \Q), port(ffA, \D));
+code argD ffH sigH sigO clock clock_pol
+       if (ffFJKG && nusers(sigH) == 2 &&
+                       (mul->type != \SB_MAC16 || !param(mul, \PIPELINE_16x16_MULT_REG2, State::S0).as_bool())) {
+               argD = sigH;
+               subpattern(out_dffe);
+               if (dff) {
+                       // H does not have a CE-like (hold) input
+                       if (dffholdmux)
+                               goto reject_ffH;
+
+                       // Reset signal of H (IRSTBOT) shared with B
+                       if ((ffBrstmux != NULL) != (dffrstmux != NULL))
+                               goto reject_ffH;
+                       if (ffBrstmux) {
+                               if (port(ffBrstmux, \S) != port(dffrstmux, \S))
+                                       goto reject_ffH;
+                               if (ffBrstpol != dffrstpol)
+                                       goto reject_ffH;
+                       }
+
+                       ffH = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       sigH = dffQ;
+
+reject_ffH:            ;
+               }
        }
+
+       sigO = sigH;
 endcode
 
-match ffB
-       if mul->type != \SB_MAC16 || !param(mul, \B_REG).as_bool()
-       if !sigBset.empty()
-       select ffB->type.in($dff)
-       optional
-endmatch
+match add
+       if mul->type != \SB_MAC16 || (param(mul, \TOPOUTPUT_SELECT, State::S0).as_int() == 3 && param(mul, \BOTOUTPUT_SELECT, State::S0).as_int() == 3)
 
-code sigB clock clock_pol
-       sigB = port(mul, \B);
+       select add->type.in($add)
+       choice <IdString> AB {\A, \B}
+       select nusers(port(add, AB)) == 2
 
-       if (ffB) {
-               auto ffBset = port(ffB, \Q).to_sigbit_set();
-               if (!std::includes(ffBset.begin(), ffBset.end(), sigBset.begin(), sigBset.end()))
-                       reject;
+       index <SigBit> port(add, AB)[0] === sigH[0]
+       filter GetSize(port(add, AB)) <= GetSize(sigH)
+       filter port(add, AB) == sigH.extract(0, GetSize(port(add, AB)))
+       filter nusers(sigH.extract_end(GetSize(port(add, AB)))) <= 1
+       set addAB AB
+       optional
+endmatch
 
-               for (auto b : port(ffB, \Q))
-                       if (b.wire->get_bool_attribute(\keep))
-                               reject;
+code sigCD sigO cd_signed
+       if (add) {
+               sigCD = port(add, addAB == \A ? \B : \A);
+               cd_signed = param(add, addAB == \A ? \B_SIGNED : \A_SIGNED).as_bool();
 
-               SigBit c = port(ffB, \CLK).as_bit();
-               bool cp = param(ffB, \CLK_POLARITY).as_bool();
+               int natural_mul_width = GetSize(sigA) + GetSize(sigB);
+               int actual_mul_width = GetSize(sigH);
+               int actual_acc_width = GetSize(sigCD);
 
-               if (clock != SigBit() && (c != clock || cp != clock_pol))
+               if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width))
+                       reject;
+               // If accumulator, check adder width and signedness
+               if (sigCD == sigH && (actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED, State::S0).as_bool() != param(add, \A_SIGNED).as_bool()))
                        reject;
 
-               clock = c;
-               clock_pol = cp;
-
-               sigB.replace(port(ffB, \Q), port(ffB, \D));
+               sigO = port(add, \Y);
        }
 endcode
 
-match ffFJKG
-       if mul->type != \SB_MAC16 || (!param(mul, \TOP_8x8_MULT_REG).as_bool() && !param(mul, \BOT_8x8_MULT_REG).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG2).as_bool())
-       select ffFJKG->type.in($dff)
-       select nusers(port(ffFJKG, \D)) == 2
-       index <SigSpec> port(ffFJKG, \D) === sigH
-       // Ensure pipeline register is not already used
+match mux
+       select mux->type == $mux
+       choice <IdString> AB {\A, \B}
+       select nusers(port(mux, AB)) == 2
+       index <SigSpec> port(mux, AB) === sigO
+       set muxAB AB
        optional
 endmatch
 
-code sigH sigO clock clock_pol
-       if (ffFJKG) {
-               sigH = port(ffFJKG, \Q);
-               for (auto b : sigH)
-                       if (b.wire->get_bool_attribute(\keep))
-                               reject;
+code sigO
+       if (mux)
+               sigO = port(mux, \Y);
+endcode
 
-               SigBit c = port(ffFJKG, \CLK).as_bit();
-               bool cp = param(ffFJKG, \CLK_POLARITY).as_bool();
+code argD ffO ffOholdmux ffOrstmux ffOholdpol ffOrstpol sigO sigCD clock clock_pol cd_signed o_lo
+       if (mul->type != \SB_MAC16 ||
+                       // Ensure that register is not already used
+                       ((param(mul, \TOPOUTPUT_SELECT, 0).as_int() != 1 && param(mul, \BOTOUTPUT_SELECT, 0).as_int() != 1) &&
+                        // Ensure that OLOADTOP/OLOADBOT is unused or zero
+                        (port(mul, \OLOADTOP, State::S0).is_fully_zero() && port(mul, \OLOADBOT, State::S0).is_fully_zero()))) {
 
-               if (clock != SigBit() && (c != clock || cp != clock_pol))
-                       reject;
+               dff = nullptr;
 
-               clock = c;
-               clock_pol = cp;
-       }
+               // First try entire sigO
+               if (nusers(sigO) == 2) {
+                       argD = sigO;
+                       subpattern(out_dffe);
+               }
 
-       sigO = sigH;
-endcode
+               // Otherwise try just its least significant 16 bits
+               if (!dff && GetSize(sigO) > 16) {
+                       argD = sigO.extract(0, 16);
+                       if (nusers(argD) == 2) {
+                               subpattern(out_dffe);
+                               o_lo = dff;
+                       }
+               }
 
-match addA
-       select addA->type.in($add)
-       select nusers(port(addA, \A)) == 2
-       filter param(addA, \A_WIDTH).as_int() <= GetSize(sigH)
-       //index <SigSpec> port(addA, \A) === sigH.extract(0, param(addA, \A_WIDTH).as_int())
-       filter port(addA, \A) ==  sigH.extract(0, param(addA, \A_WIDTH).as_int())
-       optional
-endmatch
+               if (dff) {
+                       ffO = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       if (dffrstmux) {
+                               ffOrstmux = dffrstmux;
+                               ffOrstpol = dffrstpol;
+                       }
+                       if (dffholdmux) {
+                               ffOholdmux = dffholdmux;
+                               ffOholdpol = dffholdpol;
+                       }
+
+                       sigO.replace(sigO.extract(0, GetSize(dffQ)), dffQ);
+               }
 
-match addB
-       if !addA
-       select addB->type.in($add, $sub)
-       select nusers(port(addB, \B)) == 2
-       filter param(addB, \B_WIDTH).as_int() <= GetSize(sigH)
-       //index <SigSpec> port(addB, \B) === sigH.extract(0, param(addB, \B_WIDTH).as_int())
-       filter port(addB, \B) ==  sigH.extract(0, param(addB, \B_WIDTH).as_int())
-       optional
-endmatch
+               // Loading value into output register is not
+               //   supported unless using accumulator
+               if (mux) {
+                       if (sigCD != sigO)
+                               reject;
+                       sigCD = port(mux, muxAB == \B ? \A : \B);
 
-code addAB sigCD sigO
-       bool CD_SIGNED = false;
-       if (addA) {
-               addAB = addA;
-               sigCD = port(addAB, \B);
-               CD_SIGNED = param(addAB, \B_SIGNED).as_bool();
-       }
-       if (addB) {
-               addAB = addB;
-               sigCD = port(addAB, \A);
-               CD_SIGNED = param(addAB, \A_SIGNED).as_bool();
+                       cd_signed = add && param(add, \A_SIGNED).as_bool() && param(add, \B_SIGNED).as_bool();
+               }
        }
-       if (addAB) {
-               if (mul->type == \SB_MAC16) {
-                       // Ensure that adder is not used
-                       if (param(mul, \TOPOUTPUT_SELECT).as_int() != 3 ||
-                                       param(mul, \BOTOUTPUT_SELECT).as_int() != 3)
-                               reject;
+endcode
+
+code argQ ffCD ffCDholdmux ffCDholdpol ffCDrstpol sigCD clock clock_pol
+       if (!sigCD.empty() && sigCD != sigO &&
+                       (mul->type != \SB_MAC16 || (!param(mul, \C_REG, State::S0).as_bool() && !param(mul, \D_REG, State::S0).as_bool()))) {
+               argQ = sigCD;
+               subpattern(in_dffe);
+               if (dff) {
+                       if (dffholdmux) {
+                               ffCDholdmux = dffholdmux;
+                               ffCDholdpol = dffholdpol;
+                       }
+
+                       // Reset signal of C (IRSTTOP) and D (IRSTBOT)
+                       //   shared with A and B
+                       if ((ffArstmux != NULL) != (dffrstmux != NULL))
+                               goto reject_ffCD;
+                       if ((ffBrstmux != NULL) != (dffrstmux != NULL))
+                               goto reject_ffCD;
+                       if (ffArstmux) {
+                               if (port(ffArstmux, \S) != port(dffrstmux, \S))
+                                       goto reject_ffCD;
+                               if (ffArstpol != dffrstpol)
+                                       goto reject_ffCD;
+                       }
+                       if (ffBrstmux) {
+                               if (port(ffBrstmux, \S) != port(dffrstmux, \S))
+                                       goto reject_ffCD;
+                               if (ffBrstpol != dffrstpol)
+                                       goto reject_ffCD;
+                       }
+
+                       ffCD = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       sigCD = dffD;
+
+reject_ffCD:           ;
                }
+       }
+endcode
 
-               int natural_mul_width = GetSize(sigA) + GetSize(sigB);
-               int actual_mul_width = GetSize(sigH);
-               int actual_acc_width = GetSize(sigCD);
+code sigCD
+       sigCD.extend_u0(32, cd_signed);
+endcode
 
-               if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width))
+code
+       accept;
+endcode
+
+// #######################
+
+subpattern in_dffe
+arg argD argQ clock clock_pol
+
+code
+       dff = nullptr;
+       if (argQ.empty())
+               reject;
+       for (auto c : argQ.chunks()) {
+               if (!c.wire)
                        reject;
-               // If accumulator, check adder width and signedness
-               if (sigCD == sigH && (actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(addAB, \A_SIGNED).as_bool()))
+               if (c.wire->get_bool_attribute(\keep))
+                       reject;
+               Const init = c.wire->attributes.at(\init, State::Sx);
+               if (!init.is_fully_undef() && !init.is_fully_zero())
                        reject;
-
-               sigO = port(addAB, \Y);
-               sigCD.extend_u0(32, CD_SIGNED);
        }
 endcode
 
-match muxA
-       select muxA->type.in($mux)
-       index <int> nusers(port(muxA, \A)) === 2
-       index <SigSpec> port(muxA, \A) === sigO
-       optional
-endmatch
+match ff
+       select ff->type.in($dff)
+       // DSP48E1 does not support clock inversion
+       select param(ff, \CLK_POLARITY).as_bool()
 
-match muxB
-       if !muxA
-       select muxB->type.in($mux)
-       index <int> nusers(port(muxB, \B)) === 2
-       index <SigSpec> port(muxB, \B) === sigO
-       optional
+       slice offset GetSize(port(ff, \D))
+       index <SigBit> port(ff, \Q)[offset] === argQ[0]
+
+       // Check that the rest of argQ is present
+       filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
+       filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+       set ffoffset offset
 endmatch
 
-code muxAB
-       if (muxA)
-               muxAB = muxA;
-       else if (muxB)
-               muxAB = muxB;
-endcode
+code argQ argD
+{
+       if (clock != SigBit()) {
+               if (port(ff, \CLK) != clock)
+                       reject;
+               if (param(ff, \CLK_POLARITY).as_bool() != clock_pol)
+                       reject;
+       }
 
-// Extract the bits of P that actually have a consumer
-// (as opposed to being a dummy)
-code sigOused
-       for (int i = 0; i < GetSize(sigO); i++)
-               if (!sigO[i].wire || nusers(sigO[i]) == 1)
-                       sigOused.append(State::Sx);
-               else
-                       sigOused.append(sigO[i]);
+       SigSpec Q = port(ff, \Q);
+       dff = ff;
+       dffclock = port(ff, \CLK);
+       dffclock_pol = param(ff, \CLK_POLARITY).as_bool();
+       dffD = argQ;
+       argD = port(ff, \D);
+       argQ = Q;
+       dffD.replace(argQ, argD);
+       // Only search for ffrstmux if dffD only
+       //   has two (ff, ffrstmux) users
+       if (nusers(dffD) > 2)
+               argD = SigSpec();
+}
 endcode
 
-match ffO_lo
-       if nusers(sigOused.extract(0,std::min(16,GetSize(sigOused)))) == 2
-       select ffO_lo->type.in($dff)
-       optional
+match ffrstmux
+       if false /* TODO: ice40 resets are actually async */
+
+       if !argD.empty()
+       select ffrstmux->type.in($mux)
+       index <SigSpec> port(ffrstmux, \Y) === argD
+
+       choice <IdString> BA {\B, \A}
+       // DSP48E1 only supports reset to zero
+       select port(ffrstmux, BA).is_fully_zero()
+
+       define <bool> pol (BA == \B)
+       set ffrstpol pol
+       semioptional
 endmatch
 
-code
-       if (ffO_lo) {
-               SigSpec O = sigOused.extract(0,std::min(16,param(ffO_lo, \WIDTH).as_int()));
-               O.remove_const();
-               auto ffO_loSet = port(ffO_lo, \D).to_sigbit_set();
-               auto Oset = O.to_sigbit_set();
-               if (!std::includes(ffO_loSet.begin(), ffO_loSet.end(), Oset.begin(), Oset.end()))
-                       reject;
+code argD
+       if (ffrstmux) {
+               dffrstmux = ffrstmux;
+               dffrstpol = ffrstpol;
+               argD = port(ffrstmux, ffrstpol ? \A : \B);
+               dffD.replace(port(ffrstmux, \Y), argD);
+
+               // Only search for ffholdmux if argQ has at
+               //   least 3 users (ff, <upstream>, ffrstmux) and
+               //   dffD only has two (ff, ffrstmux)
+               if (!(nusers(argQ) >= 3 && nusers(dffD) == 2))
+                       argD = SigSpec();
        }
+       else
+               dffrstmux = nullptr;
 endcode
 
-match ffO_hi
-       if GetSize(sigOused) > 16
-       if nusers(sigOused.extract_end(16)) == 2
-       select ffO_hi->type.in($dff)
-       optional
+match ffholdmux
+       if !argD.empty()
+       select ffholdmux->type.in($mux)
+       index <SigSpec> port(ffholdmux, \Y) === argD
+       choice <IdString> BA {\B, \A}
+       index <SigSpec> port(ffholdmux, BA) === argQ
+       define <bool> pol (BA == \B)
+       set ffholdpol pol
+       semioptional
 endmatch
 
+code argD
+       if (ffholdmux) {
+               dffholdmux = ffholdmux;
+               dffholdpol = ffholdpol;
+               argD = port(ffholdmux, ffholdpol ? \A : \B);
+               dffD.replace(port(ffholdmux, \Y), argD);
+       }
+       else
+               dffholdmux = nullptr;
+endcode
+
+// #######################
+
+subpattern out_dffe
+arg argD argQ clock clock_pol
+
 code
-       if (ffO_hi) {
-               SigSpec O = sigOused.extract_end(16);
-               O.remove_const();
-               auto ffO_hiSet = port(ffO_hi, \D).to_sigbit_set();
-               auto Oset = O.to_sigbit_set();
-               if (!std::includes(ffO_hiSet.begin(), ffO_hiSet.end(), Oset.begin(), Oset.end()))
+       dff = nullptr;
+       for (auto c : argD.chunks())
+               if (c.wire->get_bool_attribute(\keep))
                        reject;
+endcode
+
+match ffholdmux
+       select ffholdmux->type.in($mux)
+       // ffholdmux output must have two users: ffholdmux and ff.D
+       select nusers(port(ffholdmux, \Y)) == 2
+
+       choice <IdString> BA {\B, \A}
+       // keep-last-value net must have at least three users: ffholdmux, ff, downstream sink(s)
+       select nusers(port(ffholdmux, BA)) >= 3
+
+       slice offset GetSize(port(ffholdmux, \Y))
+       define <IdString> AB (BA == \B ? \A : \B)
+       index <SigBit> port(ffholdmux, AB)[offset] === argD[0]
+
+       // Check that the rest of argD is present
+       filter GetSize(port(ffholdmux, AB)) >= offset + GetSize(argD)
+       filter port(ffholdmux, AB).extract(offset, GetSize(argD)) == argD
+
+       set ffoffset offset
+       define <bool> pol (BA == \B)
+       set ffholdpol pol
+
+       semioptional
+endmatch
+
+code argD argQ
+       dffholdmux = ffholdmux;
+       if (ffholdmux) {
+               SigSpec AB = port(ffholdmux, ffholdpol ? \A : \B);
+               SigSpec Y = port(ffholdmux, \Y);
+               argQ = argD;
+               argD.replace(AB, Y);
+               argQ.replace(AB, port(ffholdmux, ffholdpol ? \B : \A));
+
+               dffholdmux = ffholdmux;
+               dffholdpol = ffholdpol;
        }
 endcode
 
-code clock clock_pol sigO sigCD
-       if (ffO_lo || ffO_hi) {
-               if (mul->type == \SB_MAC16) {
-                       // Ensure that register is not already used
-                       if (param(mul, \TOPOUTPUT_SELECT).as_int() == 1 ||
-                                       param(mul, \BOTOUTPUT_SELECT).as_int() == 1)
-                               reject;
+match ffrstmux
+       if false /* TODO: ice40 resets are actually async */
 
-                       // Ensure that OLOADTOP/OLOADBOT is unused or zero
-                       if ((mul->hasPort(\OLOADTOP) && !port(mul, \OLOADTOP).is_fully_zero())
-                               || (mul->hasPort(\OLOADBOT) && !port(mul, \OLOADBOT).is_fully_zero()))
-                               reject;
-               }
+       select ffrstmux->type.in($mux)
+       // ffrstmux output must have two users: ffrstmux and ff.D
+       select nusers(port(ffrstmux, \Y)) == 2
 
-               if (ffO_lo) {
-                       for (auto b : port(ffO_lo, \Q))
-                               if (b.wire->get_bool_attribute(\keep))
-                                       reject;
+       choice <IdString> BA {\B, \A}
+       // DSP48E1 only supports reset to zero
+       select port(ffrstmux, BA).is_fully_zero()
 
-                       SigBit c = port(ffO_lo, \CLK).as_bit();
-                       bool cp = param(ffO_lo, \CLK_POLARITY).as_bool();
+       slice offset GetSize(port(ffrstmux, \Y))
+       define <IdString> AB (BA == \B ? \A : \B)
+       index <SigBit> port(ffrstmux, AB)[offset] === argD[0]
 
-                       if (clock != SigBit() && (c != clock || cp != clock_pol))
-                               reject;
+       // Check that offset is consistent
+       filter !ffholdmux || ffoffset == offset
+       // Check that the rest of argD is present
+       filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD)
+       filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD
 
-                       clock = c;
-                       clock_pol = cp;
+       set ffoffset offset
+       define <bool> pol (AB == \A)
+       set ffrstpol pol
 
-                       sigO.replace(port(ffO_lo, \D), port(ffO_lo, \Q));
-               }
+       semioptional
+endmatch
 
-               if (ffO_hi) {
-                       for (auto b : port(ffO_hi, \Q))
-                               if (b.wire->get_bool_attribute(\keep))
-                                       reject;
+code argD argQ
+       dffrstmux = ffrstmux;
+       if (ffrstmux) {
+               SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B);
+               SigSpec Y = port(ffrstmux, \Y);
+               argD.replace(AB, Y);
 
-                       SigBit c = port(ffO_hi, \CLK).as_bit();
-                       bool cp = param(ffO_hi, \CLK_POLARITY).as_bool();
+               dffrstmux = ffrstmux;
+               dffrstpol = ffrstpol;
+       }
+endcode
 
-                       if (clock != SigBit() && (c != clock || cp != clock_pol))
-                               reject;
+match ff
+       select ff->type.in($dff)
+       // DSP48E1 does not support clock inversion
+       select param(ff, \CLK_POLARITY).as_bool()
 
-                       clock = c;
-                       clock_pol = cp;
+       slice offset GetSize(port(ff, \D))
+       index <SigBit> port(ff, \D)[offset] === argD[0]
 
-                       sigO.replace(port(ffO_hi, \D), port(ffO_hi, \Q));
+       // Check that offset is consistent
+       filter (!ffholdmux && !ffrstmux) || ffoffset == offset
+       // Check that the rest of argD is present
+       filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
+       filter port(ff, \D).extract(offset, GetSize(argD)) == argD
+       // Check that FF.Q is connected to CE-mux
+       filter !ffholdmux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+       set ffoffset offset
+endmatch
+
+code argQ
+       if (ff) {
+               if (clock != SigBit()) {
+                       if (port(ff, \CLK) != clock)
+                               reject;
+                       if (param(ff, \CLK_POLARITY).as_bool() != clock_pol)
+                               reject;
+               }
+               SigSpec D = port(ff, \D);
+               SigSpec Q = port(ff, \Q);
+               if (!ffholdmux) {
+                       argQ = argD;
+                       argQ.replace(D, Q);
                }
 
-               // Loading value into output register is not
-               //   supported unless using accumulator
-               if (muxAB) {
-                       if (sigCD != sigO)
+               for (auto c : argQ.chunks()) {
+                       Const init = c.wire->attributes.at(\init, State::Sx);
+                       if (!init.is_fully_undef() && !init.is_fully_zero())
                                reject;
-                       if (muxA)
-                               sigCD = port(muxAB, \B);
-                       else if (muxB)
-                               sigCD = port(muxAB, \A);
-                       else log_abort();
-                       sigCD.extend_u0(32, addAB && param(addAB, \A_SIGNED).as_bool() && param(addAB, \B_SIGNED).as_bool());
                }
+
+               dff = ff;
+               dffQ = argQ;
+               dffclock = port(ff, \CLK);
+               dffclock_pol = param(ff, \CLK_POLARITY).as_bool();
        }
-       accept;
+       // No enable/reset mux possible without flop
+       else if (dffholdmux || dffrstmux)
+               reject;
 endcode