Refactor ice40_dsp.pmg
authorEddie Hung <eddie@fpgeh.com>
Thu, 19 Sep 2019 19:00:48 +0000 (12:00 -0700)
committerEddie Hung <eddie@fpgeh.com>
Thu, 19 Sep 2019 19:00:48 +0000 (12:00 -0700)
passes/pmgen/ice40_dsp.cc
passes/pmgen/ice40_dsp.pmg

index 68fc29f311f9b6457d553761e09685f2f8e3350d..4132857d6b4bdba4c7f86389fb6a30448d002b96 100644 (file)
@@ -31,12 +31,12 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
 
 #if 1
        log("\n");
-       log("ffA:    %s\n", log_id(st.ffA, "--"));
-       log("ffB:    %s\n", log_id(st.ffB, "--"));
+       log("ffA:    %s %s %s\n", log_id(st.ffA, "--"), log_id(st.ffAcemux, "--"), log_id(st.ffArstmux, "--"));
+       log("ffB:    %s %s %s\n", log_id(st.ffB, "--"), log_id(st.ffBcemux, "--"), log_id(st.ffBrstmux, "--"));
        log("mul:    %s\n", log_id(st.mul, "--"));
-       log("ffFJKG: %s\n", log_id(st.ffFJKG, "--"));
-       log("addAB:  %s\n", log_id(st.addAB, "--"));
-       log("muxAB:  %s\n", log_id(st.muxAB, "--"));
+       log("ffFJKG: %s n/a %s\n", log_id(st.ffFJKG, "--"), log_id(st.ffFJKGrstmux, "--"));
+       log("add:    %s\n", log_id(st.add, "--"));
+       log("mux:    %s\n", log_id(st.mux, "--"));
        log("ffO:    %s\n", log_id(st.ffO, "--"));
 #endif
 
@@ -146,10 +146,10 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
        SigSpec O = st.sigO;
        int O_width = GetSize(O);
        if (O_width == 33) {
-               log_assert(st.addAB);
+               log_assert(st.add);
                // If we have a signed multiply-add, then perform sign extension
                // TODO: Need to check CD[31:16] is sign extension of CD[15:0]?
-               if (st.addAB->getParam("\\A_SIGNED").as_bool() && st.addAB->getParam("\\B_SIGNED").as_bool())
+               if (st.add->getParam("\\A_SIGNED").as_bool() && st.add->getParam("\\B_SIGNED").as_bool())
                        pm.module->connect(O[32], O[31]);
                else
                        cell->setPort("\\CO", O[32]);
@@ -164,18 +164,14 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
        cell->setPort("\\O", O);
 
        bool accum = false;
-       if (st.addAB) {
-               if (st.addA)
-                       accum = (st.ffO && st.addAB->getPort("\\B") == st.sigO);
-               else if (st.addB)
-                       accum = (st.ffO && st.addAB->getPort("\\A") == st.sigO);
-               else log_abort();
+       if (st.add) {
+               accum = (st.ffO && st.add->getPort(st.addAB == "\\A" ? "\\B" : "\\A") == st.sigO);
                if (accum)
-                       log("  accumulator %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type));
+                       log("  accumulator %s (%s)\n", log_id(st.add), log_id(st.add->type));
                else
-                       log("  adder %s (%s)\n", log_id(st.addAB), log_id(st.addAB->type));
-               cell->setPort("\\ADDSUBTOP", st.addAB->type == "$add" ? State::S0 : State::S1);
-               cell->setPort("\\ADDSUBBOT", st.addAB->type == "$add" ? State::S0 : State::S1);
+                       log("  adder %s (%s)\n", log_id(st.add), log_id(st.add->type));
+               cell->setPort("\\ADDSUBTOP", st.add->type == "$add" ? State::S0 : State::S1);
+               cell->setPort("\\ADDSUBBOT", st.add->type == "$add" ? State::S0 : State::S1);
        } else {
                cell->setPort("\\ADDSUBTOP", State::S0);
                cell->setPort("\\ADDSUBBOT", State::S0);
@@ -188,10 +184,12 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
        cell->setPort("\\OHOLDBOT", State::S0);
 
        SigSpec acc_reset = State::S0;
-       if (st.muxA)
-               acc_reset = st.muxA->getPort("\\S");
-       if (st.muxB)
-               acc_reset = pm.module->Not(NEW_ID, st.muxB->getPort("\\S"));
+       if (st.mux) {
+               if (st.muxAB == "\\A")
+                       acc_reset = st.mux->getPort("\\S");
+               else
+                       acc_reset = pm.module->Not(NEW_ID, st.mux->getPort("\\S"));
+       }
 
        cell->setPort("\\OLOADTOP", acc_reset);
        cell->setPort("\\OLOADBOT", acc_reset);
@@ -219,8 +217,8 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
        cell->setParam("\\B_SIGNED", st.mul->getParam("\\B_SIGNED").as_bool());
 
        if (st.ffO) {
-               if (st.ffO_lo)
-                       cell->setParam("\\TOPOUTPUT_SELECT", Const(st.addAB ? 0 : 3, 2));
+               if (st.o_lo)
+                       cell->setParam("\\TOPOUTPUT_SELECT", Const(st.add ? 0 : 3, 2));
                else
                        cell->setParam("\\TOPOUTPUT_SELECT", Const(1, 2));
 
@@ -228,8 +226,8 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
                cell->setParam("\\BOTOUTPUT_SELECT", Const(1, 2));
        }
        else {
-               cell->setParam("\\TOPOUTPUT_SELECT", Const(st.addAB ? 0 : 3, 2));
-               cell->setParam("\\BOTOUTPUT_SELECT", Const(st.addAB ? 0 : 3, 2));
+               cell->setParam("\\TOPOUTPUT_SELECT", Const(st.add ? 0 : 3, 2));
+               cell->setParam("\\BOTOUTPUT_SELECT", Const(st.add ? 0 : 3, 2));
        }
 
        if (cell != st.mul)
@@ -237,7 +235,7 @@ void create_ice40_dsp(ice40_dsp_pm &pm)
        else
                pm.blacklist(st.mul);
        pm.autoremove(st.ffFJKG);
-       pm.autoremove(st.addAB);
+       pm.autoremove(st.add);
 }
 
 struct Ice40DspPass : public Pass {
@@ -249,6 +247,7 @@ struct Ice40DspPass : public Pass {
                log("    ice40_dsp [options] [selection]\n");
                log("\n");
                log("Map multipliers and multiply-accumulate blocks to iCE40 DSP resources.\n");
+               log("Currently, only the 16x16 multiply mode is supported and not the 2 x 8x8 mode.\n");
                log("\n");
        }
        void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
index fbf49810930cfdc8b51ed47b538afff08e925e21..22267aea731dd71f1f0201749a095d9ee68535d2 100644 (file)
@@ -1,9 +1,25 @@
 pattern ice40_dsp
 
 state <SigBit> clock
-state <bool> clock_pol cd_signed
+state <bool> clock_pol cd_signed o_lo
 state <SigSpec> sigA sigB sigCD sigH sigO
-state <Cell*> addAB muxAB
+state <Cell*> add mux
+state <IdString> addAB muxAB
+
+state <bool> ffAcepol ffBcepol ffCDcepol ffOcepol
+state <bool> ffArstpol ffBrstpol ffCDrstpol ffFJKGrstpol ffOrstpol
+
+state <Cell*> ffA ffAcemux ffArstmux ffB ffBcemux ffBrstmux ffCD ffCDcemux ffCDrstmux
+state <Cell*> ffFJKG ffFJKGrstmux ffO ffOcemux ffOrstmux
+
+// subpattern
+state <SigSpec> argQ argD
+state <bool> ffcepol ffrstpol
+state <int> ffoffset
+udata <SigSpec> dffD dffQ
+udata <SigBit> dffclock
+udata <Cell*> dff dffcemux dffrstmux
+udata <bool> dffcepol dffrstpol dffclock_pol
 
 match mul
        select mul->type.in($mul, \SB_MAC16)
@@ -47,226 +63,443 @@ code sigA sigB sigH
        log_assert(nusers(O.extract_end(i)) <= 1);
 endcode
 
-match ffA
-       if mul->type != \SB_MAC16 || !param(mul, \A_REG).as_bool()
-       select ffA->type.in($dff)
-       filter GetSize(port(ffA, \Q)) >= GetSize(sigA)
-       slice offset GetSize(port(ffA, \Q))
-       filter offset+GetSize(sigA) <= GetSize(port(ffA, \Q)) && port(ffA, \Q).extract(offset, GetSize(sigA)) == sigA
-       optional
-endmatch
+code argQ ffA ffAcemux ffArstmux ffAcepol ffArstpol sigA clock clock_pol
+       if (mul->type != \SB_MAC16 || !param(mul, \A_REG).as_bool()) {
+               argQ = sigA;
+               subpattern(in_dffe);
+               if (dff) {
+                       ffA = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       if (dffrstmux) {
+                               ffArstmux = dffrstmux;
+                               ffArstpol = dffrstpol;
+                       }
+                       if (dffcemux) {
+                               ffAcemux = dffcemux;
+                               ffAcepol = dffcepol;
+                       }
+                       sigA = dffD;
+               }
+       }
+endcode
 
-code sigA clock clock_pol
-       if (ffA) {
-               for (auto b : port(ffA, \Q))
-                       if (b.wire->get_bool_attribute(\keep))
-                               reject;
+code argQ ffB ffBcemux ffBrstmux ffBcepol ffBrstpol sigB clock clock_pol
+       if (mul->type != \SB_MAC16 || !param(mul, \B_REG).as_bool()) {
+               argQ = sigB;
+               subpattern(in_dffe);
+               if (dff) {
+                       ffB = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       if (dffrstmux) {
+                               ffBrstmux = dffrstmux;
+                               ffBrstpol = dffrstpol;
+                       }
+                       if (dffcemux) {
+                               ffBcemux = dffcemux;
+                               ffBcepol = dffcepol;
+                       }
+                       sigB = dffD;
+               }
+       }
+endcode
 
-               clock = port(ffA, \CLK).as_bit();
-               clock_pol = param(ffA, \CLK_POLARITY).as_bool();
+code argD ffFJKG ffFJKGrstmux ffFJKGrstpol sigH sigO clock clock_pol
+       if (nusers(sigH) == 2 &&
+                       (mul->type != \SB_MAC16 ||
+                        (!param(mul, \TOP_8x8_MULT_REG).as_bool() && !param(mul, \BOT_8x8_MULT_REG).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG2).as_bool()))) {
+               argD = sigH;
+               subpattern(out_dffe);
+               if (dff) {
+                       ffFJKG = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       if (dffrstmux) {
+                               ffFJKGrstmux = dffrstmux;
+                               ffFJKGrstpol = dffrstpol;
+                       }
+                       // F/J/K/G do not have a CE-like (hold) input
+                       if (dffcemux)
+                               reject;
 
-               sigA.replace(port(ffA, \Q), port(ffA, \D));
+                       // Reset signal of F/J (IRSTTOP) and K/G (IRSTBOT)
+                       //   shared with A and B
+                       if ((ffArstmux != NULL) != (ffFJKGrstmux != NULL))
+                               reject;
+                       if ((ffBrstmux != NULL) != (ffFJKGrstmux != NULL))
+                               reject;
+                       if (ffArstmux) {
+                               if (port(ffArstmux, \S) != port(ffFJKGrstmux, \S))
+                                       reject;
+                               if (ffArstpol != ffFJKGrstpol)
+                                       reject;
+                       }
+                       if (ffBrstmux) {
+                               if (port(ffBrstmux, \S) != port(ffFJKGrstmux, \S))
+                                       reject;
+                               if (ffBrstpol != ffFJKGrstpol)
+                                       reject;
+                       }
+
+                       sigH = dffQ;
+               }
        }
+
+       sigO = sigH;
 endcode
 
-match ffB
-       if mul->type != \SB_MAC16 || !param(mul, \B_REG).as_bool()
-       select ffB->type.in($dff)
-       filter GetSize(port(ffB, \Q)) >= GetSize(sigB)
-       slice offset GetSize(port(ffB, \Q))
-       filter offset+GetSize(sigB) <= GetSize(port(ffB, \Q)) && port(ffB, \Q).extract(offset, GetSize(sigB)) == sigB
+match add
+       if mul->type != \SB_MAC16 || (param(mul, \TOPOUTPUT_SELECT).as_int() == 3 && param(mul, \BOTOUTPUT_SELECT).as_int() == 3)
+       select add->type.in($add)
+       choice <IdString> AB {\A, \B}
+       select nusers(port(add, AB)) == 2
+       index <SigBit> port(add, AB)[0] === sigH[0]
+       filter GetSize(port(add, AB)) <= GetSize(sigH)
+       filter port(add, AB) ==  sigH.extract(0, GetSize(port(add, AB)))
+       set addAB AB
        optional
 endmatch
 
-code sigB clock clock_pol
-       if (ffB) {
-               for (auto b : port(ffB, \Q))
-                       if (b.wire->get_bool_attribute(\keep))
-                               reject;
+code sigCD sigO cd_signed
+       if (add) {
+               sigCD = port(add, addAB == \A ? \B : \A);
+               cd_signed = param(add, addAB == \A ? \B_SIGNED : \A_SIGNED).as_bool();
 
-               SigBit c = port(ffB, \CLK).as_bit();
-               bool cp = param(ffB, \CLK_POLARITY).as_bool();
+               int natural_mul_width = GetSize(sigA) + GetSize(sigB);
+               int actual_mul_width = GetSize(sigH);
+               int actual_acc_width = GetSize(sigCD);
 
-               if (clock != SigBit() && (c != clock || cp != clock_pol))
+               if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width))
+                       reject;
+               // If accumulator, check adder width and signedness
+               if (sigCD == sigH && (actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(add, \A_SIGNED).as_bool()))
                        reject;
 
-               clock = c;
-               clock_pol = cp;
-
-               sigB.replace(port(ffB, \Q), port(ffB, \D));
+               sigO = port(add, \Y);
        }
 endcode
 
-match ffFJKG
-       // Ensure pipeline register is not already used
-       if mul->type != \SB_MAC16 || (!param(mul, \TOP_8x8_MULT_REG).as_bool() && !param(mul, \BOT_8x8_MULT_REG).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG2).as_bool())
-       select ffFJKG->type.in($dff)
-       select nusers(port(ffFJKG, \D)) == 2
-       index <SigSpec> port(ffFJKG, \D) === sigH
+match mux
+       select mux->type == $mux
+       choice <IdString> AB {\A, \B}
+       index <int> nusers(port(mux, AB)) === 2
+       index <SigSpec> port(mux, AB) === sigO
+       set muxAB AB
        optional
 endmatch
 
-code sigH sigO clock clock_pol
-       if (ffFJKG) {
-               sigH = port(ffFJKG, \Q);
-               for (auto b : sigH)
-                       if (b.wire->get_bool_attribute(\keep))
-                               reject;
+code sigO
+       if (mux)
+               sigO = port(mux, \Y);
+endcode
 
-               SigBit c = port(ffFJKG, \CLK).as_bit();
-               bool cp = param(ffFJKG, \CLK_POLARITY).as_bool();
+code argD ffO ffOcemux ffOrstmux ffOcepol ffOrstpol sigO sigCD clock clock_pol cd_signed o_lo
+       if (mul->type != \SB_MAC16 ||
+                       // Ensure that register is not already used
+                       ((mul->parameters.at(\TOPOUTPUT_SELECT, 0).as_int() != 1 && mul->parameters.at(\BOTOUTPUT_SELECT, 0).as_int() != 1) &&
+                        // Ensure that OLOADTOP/OLOADBOT is unused or zero
+                        (mul->connections_.at(\OLOADTOP, State::S0).is_fully_zero() && mul->connections_.at(\OLOADBOT, State::S0).is_fully_zero()))) {
 
-               if (clock != SigBit() && (c != clock || cp != clock_pol))
-                       reject;
+               dff = nullptr;
+
+               // First try entire sigO
+               if (nusers(sigO) == 2) {
+                       argD = sigO;
+                       subpattern(out_dffe);
+               }
 
-               clock = c;
-               clock_pol = cp;
+               // Otherwise try just its least significant 16 bits
+               if (!dff && GetSize(sigO) > 16) {
+                       argD = sigO.extract(0, 16);
+                       if (nusers(argD) == 2) {
+                               subpattern(out_dffe);
+                               o_lo = dff;
+                       }
+               }
+
+               if (dff) {
+                       ffO = dff;
+                       clock = dffclock;
+                       clock_pol = dffclock_pol;
+                       if (dffrstmux) {
+                               ffOrstmux = dffrstmux;
+                               ffOrstpol = dffrstpol;
+                       }
+                       if (dffcemux) {
+                               ffOcemux = dffcemux;
+                               ffOcepol = dffcepol;
+                       }
+
+                       sigO.replace(sigO.extract(0, GetSize(dffQ)), dffQ);
+               }
+
+               // Loading value into output register is not
+               //   supported unless using accumulator
+               if (mux) {
+                       if (sigCD != sigO)
+                               reject;
+                       sigCD = port(mux, muxAB == \B ? \A : \B);
+
+                       cd_signed = add && param(add, \A_SIGNED).as_bool() && param(add, \B_SIGNED).as_bool();
+               }
        }
+       sigCD.extend_u0(32, cd_signed);
+endcode
 
-       sigO = sigH;
+code
+       accept;
 endcode
 
-match addA
-       select addA->type.in($add)
-       select nusers(port(addA, \A)) == 2
-       filter param(addA, \A_WIDTH).as_int() <= GetSize(sigH)
-       //index <SigSpec> port(addA, \A) === sigH.extract(0, param(addA, \A_WIDTH).as_int())
-       filter port(addA, \A) ==  sigH.extract(0, param(addA, \A_WIDTH).as_int())
-       optional
-endmatch
+// #######################
 
-match addB
-       if !addA
-       select addB->type.in($add, $sub)
-       select nusers(port(addB, \B)) == 2
-       filter param(addB, \B_WIDTH).as_int() <= GetSize(sigH)
-       //index <SigSpec> port(addB, \B) === sigH.extract(0, param(addB, \B_WIDTH).as_int())
-       filter port(addB, \B) ==  sigH.extract(0, param(addB, \B_WIDTH).as_int())
-       optional
-endmatch
+subpattern in_dffe
+arg argD argQ clock clock_pol
 
-code addAB sigCD sigO cd_signed
-       if (addA) {
-               addAB = addA;
-               sigCD = port(addAB, \B);
-               cd_signed = param(addAB, \B_SIGNED).as_bool();
-       }
-       else if (addB) {
-               addAB = addB;
-               sigCD = port(addAB, \A);
-               cd_signed = param(addAB, \A_SIGNED).as_bool();
+code
+       dff = nullptr;
+       for (auto c : argQ.chunks()) {
+               if (!c.wire)
+                       reject;
+               if (c.wire->get_bool_attribute(\keep))
+                       reject;
        }
-       if (addAB) {
-               if (mul->type == \SB_MAC16) {
-                       // Ensure that adder is not used
-                       if (param(mul, \TOPOUTPUT_SELECT).as_int() != 3 ||
-                                       param(mul, \BOTOUTPUT_SELECT).as_int() != 3)
-                               reject;
-               }
+endcode
 
-               int natural_mul_width = GetSize(sigA) + GetSize(sigB);
-               int actual_mul_width = GetSize(sigH);
-               int actual_acc_width = GetSize(sigCD);
+match ff
+       select ff->type.in($dff)
+       // DSP48E1 does not support clock inversion
+       select param(ff, \CLK_POLARITY).as_bool()
 
-               if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width))
+       slice offset GetSize(port(ff, \D))
+       index <SigBit> port(ff, \Q)[offset] === argQ[0]
+
+       // Check that the rest of argQ is present
+       filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
+       filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+       set ffoffset offset
+endmatch
+
+code argQ argD
+{
+       if (clock != SigBit()) {
+               if (port(ff, \CLK) != clock)
                        reject;
-               // If accumulator, check adder width and signedness
-               if (sigCD == sigH && (actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(addAB, \A_SIGNED).as_bool()))
+               if (param(ff, \CLK_POLARITY).as_bool() != clock_pol)
                        reject;
-
-               sigO = port(addAB, \Y);
        }
+
+       SigSpec Q = port(ff, \Q);
+       dff = ff;
+       dffclock = port(ff, \CLK);
+       dffclock_pol = param(ff, \CLK_POLARITY).as_bool();
+       dffD = argQ;
+       argD = port(ff, \D);
+       argQ = Q;
+       dffD.replace(argQ, argD);
+       // Only search for ffrstmux if dffD only
+       //   has two (ff, ffrstmux) users
+       if (nusers(dffD) > 2)
+               argD = SigSpec();
+}
 endcode
 
-match muxA
-       select muxA->type.in($mux)
-       index <int> nusers(port(muxA, \A)) === 2
-       index <SigSpec> port(muxA, \A) === sigO
-       optional
+match ffrstmux
+       if !argD.empty()
+       select ffrstmux->type.in($mux)
+       index <SigSpec> port(ffrstmux, \Y) === argD
+
+       choice <IdString> BA {\B, \A}
+       // DSP48E1 only supports reset to zero
+       select port(ffrstmux, BA).is_fully_zero()
+
+       define <bool> pol (BA == \B)
+       set ffrstpol pol
+       semioptional
 endmatch
 
-match muxB
-       if !muxA
-       select muxB->type.in($mux)
-       index <int> nusers(port(muxB, \B)) === 2
-       index <SigSpec> port(muxB, \B) === sigO
-       optional
+code argD
+       if (ffrstmux) {
+               dffrstmux = ffrstmux;
+               dffrstpol = ffrstpol;
+               argD = port(ffrstmux, ffrstpol ? \A : \B);
+               dffD.replace(port(ffrstmux, \Y), argD);
+
+               // Only search for ffcemux if argQ has at
+               //   least 3 users (ff, <upstream>, ffrstmux) and
+               //   dffD only has two (ff, ffrstmux)
+               if (!(nusers(argQ) >= 3 && nusers(dffD) == 2))
+                       argD = SigSpec();
+       }
+       else
+               dffrstmux = nullptr;
+endcode
+
+match ffcemux
+       if !argD.empty()
+       select ffcemux->type.in($mux)
+       index <SigSpec> port(ffcemux, \Y) === argD
+       choice <IdString> AB {\A, \B}
+       index <SigSpec> port(ffcemux, AB) === argQ
+       define <bool> pol (AB == \A)
+       set ffcepol pol
+       semioptional
 endmatch
 
-code muxAB sigO
-       if (muxA)
-               muxAB = muxA;
-       else if (muxB)
-               muxAB = muxB;
-       if (muxAB)
-               sigO = port(muxAB, \Y);
+code argD
+       if (ffcemux) {
+               dffcemux = ffcemux;
+               dffcepol = ffcepol;
+               argD = port(ffcemux, ffcepol ? \B : \A);
+               dffD.replace(port(ffcemux, \Y), argD);
+       }
+       else
+               dffcemux = nullptr;
 endcode
 
-match ffO
-       // Ensure that register is not already used
-       if mul->type != \SB_MAC16 || (mul->parameters.at(\TOPOUTPUT_SELECT, 0).as_int() != 1 && mul->parameters.at(\BOTOUTPUT_SELECT, 0).as_int() != 1)
-       // Ensure that OLOADTOP/OLOADBOT is unused or zero
-       if mul->type != \SB_MAC16 || (mul->connections_.at(\OLOADTOP, State::S0).is_fully_zero() && mul->connections_.at(\OLOADBOT, State::S0).is_fully_zero())
-       if nusers(sigO) == 2
-       select ffO->type.in($dff)
-       filter GetSize(port(ffO, \D)) >= GetSize(sigO)
-       slice offset GetSize(port(ffO, \D))
-       filter offset+GetSize(sigO) <= GetSize(port(ffO, \D)) && port(ffO, \D).extract(offset, GetSize(sigO)) == sigO
-       optional
+// #######################
+
+subpattern out_dffe
+arg argD argQ clock clock_pol
+
+code
+       dff = nullptr;
+endcode
+
+match ffcemux
+       select ffcemux->type.in($mux)
+       // ffcemux output must have two users: ffcemux and ff.D
+       select nusers(port(ffcemux, \Y)) == 2
+
+       choice <IdString> AB {\A, \B}
+       // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s)
+       select nusers(port(ffcemux, AB)) >= 3
+
+       slice offset GetSize(port(ffcemux, \Y))
+       define <IdString> BA (AB == \A ? \B : \A)
+       index <SigBit> port(ffcemux, BA)[offset] === argD[0]
+
+       // Check that the rest of argD is present
+       filter GetSize(BA) >= offset + GetSize(argD)
+       filter port(ffcemux, BA).extract(offset, GetSize(argD)) == argD
+
+       set ffoffset offset
+       define <bool> pol (BA == \B)
+       set ffcepol pol
+
+       semioptional
 endmatch
 
-match ffO_lo
-       if !ffO && GetSize(sigO) > 16
-       // Ensure that register is not already used
-       if mul->type != \SB_MAC16 || (mul->parameters.at(\TOPOUTPUT_SELECT, 0).as_int() != 1 && mul->parameters.at(\BOTOUTPUT_SELECT, 0).as_int() != 1)
-       // Ensure that OLOADTOP/OLOADBOT is unused or zero
-       if mul->type != \SB_MAC16 || (mul->connections_.at(\OLOADTOP, State::S0).is_fully_zero() && mul->connections_.at(\OLOADBOT, State::S0).is_fully_zero())
-       if nusers(sigO.extract(0, 16)) == 2
-       select ffO_lo->type.in($dff)
-       filter GetSize(port(ffO_lo, \D)) >= 16
-       slice offset GetSize(port(ffO_lo, \D))
-       filter offset+GetSize(sigO) <= GetSize(port(ffO_lo, \D)) && port(ffO_lo, \D).extract(offset, 16) == sigO.extract(0, 16)
-       optional
+code argD argQ
+       dffcemux = ffcemux;
+       if (ffcemux) {
+               SigSpec BA = port(ffcemux, ffcepol ? \B : \A);
+               if (ffoffset + GetSize(argD) > GetSize(BA))
+                       reject;
+               for (int i = 1; i < GetSize(argD); i++)
+                       if (BA[ffoffset+i] != argD[i])
+                               reject;
+
+               SigSpec Y = port(ffcemux, \Y);
+               argQ = argD;
+               argD.replace(BA, Y);
+               argQ.replace(BA, port(ffcemux, ffcepol ? \A : \B));
+
+               dffcemux = ffcemux;
+               dffcepol = ffcepol;
+       }
+endcode
+
+match ffrstmux
+       select ffrstmux->type.in($mux)
+       // ffrstmux output must have two users: ffrstmux and ff.D
+       select nusers(port(ffrstmux, \Y)) == 2
+
+       choice <IdString> BA {\B, \A}
+       // DSP48E1 only supports reset to zero
+       select port(ffrstmux, BA).is_fully_zero()
+
+       slice offset GetSize(port(ffrstmux, \Y))
+       define <IdString> AB (BA == \B ? \A : \B)
+       index <SigBit> port(ffrstmux, AB)[offset] === argD[0]
+
+       // Check that offset is consistent
+       filter !ffcemux || ffoffset == offset
+       // Check that the rest of argD is present
+       filter GetSize(AB) >= offset + GetSize(argD)
+       filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD
+
+       set ffoffset offset
+       define <bool> pol (AB == \A)
+       set ffrstpol pol
+
+       semioptional
 endmatch
 
-code ffO clock clock_pol sigO sigCD cd_signed
-       if (ffO_lo) {
-               log_assert(!ffO);
-               ffO = ffO_lo;
+code argD argQ
+       dffrstmux = ffrstmux;
+       if (ffrstmux) {
+               SigSpec AB = port(ffrstmux, ffrstpol ? \A : \B);
+               SigSpec Y = port(ffrstmux, \Y);
+               argD.replace(AB, Y);
+
+               dffrstmux = ffrstmux;
+               dffrstpol = ffrstpol;
        }
-       if (ffO) {
-               for (auto b : port(ffO, \Q))
-                       if (b.wire->get_bool_attribute(\keep))
-                               reject;
+endcode
 
-               SigBit c = port(ffO, \CLK).as_bit();
-               bool cp = param(ffO, \CLK_POLARITY).as_bool();
+match ff
+       select ff->type.in($dff)
+       // DSP48E1 does not support clock inversion
+       select param(ff, \CLK_POLARITY).as_bool()
 
-               if (clock != SigBit() && (c != clock || cp != clock_pol))
-                       reject;
+       slice offset GetSize(port(ff, \D))
+       index <SigBit> port(ff, \D)[offset] === argD[0]
 
-               clock = c;
-               clock_pol = cp;
+       // Check that offset is consistent
+       filter (!ffcemux && !ffrstmux) || ffoffset == offset
+       // Check that the rest of argD is present
+       filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
+       filter port(ff, \D).extract(offset, GetSize(argD)) == argD
+       // Check that FF.Q is connected to CE-mux
+       filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
 
-               sigO.replace(port(ffO, \D), port(ffO, \Q));
+       set ffoffset offset
 
-               // Loading value into output register is not
-               //   supported unless using accumulator
-               if (muxAB) {
-                       if (sigCD != sigO)
+       semioptional
+endmatch
+
+code argQ
+       if (ff) {
+               if (clock != SigBit()) {
+                       if (port(ff, \CLK) != clock)
                                reject;
-                       if (muxA)
-                               sigCD = port(muxAB, \B);
-                       else if (muxB)
-                               sigCD = port(muxAB, \A);
-                       else log_abort();
+                       if (param(ff, \CLK_POLARITY).as_bool() != clock_pol)
+                               reject;
+               }
 
-                       cd_signed = addAB && param(addAB, \A_SIGNED).as_bool() && param(addAB, \B_SIGNED).as_bool();
+               SigSpec D = port(ff, \D);
+               SigSpec Q = port(ff, \Q);
+               if (!ffcemux) {
+                       argQ = argD;
+                       argQ.replace(D, Q);
                }
-       }
-       sigCD.extend_u0(32, cd_signed);
-endcode
 
-code
-       accept;
+               for (auto c : argQ.chunks()) {
+                       if (c.wire->get_bool_attribute(\keep))
+                               reject;
+                       Const init = c.wire->attributes.at(\init, State::Sx);
+                       if (!init.is_fully_undef() && !init.is_fully_zero())
+                               reject;
+               }
+
+               dff = ff;
+               dffQ = argQ;
+               dffclock = port(ff, \CLK);
+               dffclock_pol = param(ff, \CLK_POLARITY).as_bool();
+       }
+       // No enable/reset mux possible without flop
+       else if (dffcemux || dffrstmux)
+               reject;
 endcode